diff --git a/Make.inc b/Make.inc index 08132bd8819f0..c78317ff4cbfc 100644 --- a/Make.inc +++ b/Make.inc @@ -1461,6 +1461,12 @@ endef # Overridable in Make.user WINE ?= wine +ifeq ($(BINARY),32) +HEAPLIM := --heap-size-hint=1000M +else +HEAPLIM := +endif + # many of the following targets must be = not := because the expansion of the makefile functions (and $1) shouldn't happen until later ifeq ($(BUILD_OS), WINNT) # MSYS spawn = $(1) diff --git a/NEWS.md b/NEWS.md index 5dcde34c430aa..742a1c8609d01 100644 --- a/NEWS.md +++ b/NEWS.md @@ -24,6 +24,7 @@ Language changes Compiler/Runtime improvements ----------------------------- +* Updated GC heuristics to count allocated pages instead of individual objects ([#50144]). * Time to first execution (TTFX, sometimes called time to first plot) is greatly reduced. Package precompilation now saves native code into a "pkgimage", meaning that code generated during the precompilation process will not diff --git a/doc/src/devdocs/gc.md b/doc/src/devdocs/gc.md index 0aef8569a9647..363284286dae9 100644 --- a/doc/src/devdocs/gc.md +++ b/doc/src/devdocs/gc.md @@ -66,6 +66,12 @@ This scheme eliminates the need of explicitly keeping a flag to indicate a full ## Heuristics GC heuristics tune the GC by changing the size of the allocation interval between garbage collections. -If a GC was unproductive, then we increase the size of the allocation interval to allow objects more time to die. -If a GC returns a lot of space we can shrink the interval. The goal is to find a steady state where we are -allocating just about the same amount as we are collecting. + +The GC heuristics measure the heap size after each collection and schedule the next +collection following the algorithm described in https://dl.acm.org/doi/10.1145/3563323. +In summary, it argues that the heap target should grow with the square root of the live heap, +scaled by how fast the GC frees memory and how fast the mutators allocate it. +The heuristics measure the heap size by counting the pages in use and the memory allocated through malloc. Previously we measured the heap size by counting +live objects, which ignores fragmentation and could lead to bad decisions; it also meant we used thread-local information (allocations) to make +a process-wide decision (when to collect). Measuring pages makes that decision global. + +The GC performs a full collection when the heap size reaches 80% of the maximum allowed size. diff --git a/src/gc-debug.c b/src/gc-debug.c index 78768d5802824..3e67555c24e7c 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1,7 +1,10 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license #include "gc.h" +#include "julia.h" #include +#include +#include #include // re-include assert.h without NDEBUG, @@ -847,11 +850,11 @@ void gc_time_pool_end(int sweep_full) double sweep_speed = sweep_gb / sweep_pool_sec; jl_safe_printf("GC sweep pools end %.2f ms at %.1f GB/s " "(skipped %.2f %% of %" PRId64 ", swept %" PRId64 " pgs, " - "%" PRId64 " freed with %" PRId64 " lazily) %s\n", + "%" PRId64 " freed) %s\n", sweep_pool_sec * 1000, sweep_speed, (total_pages ? ((double)skipped_pages * 100) / total_pages : 0), total_pages, total_pages - skipped_pages, - freed_pages, lazy_freed_pages, + freed_pages, sweep_full ?
"full" : "quick"); } @@ -943,12 +946,12 @@ void gc_time_sweep_pause(uint64_t gc_end_t, int64_t actual_allocd, jl_safe_printf("GC sweep pause %.2f ms live %" PRId64 " kB " "(freed %" PRId64 " kB EST %" PRId64 " kB " "[error %" PRId64 "] = %d%% of allocd b %" PRIu64 ") " - "(%.2f ms in post_mark) %s | next in %" PRId64 " kB\n", + "(%.2f ms in post_mark) %s\n", jl_ns2ms(sweep_pause), live_bytes / 1024, gc_num.freed / 1024, estimate_freed / 1024, gc_num.freed - estimate_freed, pct, gc_num.since_sweep / 1024, jl_ns2ms(gc_postmark_end - gc_premark_end), - sweep_full ? "full" : "quick", -gc_num.allocd / 1024); + sweep_full ? "full" : "quick"); } void gc_time_summary(int sweep_full, uint64_t start, uint64_t end, @@ -968,11 +971,35 @@ void gc_time_summary(int sweep_full, uint64_t start, uint64_t end, jl_safe_printf("TS: %" PRIu64 " Minor collection: estimate freed = %" PRIu64 " live = %" PRIu64 "m new interval = %" PRIu64 "m pause time = %" PRIu64 "ms ttsp = %" PRIu64 "us mark time = %" PRIu64 - "ms sweep time = %" PRIu64 "ms \n", + "ms sweep time = %" PRIu64 "ms\n", end, freed, live/1024/1024, interval/1024/1024, pause/1000000, ttsp, mark/1000000,sweep/1000000); } + +void gc_heuristics_summary( + uint64_t old_alloc_diff, uint64_t alloc_mem, + uint64_t old_mut_time, uint64_t alloc_time, + uint64_t old_freed_diff, uint64_t gc_mem, + uint64_t old_pause_time, uint64_t gc_time, + int thrash_counter, const char *reason, + uint64_t current_heap, uint64_t target_heap) +{ + jl_safe_printf("Estimates: alloc_diff=%" PRIu64 "kB (%" PRIu64 ")" + //" nongc_time=%" PRIu64 "ns (%" PRIu64 ")" + " mut_time=%" PRIu64 "ns (%" PRIu64 ")" + " freed_diff=%" PRIu64 "kB (%" PRIu64 ")" + " pause_time=%" PRIu64 "ns (%" PRIu64 ")" + " thrash_counter=%d%s" + " current_heap=%" PRIu64 " MB" + " target_heap=%" PRIu64 " MB\n", + old_alloc_diff/1024, alloc_mem/1024, + old_mut_time/1000, alloc_time/1000, + old_freed_diff/1024, gc_mem/1024, + old_pause_time/1000, gc_time/1000, + thrash_counter, reason, + current_heap/1024/1024, target_heap/1024/1024); +} #endif void jl_gc_debug_init(void) @@ -1216,15 +1243,25 @@ JL_DLLEXPORT void jl_enable_gc_logging(int enable) { gc_logging_enabled = enable; } -void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect) JL_NOTSAFEPOINT { +void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t live_bytes) JL_NOTSAFEPOINT { if (!gc_logging_enabled) { return; } - jl_safe_printf("GC: pause %.2fms. collected %fMB. %s %s\n", - pause/1e6, freed/1e6, + jl_safe_printf("\nGC: pause %.2fms. collected %fMB. %s %s\n", + pause/1e6, freed/(double)(1<<20), full ? "full" : "incr", recollect ? "recollect" : "" ); + + jl_safe_printf("Heap stats: bytes_mapped %.2f MB, bytes_resident %.2f MB,\nheap_size %.2f MB, heap_target %.2f MB, Fragmentation %.3f\n", + jl_atomic_load_relaxed(&gc_heap_stats.bytes_mapped)/(double)(1<<20), + jl_atomic_load_relaxed(&gc_heap_stats.bytes_resident)/(double)(1<<20), + // live_bytes/(double)(1<<20), live byes tracking is not accurate. + jl_atomic_load_relaxed(&gc_heap_stats.heap_size)/(double)(1<<20), + jl_atomic_load_relaxed(&gc_heap_stats.heap_target)/(double)(1<<20), + (double)live_bytes/(double)jl_atomic_load_relaxed(&gc_heap_stats.heap_size) + ); + // Should fragmentation use bytes_resident instead of heap_size? 
} #ifdef __cplusplus diff --git a/src/gc-pages.c b/src/gc-pages.c index f015b5de2295e..d1383c91e27fa 100644 --- a/src/gc-pages.c +++ b/src/gc-pages.c @@ -83,6 +83,8 @@ char *jl_gc_try_alloc_pages_(int pg_cnt) JL_NOTSAFEPOINT // round data pointer up to the nearest gc_page_data-aligned // boundary if mmap didn't already do so. mem = (char*)gc_page_data(mem + GC_PAGE_SZ - 1); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mapped, pages_sz); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_resident, pages_sz); return mem; } @@ -138,6 +140,7 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT // try to get page from `pool_freed` meta = pop_lf_back(&global_page_pool_freed); if (meta != NULL) { + jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_resident, GC_PAGE_SZ); gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED); goto exit; } @@ -213,6 +216,7 @@ void jl_gc_free_page(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT #endif msan_unpoison(p, decommit_size); jl_atomic_fetch_add(&current_pg_count, -1); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_resident, -decommit_size); } #ifdef __cplusplus diff --git a/src/gc.c b/src/gc.c index cb2bab9acd59f..3a6ca37b473e9 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1,6 +1,7 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license #include "gc.h" +#include "julia.h" #include "julia_gcext.h" #include "julia_assert.h" #ifdef __GLIBC__ @@ -130,12 +131,12 @@ JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_fre } JL_DLLEXPORT void jl_gc_set_cb_notify_gc_pressure(jl_gc_cb_notify_gc_pressure_t cb, int enable) - { - if (enable) - jl_gc_register_callback(&gc_cblist_notify_gc_pressure, (jl_gc_cb_func_t)cb); - else - jl_gc_deregister_callback(&gc_cblist_notify_gc_pressure, (jl_gc_cb_func_t)cb); - } +{ + if (enable) + jl_gc_register_callback(&gc_cblist_notify_gc_pressure, (jl_gc_cb_func_t)cb); + else + jl_gc_deregister_callback(&gc_cblist_notify_gc_pressure, (jl_gc_cb_func_t)cb); +} // Protect all access to `finalizer_list_marked` and `to_finalize`. // For accessing `ptls->finalizers`, the lock is needed if a thread @@ -187,6 +188,8 @@ jl_gc_num_t gc_num = {0}; static size_t last_long_collect_interval; int gc_n_threads; jl_ptls_t* gc_all_tls_states; +gc_heapstatus_t gc_heap_stats = {0}; +int next_sweep_full = 0; const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) { @@ -778,7 +781,6 @@ static void gc_sweep_foreign_objs(void) } // GC knobs and self-measurement variables -static int under_memory_pressure; static int64_t last_gc_total_bytes = 0; // max_total_memory is a suggestion. We try very hard to stay @@ -786,21 +788,30 @@ static int64_t last_gc_total_bytes = 0; #ifdef _P64 typedef uint64_t memsize_t; static const size_t default_collect_interval = 5600 * 1024 * sizeof(void*); -static const size_t max_collect_interval = 1250000000UL; static size_t total_mem; // We expose this to the user/ci as jl_gc_set_max_memory static memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024 * 1024 * 1024; #else typedef uint32_t memsize_t; static const size_t default_collect_interval = 3200 * 1024 * sizeof(void*); -static const size_t max_collect_interval = 500000000UL; // Work really hard to stay within 2GB // Alternative is to risk running out of address space // on 32 bit architectures.
-static memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024; +#define MAX32HEAP 1536 * 1024 * 1024 +static memsize_t max_total_memory = (memsize_t) MAX32HEAP; #endif - +// heuristic stuff for https://dl.acm.org/doi/10.1145/3563323 +// start with values that are in the target ranges to reduce transient hiccups at startup +static uint64_t old_pause_time = 1e7; // 10 ms +static uint64_t old_mut_time = 1e9; // 1 second +static uint64_t old_heap_size = 0; +static uint64_t old_alloc_diff = default_collect_interval; +static uint64_t old_freed_diff = default_collect_interval; +static uint64_t gc_end_time = 0; +static int thrash_counter = 0; +static int thrashing = 0; // global variables for GC stats +static uint64_t freed_in_runtime = 0; // Resetting the object to a young object, this is used when marking the // finalizer list to collect them the next time because the object is very @@ -852,6 +863,7 @@ static int64_t scanned_bytes; // young bytes scanned while marking static int64_t perm_scanned_bytes; // old bytes scanned while marking int prev_sweep_full = 1; int current_sweep_full = 0; +int under_pressure = 0; // Full collection heuristics static int64_t live_bytes = 0; @@ -1034,7 +1046,7 @@ void gc_setmark_buf(jl_ptls_t ptls, void *o, uint8_t mark_mode, size_t minsz) JL STATIC_INLINE void maybe_collect(jl_ptls_t ptls) { - if (jl_atomic_load_relaxed(&ptls->gc_num.allocd) >= 0 || jl_gc_debug_check_other()) { + if (jl_atomic_load_relaxed(&gc_heap_stats.heap_size) >= jl_atomic_load_relaxed(&gc_heap_stats.heap_target) || jl_gc_debug_check_other()) { jl_gc_collect(JL_GC_AUTO); } else { @@ -1101,6 +1113,22 @@ static void sweep_weak_refs(void) } +STATIC_INLINE void jl_batch_accum_heap_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT +{ + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc) + sz; + if (alloc_acc < 16*1024) + jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc); + else { + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc); + jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); + } +} + +STATIC_INLINE void jl_batch_accum_free_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT +{ + jl_atomic_store_relaxed(&ptls->gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_num.free_acc) + sz); +} + // big value list // Size includes the tag and the tag is not cleared!! 
@@ -1123,6 +1151,7 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); jl_atomic_store_relaxed(&ptls->gc_num.bigalloc, jl_atomic_load_relaxed(&ptls->gc_num.bigalloc) + 1); + jl_batch_accum_heap_size(ptls, allocsz); #ifdef MEMDEBUG memset(v, 0xee, allocsz); #endif @@ -1175,6 +1204,8 @@ static bigval_t **sweep_big_list(int sweep_full, bigval_t **pv) JL_NOTSAFEPOINT if (nxt) nxt->prev = pv; gc_num.freed += v->sz&~3; + jl_atomic_store_relaxed(&gc_heap_stats.heap_size, + jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - (v->sz&~3)); #ifdef MEMDEBUG memset(v, 0xbb, v->sz&~3); #endif @@ -1234,9 +1265,10 @@ void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT jl_ptls_t ptls = jl_current_task->ptls; jl_atomic_store_relaxed(&ptls->gc_num.allocd, jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); + jl_batch_accum_heap_size(ptls, sz); } - -static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT +// Only safe to update the heap inside the GC +static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTSAFEPOINT { int gc_n_threads; jl_ptls_t* gc_all_tls_states; @@ -1246,12 +1278,18 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls) { dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval); - dest->freed += jl_atomic_load_relaxed(&ptls->gc_num.freed); dest->malloc += jl_atomic_load_relaxed(&ptls->gc_num.malloc); dest->realloc += jl_atomic_load_relaxed(&ptls->gc_num.realloc); dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_num.poolalloc); dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_num.bigalloc); - dest->freecall += jl_atomic_load_relaxed(&ptls->gc_num.freecall); + dest->freed += jl_atomic_load_relaxed(&ptls->gc_num.free_acc); + if (update_heap) { + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); + freed_in_runtime += jl_atomic_load_relaxed(&ptls->gc_num.free_acc); + jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size)); + jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0); + } } } } @@ -1267,19 +1305,17 @@ static void reset_thread_gc_counts(void) JL_NOTSAFEPOINT if (ptls != NULL) { // don't reset `pool_live_bytes` here jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); - jl_atomic_store_relaxed(&ptls->gc_num.freed, 0); jl_atomic_store_relaxed(&ptls->gc_num.malloc, 0); jl_atomic_store_relaxed(&ptls->gc_num.realloc, 0); jl_atomic_store_relaxed(&ptls->gc_num.poolalloc, 0); jl_atomic_store_relaxed(&ptls->gc_num.bigalloc, 0); - jl_atomic_store_relaxed(&ptls->gc_num.freecall, 0); } } } void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT { - combine_thread_gc_counts(&gc_num); + combine_thread_gc_counts(&gc_num, 0); live_bytes += (gc_num.deferred_alloc + gc_num.allocd); gc_num.allocd = 0; gc_num.deferred_alloc = 0; @@ -1308,6 +1344,8 @@ static void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT jl_free_aligned(d); else free(d); + jl_atomic_store_relaxed(&gc_heap_stats.heap_size, + jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - jl_array_nbytes(a)); gc_num.freed += jl_array_nbytes(a); gc_num.freecall++; } @@ -1382,6 +1420,7 @@ static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT set_page_metadata(pg); push_lf_back(&ptls->page_metadata_allocd, pg); jl_taggedvalue_t *fl = gc_reset_page(ptls, p, pg); + 
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, GC_PAGE_SZ); p->newpages = fl; return fl; } @@ -1606,9 +1645,11 @@ static void gc_sweep_page(jl_gc_pool_t *p, jl_gc_page_stack_t *allocd, jl_gc_pag else { gc_alloc_map_set(pg->data, GC_PAGE_LAZILY_FREED); if (keep_as_local_buffer) { + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -GC_PAGE_SZ); push_lf_back(buffered, pg); } else { + // `gc_heap_stats.heap_size` is updated in `gc_free_pages` push_lf_back(&global_page_pool_lazily_freed, pg); } } @@ -3294,7 +3335,7 @@ JL_DLLEXPORT int jl_gc_is_enabled(void) JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT { jl_gc_num_t num = gc_num; - combine_thread_gc_counts(&num); + combine_thread_gc_counts(&num, 0); // Sync this logic with `base/util.jl:GC_Diff` *bytes = (num.total_allocd + num.deferred_alloc + num.allocd); } @@ -3307,7 +3348,7 @@ JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) { jl_gc_num_t num = gc_num; - combine_thread_gc_counts(&num); + combine_thread_gc_counts(&num, 0); return num; } @@ -3356,16 +3397,51 @@ JL_DLLEXPORT int64_t jl_gc_live_bytes(void) return live_bytes; } +uint64_t jl_gc_smooth(uint64_t old_val, uint64_t new_val, double factor) +{ + double est = factor * old_val + (1 - factor) * new_val; + if (est <= 1) + return 1; // avoid issues with <= 0 + if (est > (uint64_t)2<<36) + return (uint64_t)2<<36; // avoid overflow + return est; +} + +// an overallocation curve inspired by array allocations +// grows very fast initially, then much slower at large heaps +static uint64_t overallocation(uint64_t old_val, uint64_t val, uint64_t max_val) +{ + // compute maxsize = maxsize + 4*maxsize^(7/8) + maxsize/8 + // for small n, we grow much faster than O(n) + // for large n, we grow at O(n/8) + // and as we reach O(memory) for memory>>1MB, + // this means we end by adding about 10% of memory each time at most + int exp2 = sizeof(old_val) * 8 - +#ifdef _P64 + __builtin_clzll(old_val); +#else + __builtin_clz(old_val); +#endif + uint64_t inc = (uint64_t)((size_t)1 << (exp2 * 7 / 8)) * 4 + old_val / 8; + // once overallocation would exceed max_val, grow by no more than 5% of max_val + if (inc + val > max_val) + if (inc > max_val / 20) + return max_val / 20; + return inc; +} + size_t jl_maxrss(void); // Only one thread should be running in this function static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) { - combine_thread_gc_counts(&gc_num); + combine_thread_gc_counts(&gc_num, 1); jl_gc_markqueue_t *mq = &ptls->mark_queue; uint64_t gc_start_time = jl_hrtime(); + uint64_t mutator_time = gc_end_time == 0 ? old_mut_time : gc_start_time - gc_end_time; + uint64_t before_free_heap_size = jl_atomic_load_relaxed(&gc_heap_stats.heap_size); int64_t last_perm_scanned_bytes = perm_scanned_bytes; JL_PROBE_GC_MARK_BEGIN(); uint64_t start_mark_time = jl_hrtime(); @@ -3416,7 +3492,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) uint64_t mark_time = end_mark_time - start_mark_time; gc_num.mark_time = mark_time; gc_num.total_mark_time += mark_time; - int64_t actual_allocd = gc_num.since_sweep; + gc_settime_postmark_end(); // marking is over // 4. 
check for objects to finalize @@ -3461,12 +3537,8 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) // Flush everything in mark cache gc_sync_all_caches_nolock(ptls); - int64_t live_sz_ub = live_bytes + actual_allocd; - int64_t live_sz_est = scanned_bytes + perm_scanned_bytes; - int64_t estimate_freed = live_sz_ub - live_sz_est; gc_verify(ptls); - gc_stats_all_pool(); gc_stats_big_obj(); objprofile_printall(); @@ -3475,50 +3547,22 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) if (!prev_sweep_full) promoted_bytes += perm_scanned_bytes - last_perm_scanned_bytes; // 5. next collection decision - int not_freed_enough = (collection == JL_GC_AUTO) && estimate_freed < (7*(actual_allocd/10)); - int nptr = 0; + int remset_nptr = 0; + int sweep_full = next_sweep_full; + int recollect = 0; assert(gc_n_threads); for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; if (ptls2 != NULL) - nptr += ptls2->heap.remset_nptr; + remset_nptr += ptls2->heap.remset_nptr; } + (void)remset_nptr; //Use this information for something? - // many pointers in the intergen frontier => "quick" mark is not quick - int large_frontier = nptr*sizeof(void*) >= default_collect_interval; - int sweep_full = 0; - int recollect = 0; - - // update heuristics only if this GC was automatically triggered - if (collection == JL_GC_AUTO) { - if (large_frontier) { - sweep_full = 1; - gc_num.interval = last_long_collect_interval; - } - if (not_freed_enough || large_frontier) { - gc_num.interval = gc_num.interval * 2; - } - - size_t maxmem = 0; -#ifdef _P64 - // on a big memory machine, increase max_collect_interval to totalmem / nthreads / 2 - maxmem = total_mem / (gc_n_threads - jl_n_gcthreads) / 2; -#endif - if (maxmem < max_collect_interval) - maxmem = max_collect_interval; - if (gc_num.interval > maxmem) { - sweep_full = 1; - gc_num.interval = maxmem; - } - } // If the live data outgrows the suggested max_total_memory // we keep going with minimum intervals and full gcs until // we either free some space or get an OOM error. - if (live_bytes > max_total_memory) { - sweep_full = 1; - } if (gc_sweep_always_full) { sweep_full = 1; } @@ -3531,7 +3575,6 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) // on the first collection after sweep_full, and the current scan perm_scanned_bytes = 0; promoted_bytes = 0; - last_long_collect_interval = gc_num.interval; } scanned_bytes = 0; // 6. 
start sweeping @@ -3549,7 +3592,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) gc_sweep_perm_alloc(); JL_PROBE_GC_SWEEP_END(); - uint64_t gc_end_time = jl_hrtime(); + gc_end_time = jl_hrtime(); uint64_t pause = gc_end_time - gc_start_time; uint64_t sweep_time = gc_end_time - start_sweep_time; gc_num.total_sweep_time += sweep_time; @@ -3561,6 +3604,103 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) gc_num.last_incremental_sweep = gc_end_time; } + size_t heap_size = jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - freed_in_runtime; + jl_atomic_store_relaxed(&gc_heap_stats.heap_size, heap_size); + freed_in_runtime = 0; + uint64_t user_max = max_total_memory * 0.8; + uint64_t alloc_diff = before_free_heap_size - old_heap_size; + uint64_t freed_diff = before_free_heap_size - heap_size; + uint64_t target_heap; + const char *reason = ""; (void)reason; // for GC_TIME output stats + old_heap_size = heap_size; // TODO: Update these values dynamically instead of just during the GC + if (collection == JL_GC_AUTO) { + // update any heuristics only when the user does not force the GC + // but still update the timings, since GC was run and reset, even if it was too early + uint64_t target_allocs = 0.0; + double alloc_smooth_factor = 0.95; + double collect_smooth_factor = 0.5; + double tuning_factor = 2e4; + uint64_t alloc_mem = jl_gc_smooth(old_alloc_diff, alloc_diff, alloc_smooth_factor); + uint64_t alloc_time = jl_gc_smooth(old_mut_time, mutator_time, alloc_smooth_factor); // TODO: subtract estimated finalizer time? + uint64_t gc_mem = jl_gc_smooth(old_freed_diff, freed_diff, collect_smooth_factor); + uint64_t gc_time = jl_gc_smooth(old_pause_time, pause - sweep_time, collect_smooth_factor); + old_alloc_diff = alloc_mem; + old_mut_time = alloc_time; + old_freed_diff = gc_mem; + old_pause_time = gc_time; + // thrashing estimator: if GC time more than 50% of the runtime + if (pause > mutator_time && !(thrash_counter < 4)) + thrash_counter += 1; + else if (thrash_counter > 0) + thrash_counter -= 1; + if (alloc_mem != 0 && alloc_time != 0 && gc_mem != 0 && gc_time != 0) { + double alloc_rate = (double)alloc_mem/alloc_time; + double gc_rate = (double)gc_mem/gc_time; + target_allocs = sqrt((double)heap_size * alloc_rate / gc_rate) * tuning_factor; + } + + if (thrashing == 0 && thrash_counter >= 3) { + // require 3 consecutive thrashing cycles to force the default allocator rate + thrashing = 1; + // and require 4 default allocations to clear + thrash_counter = 6; + } + else if (thrashing == 1 && thrash_counter <= 2) { + thrashing = 0; // maybe we should report this to the user or error out? + } + + target_heap = target_allocs + heap_size; + // optionally smooth this: + // target_heap = jl_gc_smooth(jl_atomic_load_relaxed(&gc_heap_stats.heap_target), target_heap, alloc_smooth_factor); + + // compute some guardrails values + uint64_t min_target_allocs = heap_size / 20; // minimum 5% of current heap + if (min_target_allocs < default_collect_interval / 8) // unless the heap is small + min_target_allocs = default_collect_interval / 8; + uint64_t max_target_allocs = overallocation(before_free_heap_size, heap_size, user_max); + if (max_target_allocs < min_target_allocs) + max_target_allocs = min_target_allocs; + // respect max_total_memory first + if (target_heap > user_max) { + target_allocs = heap_size < user_max ? 
user_max - heap_size : 1; + reason = " user limit"; + } + // If we are thrashing use a default only (an average) for a couple collections + if (thrashing) { + uint64_t thrashing_allocs = sqrt((double)min_target_allocs * max_target_allocs); + if (target_allocs < thrashing_allocs) { + target_allocs = thrashing_allocs; + reason = " thrashing"; + } + } + // then add the guardrails for transient issues + if (target_allocs > max_target_allocs) { + target_allocs = max_target_allocs; + reason = " rate limit max"; + } + else if (target_allocs < min_target_allocs) { + target_allocs = min_target_allocs; + reason = " min limit"; + } + // and set the heap detection threshold + target_heap = target_allocs + heap_size; + if (target_heap < default_collect_interval) { + target_heap = default_collect_interval; + reason = " min heap"; + } + jl_atomic_store_relaxed(&gc_heap_stats.heap_target, target_heap); + } + else { + target_heap = jl_atomic_load_relaxed(&gc_heap_stats.heap_target); + } + + double old_ratio = (double)promoted_bytes/(double)heap_size; + if (heap_size > user_max || old_ratio > 0.15) + next_sweep_full = 1; + else + next_sweep_full = 0; + if (heap_size > user_max || thrashing) + under_pressure = 1; // sweeping is over // 7. if it is a quick sweep, put back the remembered objects in queued state // so that we don't trigger the barrier again on them. @@ -3596,57 +3736,31 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) } #endif - _report_gc_finished(pause, gc_num.freed, sweep_full, recollect); - - gc_final_pause_end(gc_start_time, gc_end_time); - gc_time_sweep_pause(gc_end_time, actual_allocd, live_bytes, - estimate_freed, sweep_full); - gc_num.full_sweep += sweep_full; + _report_gc_finished(pause, gc_num.freed, sweep_full, recollect, live_bytes); uint64_t max_memory = last_live_bytes + gc_num.allocd; if (max_memory > gc_num.max_memory) { gc_num.max_memory = max_memory; } - - gc_num.allocd = 0; + gc_final_pause_end(gc_start_time, gc_end_time); + gc_time_sweep_pause(gc_end_time, gc_num.allocd, live_bytes, + gc_num.freed, sweep_full); + gc_num.full_sweep += sweep_full; last_live_bytes = live_bytes; - live_bytes += -gc_num.freed + gc_num.since_sweep; - - if (collection == JL_GC_AUTO) { - //If we aren't freeing enough or are seeing lots and lots of pointers let it increase faster - if (!not_freed_enough || large_frontier) { - int64_t tot = 2 * (live_bytes + gc_num.since_sweep) / 3; - if (gc_num.interval > tot) { - gc_num.interval = tot; - last_long_collect_interval = tot; - } - } - // If the current interval is larger than half the live data decrease the interval - else { - int64_t half = (live_bytes / 2); - if (gc_num.interval > half) - gc_num.interval = half; - } - // But never go below default - if (gc_num.interval < default_collect_interval) gc_num.interval = default_collect_interval; - } - - if (gc_num.interval + live_bytes > max_total_memory) { - if (live_bytes < max_total_memory) { - gc_num.interval = max_total_memory - live_bytes; - last_long_collect_interval = max_total_memory - live_bytes; - } - else { - // We can't stay under our goal so let's go back to - // the minimum interval and hope things get better - under_memory_pressure = 1; - gc_num.interval = default_collect_interval; - } - } + live_bytes += -gc_num.freed + gc_num.allocd; gc_time_summary(sweep_full, t_start, gc_end_time, gc_num.freed, live_bytes, gc_num.interval, pause, gc_num.time_to_safepoint, gc_num.mark_time, gc_num.sweep_time); + if (collection == JL_GC_AUTO) { + gc_heuristics_summary( + 
old_alloc_diff, alloc_diff, + old_mut_time, mutator_time, + old_freed_diff, freed_diff, + old_pause_time, pause - sweep_time, + thrash_counter, reason, + heap_size, target_heap); + } prev_sweep_full = sweep_full; gc_num.pause += !recollect; @@ -3742,12 +3856,11 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) gc_invoke_callbacks(jl_gc_cb_post_gc_t, gc_cblist_post_gc, (collection)); - - if (under_memory_pressure) { + if (under_pressure) { gc_invoke_callbacks(jl_gc_cb_notify_gc_pressure_t, gc_cblist_notify_gc_pressure, ()); } - under_memory_pressure = 0; + under_pressure = 0; #ifdef _OS_WINDOWS_ SetLastError(last_error); #endif @@ -3841,6 +3954,7 @@ void jl_gc_init(void) arraylist_push(&eytzinger_idxs, (void*)0); arraylist_push(&eytzinger_image_tree, (void*)1); // outside image + jl_atomic_store_relaxed(&gc_heap_stats.heap_target, default_collect_interval); gc_num.interval = default_collect_interval; last_long_collect_interval = default_collect_interval; gc_num.allocd = 0; @@ -3851,23 +3965,22 @@ void jl_gc_init(void) total_mem = uv_get_total_memory(); uint64_t constrained_mem = uv_get_constrained_memory(); if (constrained_mem > 0 && constrained_mem < total_mem) - total_mem = constrained_mem; - double percent; - if (total_mem < 128e9) - percent = total_mem * 2.34375e-12 + 0.6; // 60% at 0 gigs and 90% at 128 to not - else // overcommit too much on memory contrained devices - percent = 0.9; - max_total_memory = total_mem * percent; + jl_gc_set_max_memory(constrained_mem - 250*1024*1024); // LLVM + other libraries need some amount of memory #endif if (jl_options.heap_size_hint) - jl_gc_set_max_memory(jl_options.heap_size_hint); + jl_gc_set_max_memory(jl_options.heap_size_hint - 250*1024*1024); + t_start = jl_hrtime(); } void jl_gc_set_max_memory(uint64_t max_mem) { if (max_mem > 0 && max_mem < (uint64_t)1 << (sizeof(memsize_t) * 8 - 1)) { + #ifdef _P64 max_total_memory = max_mem; + #else + max_total_memory = max_mem < MAX32HEAP ? 
max_mem : MAX32HEAP; + #endif } } @@ -3888,30 +4001,34 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) { jl_gcframe_t **pgcstack = jl_get_pgcstack(); jl_task_t *ct = jl_current_task; - if (pgcstack != NULL && ct->world_age) { + void *data = malloc(sz); + if (data != NULL && pgcstack != NULL && ct->world_age) { jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); jl_atomic_store_relaxed(&ptls->gc_num.allocd, jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); jl_atomic_store_relaxed(&ptls->gc_num.malloc, jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); + jl_batch_accum_heap_size(ptls, sz); } - return malloc(sz); + return data; } JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) { jl_gcframe_t **pgcstack = jl_get_pgcstack(); jl_task_t *ct = jl_current_task; - if (pgcstack != NULL && ct->world_age) { + void *data = calloc(nm, sz); + if (data != NULL && pgcstack != NULL && ct->world_age) { jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); jl_atomic_store_relaxed(&ptls->gc_num.allocd, jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz); jl_atomic_store_relaxed(&ptls->gc_num.malloc, jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); + jl_batch_accum_heap_size(ptls, sz * nm); } - return calloc(nm, sz); + return data; } JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) @@ -3920,11 +4037,7 @@ JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) jl_task_t *ct = jl_current_task; free(p); if (pgcstack != NULL && ct->world_age) { - jl_ptls_t ptls = ct->ptls; - jl_atomic_store_relaxed(&ptls->gc_num.freed, - jl_atomic_load_relaxed(&ptls->gc_num.freed) + sz); - jl_atomic_store_relaxed(&ptls->gc_num.freecall, - jl_atomic_load_relaxed(&ptls->gc_num.freecall) + 1); + jl_batch_accum_free_size(ct->ptls, sz); } } @@ -3932,19 +4045,25 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size { jl_gcframe_t **pgcstack = jl_get_pgcstack(); jl_task_t *ct = jl_current_task; - if (pgcstack != NULL && ct->world_age) { + void *data = realloc(p, sz); + if (data != NULL && pgcstack != NULL && ct->world_age) { jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); - if (sz < old) - jl_atomic_store_relaxed(&ptls->gc_num.freed, - jl_atomic_load_relaxed(&ptls->gc_num.freed) + (old - sz)); - else + if (!(sz < old)) jl_atomic_store_relaxed(&ptls->gc_num.allocd, jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (sz - old)); jl_atomic_store_relaxed(&ptls->gc_num.realloc, jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); + + int64_t diff = sz - old; + if (diff < 0) { + jl_batch_accum_free_size(ptls, -diff); + } + else { + jl_batch_accum_heap_size(ptls, diff); + } } - return realloc(p, sz); + return data; } // allocation wrappers that save the size of allocations, to allow using @@ -4013,10 +4132,7 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT); if (allocsz < sz) // overflow in adding offs, size was "negative" jl_throw(jl_memory_exception); - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); - jl_atomic_store_relaxed(&ptls->gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); + int last_errno = errno; #ifdef _OS_WINDOWS_ DWORD last_error = GetLastError(); @@ -4024,6 +4140,12 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) void *b = malloc_cache_align(allocsz); if (b == NULL) jl_throw(jl_memory_exception); + + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); + 
jl_atomic_store_relaxed(&ptls->gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); + jl_batch_accum_heap_size(ptls, allocsz); #ifdef _OS_WINDOWS_ SetLastError(last_error); #endif @@ -4038,24 +4160,11 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds { if (can_collect) maybe_collect(ptls); - + int is_old_marked = jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED; size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT); if (allocsz < sz) // overflow in adding offs, size was "negative" jl_throw(jl_memory_exception); - if (jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED) { - ptls->gc_cache.perm_scanned_bytes += allocsz - oldsz; - live_bytes += allocsz - oldsz; - } - else if (allocsz < oldsz) - jl_atomic_store_relaxed(&ptls->gc_num.freed, - jl_atomic_load_relaxed(&ptls->gc_num.freed) + (oldsz - allocsz)); - else - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz)); - jl_atomic_store_relaxed(&ptls->gc_num.realloc, - jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); - int last_errno = errno; #ifdef _OS_WINDOWS_ DWORD last_error = GetLastError(); @@ -4072,6 +4181,23 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds #endif errno = last_errno; // gc_managed_realloc_ is currently used exclusively for resizing array buffers. + if (is_old_marked) { + ptls->gc_cache.perm_scanned_bytes += allocsz - oldsz; + live_bytes += allocsz - oldsz; + } + else if (!(allocsz < oldsz)) + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz)); + jl_atomic_store_relaxed(&ptls->gc_num.realloc, + jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); + + int64_t diff = allocsz - oldsz; + if (diff < 0) { + jl_batch_accum_free_size(ptls, -diff); + } + else { + jl_batch_accum_heap_size(ptls, diff); + } if (allocsz > oldsz) { maybe_record_alloc_to_profile((jl_value_t*)b, allocsz - oldsz, (jl_datatype_t*)jl_buff_tag); } @@ -4147,6 +4273,7 @@ static void *gc_perm_alloc_large(size_t sz, int zero, unsigned align, unsigned o #ifdef _OS_WINDOWS_ SetLastError(last_error); #endif + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size,sz); errno = last_errno; jl_may_leak(base); assert(align > 0); diff --git a/src/gc.h b/src/gc.h index ff52269b73af9..6e6b602746206 100644 --- a/src/gc.h +++ b/src/gc.h @@ -9,6 +9,8 @@ #ifndef JL_GC_H #define JL_GC_H +#include +#include #include #include #include @@ -276,6 +278,13 @@ typedef struct { pagetable1_t *meta1[REGION2_PG_COUNT]; } pagetable_t; +typedef struct { + _Atomic(size_t) bytes_mapped; + _Atomic(size_t) bytes_resident; + _Atomic(size_t) heap_size; + _Atomic(size_t) heap_target; +} gc_heapstatus_t; + #define GC_PAGE_UNMAPPED 0 #define GC_PAGE_ALLOCATED 1 #define GC_PAGE_LAZILY_FREED 2 @@ -393,6 +402,7 @@ extern int64_t buffered_pages; extern int gc_first_tid; extern int gc_n_threads; extern jl_ptls_t* gc_all_tls_states; +extern gc_heapstatus_t gc_heap_stats; STATIC_INLINE bigval_t *bigval_header(jl_taggedvalue_t *o) JL_NOTSAFEPOINT { @@ -515,6 +525,13 @@ void gc_time_summary(int sweep_full, uint64_t start, uint64_t end, uint64_t freed, uint64_t live, uint64_t interval, uint64_t pause, uint64_t ttsp, uint64_t mark, uint64_t sweep); +void gc_heuristics_summary( + uint64_t old_alloc_diff, uint64_t alloc_mem, + uint64_t old_mut_time, uint64_t alloc_time, + uint64_t old_freed_diff, uint64_t gc_mem, + uint64_t old_pause_time, uint64_t gc_time, + int thrash_counter, const char *reason, + 
uint64_t current_heap, uint64_t target_heap); #else #define gc_time_pool_start() STATIC_INLINE void gc_time_count_page(int freedall, int pg_skpd) JL_NOTSAFEPOINT @@ -542,6 +559,13 @@ STATIC_INLINE void gc_time_count_mallocd_array(int bits) JL_NOTSAFEPOINT estimate_freed, sweep_full) #define gc_time_summary(sweep_full, start, end, freed, live, \ interval, pause, ttsp, mark, sweep) +#define gc_heuristics_summary( \ + old_alloc_diff, alloc_mem, \ + old_mut_time, alloc_time, \ + old_freed_diff, gc_mem, \ + old_pause_time, gc_time, \ + thrash_counter, reason, \ + current_heap, target_heap) #endif #ifdef MEMFENCE @@ -658,8 +682,7 @@ void gc_count_pool(void); size_t jl_array_nbytes(jl_array_t *a) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_enable_gc_logging(int enable); -JL_DLLEXPORT uint32_t jl_get_num_stack_mappings(void); -void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect) JL_NOTSAFEPOINT; +void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t live_bytes) JL_NOTSAFEPOINT; #ifdef __cplusplus } diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index 82bdfc1f5b7eb..b92455c4156cf 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -195,6 +195,7 @@ XX(jl_gc_schedule_foreign_sweepfunc) \ XX(jl_gc_set_cb_notify_external_alloc) \ XX(jl_gc_set_cb_notify_external_free) \ + XX(jl_gc_set_cb_notify_gc_pressure) \ XX(jl_gc_set_cb_post_gc) \ XX(jl_gc_set_cb_pre_gc) \ XX(jl_gc_set_cb_root_scanner) \ diff --git a/src/julia_threads.h b/src/julia_threads.h index f69f9dd4baacf..085c434391b41 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -129,12 +129,12 @@ typedef struct { typedef struct { _Atomic(int64_t) allocd; _Atomic(int64_t) pool_live_bytes; - _Atomic(int64_t) freed; _Atomic(uint64_t) malloc; _Atomic(uint64_t) realloc; _Atomic(uint64_t) poolalloc; _Atomic(uint64_t) bigalloc; - _Atomic(uint64_t) freecall; + _Atomic(int64_t) free_acc; + _Atomic(uint64_t) alloc_acc; } jl_thread_gc_num_t; typedef struct { diff --git a/src/staticdata.c b/src/staticdata.c index c12c9b4b58562..6ce580c301899 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -3322,7 +3322,7 @@ static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *im JL_SIGATOMIC_BEGIN(); size_t len = dataendpos - datastartpos; char *sysimg; - bool success = !needs_permalloc; + int success = !needs_permalloc; ios_seek(f, datastartpos); if (needs_permalloc) sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0); diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl index f73a7854fd2f1..2f80760e4e0b0 100644 --- a/test/cmdlineargs.jl +++ b/test/cmdlineargs.jl @@ -876,6 +876,6 @@ end @test lines[3] == "foo" @test lines[4] == "bar" end -#heap-size-hint -@test readchomp(`$(Base.julia_cmd()) --startup-file=no --heap-size-hint=500M -e "println(@ccall jl_gc_get_max_memory()::UInt64)"`) == "524288000" +#heap-size-hint, we reserve 250 MB for non GC memory (llvm, etc.) +@test readchomp(`$(Base.julia_cmd()) --startup-file=no --heap-size-hint=500M -e "println(@ccall jl_gc_get_max_memory()::UInt64)"`) == "$((500-250)*1024*1024)" end diff --git a/test/testenv.jl b/test/testenv.jl index 41706dd24e75e..a4bec15a34130 100644 --- a/test/testenv.jl +++ b/test/testenv.jl @@ -37,6 +37,10 @@ if !@isdefined(testenv_defined) function addprocs_with_testenv(X; rr_allowed=true, kwargs...) exename = rr_allowed ? 
`$rr_exename $test_exename` : test_exename + if X isa Integer + heap_size=round(Int,(Sys.free_memory()/(1024^2)/(X+1))) + push!(test_exeflags.exec, "--heap-size-hint=$(heap_size)M") + end addprocs(X; exename=exename, exeflags=test_exeflags, kwargs...) end
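A note on the new scheduling rule, since it is spread across `doc/src/devdocs/gc.md` and `_jl_gc_collect` in the diff above: after each collection the heuristics smooth the measured allocation and collection rates (`jl_gc_smooth`) and then grant the mutator an allowance that grows with the square root of the heap, scaled by the ratio of those rates. The sketch below is a simplified, illustrative restatement of that rule, not the patch's code: it omits thrashing detection and the `overallocation` cap, and the helper name `compute_target_heap` is made up for this example.

```c
/* Simplified sketch of the target-heap rule from _jl_gc_collect (illustrative only). */
#include <inttypes.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>

#define DEFAULT_COLLECT_INTERVAL ((uint64_t)5600 * 1024 * 8) /* the 64-bit value from gc.c */

static uint64_t compute_target_heap(uint64_t heap_size,
                                    double alloc_rate, /* smoothed bytes allocated per ns of mutator time */
                                    double gc_rate,    /* smoothed bytes freed per ns of GC time */
                                    uint64_t user_max) /* 80% of max_total_memory */
{
    const double tuning_factor = 2e4;
    /* allowance grows with sqrt(heap), scaled by allocation rate / collection rate */
    uint64_t target_allocs = (uint64_t)(sqrt((double)heap_size * alloc_rate / gc_rate) * tuning_factor);
    uint64_t min_allocs = heap_size / 20;          /* at least 5% of the current heap ... */
    if (min_allocs < DEFAULT_COLLECT_INTERVAL / 8) /* ... unless the heap is tiny */
        min_allocs = DEFAULT_COLLECT_INTERVAL / 8;
    if (target_allocs < min_allocs)
        target_allocs = min_allocs;
    if (heap_size + target_allocs > user_max)      /* respect the user-imposed limit */
        target_allocs = heap_size < user_max ? user_max - heap_size : 1;
    return heap_size + target_allocs;
}

int main(void)
{
    uint64_t user_max = (uint64_t)16 * 1024 * 1024 * 1024 * 8 / 10; /* e.g. 80% of a 16 GiB budget */
    for (uint64_t mb = 64; mb <= 8192; mb *= 4) {
        uint64_t heap = mb * 1024 * 1024;
        /* equal allocation and collection rates, so the ratio term is 1 */
        uint64_t target = compute_target_heap(heap, 1.0, 1.0, user_max);
        printf("heap %5" PRIu64 " MB -> target %5" PRIu64 " MB (+%" PRIu64 " MB)\n",
               mb, target / (1024 * 1024), (target - heap) / (1024 * 1024));
    }
    return 0;
}
```

With equal rates the growth is sub-linear: roughly 2.5x for a 64 MB heap but only about 30% for a 4 GB heap. When the mutator allocates faster than the GC frees (`alloc_rate > gc_rate`) the allowance grows and collections become less frequent; when collection keeps up easily, it shrinks toward the 5% floor.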
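The `overallocation()` helper added in gc.c caps how much the target may grow in a single step, using the same curve as array reallocation: roughly `4*n^(7/8) + n/8`, computed with an integer log2. The toy program below is illustrative only; it drops the extra 5%-of-limit clamp that applies near `max_total_memory`, collapses the three-argument helper to one argument, and assumes a GCC/Clang `__builtin_clzll`. It just evaluates the increment at a few sizes to show the shape: more than 100% of a megabyte-sized heap, well under half of a multi-gigabyte one, approaching the `n/8` term as the heap keeps growing.

```c
/* Illustrative sketch: the overallocation growth cap from gc.c, evaluated at a few sizes. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t overallocation(uint64_t old_val)
{
    /* exp2 is the bit width of old_val, i.e. floor(log2(old_val)) + 1 */
    int exp2 = 64 - __builtin_clzll(old_val);
    /* ~ 4 * old_val^(7/8) + old_val / 8, using the integer log */
    return ((uint64_t)1 << (exp2 * 7 / 8)) * 4 + old_val / 8;
}

int main(void)
{
    for (uint64_t mb = 1; mb <= 16 * 1024; mb *= 8) {
        uint64_t n = mb * 1024 * 1024;
        uint64_t inc = overallocation(n);
        printf("heap %6" PRIu64 " MB -> max growth %8.1f MB (%.0f%%)\n",
               mb, inc / (1024.0 * 1024.0), 100.0 * inc / n);
    }
    return 0;
}
```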
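Finally, a word on the new per-thread counters (`alloc_acc`/`free_acc` in `julia_threads.h`). Since `maybe_collect` now compares a single process-wide `gc_heap_stats.heap_size` against `heap_target`, every allocation path would contend on one atomic if it updated that counter directly. `jl_batch_accum_heap_size` instead stages small allocations thread-locally and only pushes to the shared counter once roughly 16 kB has accumulated; the leftovers (and the freed bytes in `free_acc`) are folded in by `combine_thread_gc_counts` at the next collection. A stripped-down sketch of the pattern, not the patch's actual code:

```c
/* Illustrative sketch of the batched-accumulation pattern behind jl_batch_accum_heap_size:
 * thread-local staging, one relaxed atomic add per ~16 kB of allocation. */
#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t heap_size; /* shared, process-wide estimate used to trigger GC */

typedef struct {
    uint64_t alloc_acc; /* thread-local staging buffer, no synchronization needed here */
} thread_counters_t;

void accum_alloc(thread_counters_t *tls, uint64_t sz)
{
    uint64_t acc = tls->alloc_acc + sz;
    if (acc < 16 * 1024) {
        tls->alloc_acc = acc; /* stay local for small allocations */
    }
    else {
        /* flush the whole batch with a single relaxed atomic operation */
        atomic_fetch_add_explicit(&heap_size, acc, memory_order_relaxed);
        tls->alloc_acc = 0;
    }
}
```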