From 03e10c49d204a488f8bd56c7b7262e717ee61762 Mon Sep 17 00:00:00 2001 From: Oscar Spencer Date: Tue, 25 Jun 2024 14:28:56 -0500 Subject: [PATCH] feat(stdlib): Faster memory allocator (#2124) --- compiler/test/input/mallocTight.gr | 34 -- compiler/test/runner.re | 58 +-- compiler/test/suites/basic_functionality.re | 2 +- compiler/test/suites/gc.re | 59 +-- docs/contributor/memory_management.md | 8 +- stdlib/runtime/malloc.gr | 404 +++++++++++++------- stdlib/runtime/malloc.md | 13 +- stdlib/wasi/process.gr | 3 +- 8 files changed, 341 insertions(+), 240 deletions(-) delete mode 100644 compiler/test/input/mallocTight.gr diff --git a/compiler/test/input/mallocTight.gr b/compiler/test/input/mallocTight.gr deleted file mode 100644 index 3d6670a23e..0000000000 --- a/compiler/test/input/mallocTight.gr +++ /dev/null @@ -1,34 +0,0 @@ -@runtimeMode -module MallocTight - -from "runtime/malloc" include Malloc -from "runtime/unsafe/wasmi32" include WasmI32 -use WasmI32.{ (+), (-), (*), (==), (!=) } - -primitive assert = "@assert" -primitive ignore = "@ignore" - -primitive heapStart = "@heap.start" - -// Because we're in runtime mode, malloc has yet to be called -// This provides us with a clean slate to test malloc - -// allow malloc to initialize -ignore(Malloc.malloc(8n)) - -// The free list should be pointing at the base -let base = Malloc.getFreePtr() -assert base == Malloc._RESERVED_RUNTIME_SPACE + heapStart() - -// grab the next (and only) block in the free list -let block = WasmI32.load(Malloc.getFreePtr(), 0n) -assert WasmI32.load(block, 0n) == base - -// When we allocate, an extra 8 bytes is reserved for the block header -let remainingMemory = WasmI32.load(block, 4n) - 8n - -let firstPtr = Malloc.malloc(remainingMemory) -let secondPtr = Malloc.malloc(remainingMemory) - -// These two pointers should (obviously) be different -assert firstPtr != secondPtr diff --git a/compiler/test/runner.re b/compiler/test/runner.re index 2000d1b027..b1b64cff2b 100644 --- a/compiler/test/runner.re +++ b/compiler/test/runner.re @@ -39,7 +39,7 @@ let graindoc_out_file = name => let gaindoc_in_file = name => Filepath.to_string(Fp.At.(test_gaindoc_dir / (name ++ ".input.gr"))); -let compile = (~num_pages=?, ~config_fn=?, ~hook=?, name, prog) => { +let compile = (~num_pages=?, ~max_pages=?, ~config_fn=?, ~hook=?, name, prog) => { Config.preserve_all_configs(() => { Config.with_config( Config.empty, @@ -49,11 +49,10 @@ let compile = (~num_pages=?, ~config_fn=?, ~hook=?, name, prog) => { | None => () }; switch (num_pages) { - | Some(pages) => - Config.initial_memory_pages := pages; - Config.maximum_memory_pages := Some(pages); + | Some(pages) => Config.initial_memory_pages := pages | None => () }; + Config.maximum_memory_pages := max_pages; Config.include_dirs := [Filepath.to_string(test_libs_dir), ...Config.include_dirs^]; let outfile = wasmfile(name); @@ -63,7 +62,8 @@ let compile = (~num_pages=?, ~config_fn=?, ~hook=?, name, prog) => { }); }; -let compile_file = (~num_pages=?, ~config_fn=?, ~hook=?, filename, outfile) => { +let compile_file = + (~num_pages=?, ~max_pages=?, ~config_fn=?, ~hook=?, filename, outfile) => { Config.preserve_all_configs(() => { Config.with_config( Config.empty, @@ -73,11 +73,10 @@ let compile_file = (~num_pages=?, ~config_fn=?, ~hook=?, filename, outfile) => { | None => () }; switch (num_pages) { - | Some(pages) => - Config.initial_memory_pages := pages; - Config.maximum_memory_pages := Some(pages); + | Some(pages) => Config.initial_memory_pages := pages | None => () }; + Config.maximum_memory_pages := max_pages; Config.include_dirs := [Filepath.to_string(test_libs_dir), ...Config.include_dirs^]; compile_file(~is_root_file=true, ~hook?, ~outfile, filename); @@ -153,18 +152,7 @@ let open_process = args => { (code, out, err); }; -let run = (~num_pages=?, ~extra_args=[||], file) => { - let mem_flags = - switch (num_pages) { - | Some(x) => [| - "--initial-memory-pages", - string_of_int(x), - "--maximum-memory-pages", - string_of_int(x), - |] - | None => [||] - }; - +let run = (~extra_args=[||], file) => { let stdlib = Option.get(Grain_utils.Config.stdlib_dir^); let preopen = @@ -186,7 +174,6 @@ let run = (~num_pages=?, ~extra_args=[||], file) => { let cmd = Array.concat([ [|"grain", "run"|], - mem_flags, [|"-S", stdlib, "-I", Filepath.to_string(test_libs_dir), preopen|], [|file|], extra_args, @@ -306,6 +293,7 @@ let makeRunner = ( test, ~num_pages=?, + ~max_pages=?, ~config_fn=?, ~extra_args=?, ~module_header=module_header, @@ -315,8 +303,15 @@ let makeRunner = ) => { test(name, ({expect}) => { Config.preserve_all_configs(() => { - ignore @@ compile(~num_pages?, ~config_fn?, name, module_header ++ prog); - let (result, _) = run(~num_pages?, ~extra_args?, wasmfile(name)); + ignore @@ + compile( + ~num_pages?, + ~max_pages?, + ~config_fn?, + name, + module_header ++ prog, + ); + let (result, _) = run(~extra_args?, wasmfile(name)); expect.string(result).toEqual(expected); }) }); @@ -327,6 +322,7 @@ let makeErrorRunner = test, ~check_exists=true, ~num_pages=?, + ~max_pages=?, ~config_fn=?, ~module_header=module_header, name, @@ -335,8 +331,15 @@ let makeErrorRunner = ) => { test(name, ({expect}) => { Config.preserve_all_configs(() => { - ignore @@ compile(~num_pages?, ~config_fn?, name, module_header ++ prog); - let (result, _) = run(~num_pages?, wasmfile(name)); + ignore @@ + compile( + ~num_pages?, + ~max_pages?, + ~config_fn?, + name, + module_header ++ prog, + ); + let (result, _) = run(wasmfile(name)); if (check_exists) { expect.string(result).toMatch(expected); } else { @@ -347,12 +350,13 @@ let makeErrorRunner = }; let makeFileRunner = - (test, ~num_pages=?, ~config_fn=?, name, filename, expected) => { + (test, ~num_pages=?, ~max_pages=?, ~config_fn=?, name, filename, expected) => { test(name, ({expect}) => { Config.preserve_all_configs(() => { let infile = grainfile(filename); let outfile = wasmfile(name); - ignore @@ compile_file(~num_pages?, ~config_fn?, infile, outfile); + ignore @@ + compile_file(~num_pages?, ~max_pages?, ~config_fn?, infile, outfile); let (result, _) = run(outfile); expect.string(result).toEqual(expected); }) diff --git a/compiler/test/suites/basic_functionality.re b/compiler/test/suites/basic_functionality.re index 3d1475077f..4607d39675 100644 --- a/compiler/test/suites/basic_functionality.re +++ b/compiler/test/suites/basic_functionality.re @@ -377,6 +377,6 @@ describe("basic functionality", ({test, testSkip}) => { ~config_fn=smallestFileConfig, "smallest_grain_program", "", - 4769, + 5165, ); }); diff --git a/compiler/test/suites/gc.re b/compiler/test/suites/gc.re index a1fcfb3e11..0d15556187 100644 --- a/compiler/test/suites/gc.re +++ b/compiler/test/suites/gc.re @@ -8,24 +8,25 @@ let makeGcProgram = (program, heap_size) => { from "runtime/malloc" include Malloc from "runtime/unsafe/memory" include Memory - @disableGC - primitive heapStart = "@heap.start" - - @disableGC - let leak = () => { - use WasmI32.{ (+), (-) } - // find current memory pointer, subtract space for two malloc headers + 1 GC header - let offset = Memory.malloc(8n) - 24n - // Calculate how much memory is left - let availableMemory = offset - (Malloc._RESERVED_RUNTIME_SPACE + heapStart()) - // Calculate how much memory to leak - let toLeak = availableMemory - %dn - // Memory is not reclaimed due to no gc context - // This will actually leak 16 extra bytes because of the headers - Memory.malloc(toLeak - 16n); - void + @unsafe + let _ = { + use WasmI32.{(*), (-), (==)} + // Leak all available memory + // The first call to malloc ensures it has been initialized + Malloc.malloc(8n) + Malloc.leakAll() + // Next allocation will grow the memory by 1 page (64kib) + // We'll manually leak all memory except what should be reserved for the test + // Round reserved memory to nearest block size + let reserved = %dn + // If only one unit is requested, the allocator will include it in our next malloc, + // so we request 2 instead + let reserved = if (reserved == 1n) 2n else reserved + // one page - 2 malloc headers - 1 gc header - extra morecore unit - reserved space + let toLeak = 65536n - 16n - 8n - 64n - reserved * 64n + Memory.malloc(toLeak) } - leak(); + %s |}, heap_size, @@ -43,6 +44,7 @@ describe("garbage collection", ({test, testSkip}) => { let assertRunGC = (name, heapSize, prog, expected) => makeRunner( ~num_pages=1, + ~max_pages=2, test_or_skip, name, makeGcProgram(prog, heapSize), @@ -52,38 +54,42 @@ describe("garbage collection", ({test, testSkip}) => { makeErrorRunner( test_or_skip, ~num_pages=1, + ~max_pages=2, name, makeGcProgram(prog, heapSize), expected, ); // oom tests + // The allocator will use 2 units for the first allocation and then oom assertRunGCError( "oomgc1", - 48, + 2, "(1, (3, 4))", "Maximum memory size exceeded", ); - assertRunGC("oomgc2", 64, "(1, (3, 4))", ""); - assertRunGC("oomgc3", 32, "(3, 4)", ""); + // This requires only 2 units, but if only two are requested they would be + // used by the first allocation + assertRunGC("oomgc2", 3, "(1, (3, 4))", ""); + assertRunGC("oomgc3", 1, "(3, 4)", ""); // gc tests assertRunGC( "gc1", - 160, + 5, "let f = (() => (1, 2));\n {\n f();\n f();\n f();\n f()\n }", "", ); /* https://github.com/grain-lang/grain/issues/774 */ assertRunGC( "gc3", - 1024, + 17, "let foo = (s: String) => void\nlet printBool = (b: Bool) => foo(if (b) \"true\" else \"false\")\n\nlet b = true\nfor (let mut i=0; i<100000; i += 1) {\n printBool(true)\n}", "", ); assertRunGCError( "fib_gc_err", - 256, + 5, {| let fib = x => { let rec fib_help = (n, acc) => { @@ -102,7 +108,7 @@ describe("garbage collection", ({test, testSkip}) => { ); assertRunGC( "fib_gc", - 512, + 9, {| let fib = x => { let rec fib_help = (n, acc) => { @@ -121,7 +127,7 @@ describe("garbage collection", ({test, testSkip}) => { ); assertRunGC( "loop_gc", - 256, + 5, {| for (let mut i = 0; i < 512; i += 1) { let string = "string" @@ -134,7 +140,7 @@ describe("garbage collection", ({test, testSkip}) => { ); assertRunGC( "long_lists", - 20000, + 350, {| from "list" include List use List.* @@ -162,7 +168,6 @@ describe("garbage collection", ({test, testSkip}) => { |}, "true\n", ); - assertFileRun("malloc_tight", "mallocTight", ""); assertFileRun("memory_grow1", "memoryGrow", "1000000000000\n"); assertMemoryLimitedFileRun( "loop_memory_reclaim", diff --git a/docs/contributor/memory_management.md b/docs/contributor/memory_management.md index f4266f9497..de54e0af51 100644 --- a/docs/contributor/memory_management.md +++ b/docs/contributor/memory_management.md @@ -6,7 +6,7 @@ We ultimately aim to replace Grain's bespoke memory management with the WebAssem ## Memory Allocator -Grain uses a [memory allocator](https://github.com/grain-lang/grain/blob/main/stdlib/runtime/malloc.gr) derived from the `malloc`/`free` example given in Kernighan and Ritchie's ["The C Programming Language"](https://kremlin.cc/k&r.pdf) (K&R C), pages 185-188 (PDF page 199). This module exports the following values: +More documentation about Grain's [memory allocator](https://github.com/grain-lang/grain/blob/main/stdlib/runtime/malloc.gr) can be found in that module. It exports the following values: ```grain /** @@ -31,11 +31,9 @@ export let malloc: (nbytes: WasmI32) -> WasmI32 export let free = (ap: WasmI32) => Void /** - * Returns the current free list pointer (used for debugging) - * - * @returns The free list pointer + * Leaks all memory in all free lists; used for testing. */ -export let getFreePtr = () => WasmI32 +export let leakAll = () => Void ``` These functions should be familiar to programmers who have used `malloc` and `free` in C (and C-like languages). For further reading, refer to this Wikipedia page: [C dynamic memory allocation](https://en.wikipedia.org/wiki/C_dynamic_memory_allocation). The semantics of these functions align near-identically with those of C's corresponding functions. diff --git a/stdlib/runtime/malloc.gr b/stdlib/runtime/malloc.gr index c80ea1674b..1a83d90a68 100644 --- a/stdlib/runtime/malloc.gr +++ b/stdlib/runtime/malloc.gr @@ -3,8 +3,6 @@ module Malloc /* * This module implements a generic memory allocator. - * The algorithm is quite simple, being based on the memory allocator - * from pages 185-188 of K&R C (2nd edition). */ from "runtime/unsafe/wasmi32" include WasmI32 @@ -20,6 +18,8 @@ use WasmI32.{ (>>>), (==), (!=), + (&), + (^), } from "runtime/exception" include Exception @@ -32,44 +32,64 @@ primitive (||) = "@or" primitive heapStart = "@heap.start" -/* UNDERSTANDING THE STRUCTURE OF THE FREE LIST - * The original K&R definition for the free list entry type was the following: +/* UNDERSTANDING THE STRUCTURE OF THE FREE LISTS * - * union header { - * struct { - * union header *ptr; - * unsigned size; - * } s; - * long x; // <- forces 8-byte alignment - * }; + * `malloc` allocates memory and `free` releases this memory. Two separate free + * lists are maintained, one for small blocks of 64 bytes, and one for larger + * blocks of multiples of 64 bytes. Each block has an 8-byte header and 8-byte + * footer to keep track of block sizes and maintain the free list. + * + * Most allocations in programs are small, so the separate free lists allow us + * to implement `malloc` and `free` in O(1) for small allocations and O(n) + * `malloc` and O(1) `free` for large allocations, where `n` is the size of the + * free list for large blocks. + * + * The small blocks are able to service: + * - Numbers (with the exception of large BigInts/Rationals) + * - Tuples/Arrays up to 8 elements + * - Records up to 6 elements + * - Variants up to 5 elements + * - Closures up to 6 elements + * - Bytes/Strings up to length 32 + * + * Blocks in memory look like this: * - * In memory, this is really just two ints (assuming we're working in 32-bit mode). - * As such, we manually lay out the entries on the heap as follows (note that we - * use helpers to facilitate accessing and setting these values): + * 8 bytes 8 bytes 64n - 16 bytes 8 bytes 8 bytes + * ┌─────────────────────┬────────────────┬─────────────────┬────────────────┬─────────────────────┐ + * │ │ + * └─────────────────────┴────────────────┴─────────────────┴────────────────┴─────────────────────┘ * - * Grain C Equivalent - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * let ptr === union header *ptr - * getNext(ptr) === ptr->s.ptr - * getSize(ptr) === ptr->s.size + * Block headers look like this: + * ┌───────────────────────┬──────────────┐ + * │ │ + * └───────────────────────┴──────────────┘ + * + * Block footers look like this: + * ┌───────────────────────┬──────────────┐ + * │ │ + * └───────────────────────┴──────────────┘ + * + * The size is kept in the header and footer to allow us to quickly combine + * free blocks when blocks are freed. + * + * Pointers to the previous/next free blocks give us doubly-linked free lists, + * which makes it possible to remove blocks from the free list in constant + * time. + * + * A block is considered in use when the previous/next pointers are both zero. */ /** - * Pointer to the start of the free list. This is always a multiple of + * Pointers to the start of the free lists. This is always a multiple of * 8, with the exception of its initial value (used as a sentinel). */ -let mut freePtr = 1n +let mut smallBlockFreePtr = 1n +let mut largeBlockFreePtr = 1n /** * Size (in bytes) of entries in the free list. */ -let mallocHeaderSize = 8n - -/** - * log_2(mallocHeaderSize) (multiplication by the header - * size is equivalent to left-shifting by this amount) - */ -let logMallocHeaderSize = 3n +let _HEADER_FOOTER_SIZE = 8n /** * The current size (in bytes) of the heap. @@ -87,9 +107,9 @@ let _BASE = heapStart() + _RESERVED_RUNTIME_SPACE /** * The start pointer of the heap. */ -let _HEAP_START = _BASE + mallocHeaderSize +let _HEAP_START = _BASE + _HEADER_FOOTER_SIZE -let _NEXT_OFFSET = 0n +let _PREV_NEXT_OFFSET = 0n let _SIZE_OFFSET = 4n /** @@ -97,33 +117,65 @@ let _SIZE_OFFSET = 4n */ let _PAGE_SIZE = 65536n -let getNext = (ptr: WasmI32) => { - WasmI32.load(ptr, _NEXT_OFFSET) +/** + * Size (in bytes) of blocks allocated by the allocator + */ +let _UNIT_SIZE = 64n + +/** + * log_2(_UNIT_SIZE) (multiplication by the header + * size is equivalent to left-shifting by this amount) + */ +let logUnitSize = 6n + +let headerGetPrevious = (headerPtr: WasmI32) => { + WasmI32.load(headerPtr, _PREV_NEXT_OFFSET) +} + +let headerSetPrevious = (headerPtr: WasmI32, val: WasmI32) => { + WasmI32.store(headerPtr, val, _PREV_NEXT_OFFSET) +} + +let headerGetSize = (headerPtr: WasmI32) => { + WasmI32.load(headerPtr, _SIZE_OFFSET) +} + +let headerSetSize = (headerPtr: WasmI32, val: WasmI32) => { + WasmI32.store(headerPtr, val, _SIZE_OFFSET) } -let setNext = (ptr: WasmI32, val: WasmI32) => { - WasmI32.store(ptr, val, _NEXT_OFFSET) +// These functions are no different than the ones above, but exist to make the +// code much easier to follow + +let footerGetNext = (footerPtr: WasmI32) => { + WasmI32.load(footerPtr, _PREV_NEXT_OFFSET) +} + +let footerSetNext = (footerPtr: WasmI32, val: WasmI32) => { + WasmI32.store(footerPtr, val, _PREV_NEXT_OFFSET) } -let getSize = (ptr: WasmI32) => { - WasmI32.load(ptr, _SIZE_OFFSET) +let footerGetSize = (footerPtr: WasmI32) => { + WasmI32.load(footerPtr, _SIZE_OFFSET) } -let setSize = (ptr: WasmI32, val: WasmI32) => { - WasmI32.store(ptr, val, _SIZE_OFFSET) +let footerSetSize = (footerPtr: WasmI32, val: WasmI32) => { + WasmI32.store(footerPtr, val, _SIZE_OFFSET) } /** - * Requests that the heap be grown by the given number of bytes. + * Requests that the heap be grown by the given number of units. * - * @param nbytes: The number of bytes requested + * @param nunits: The number of units requested * @returns The pointer to the beginning of the extended region if successful or -1 otherwise */ -let growHeap = (nbytes: WasmI32) => { +let growHeap = (nunits: WasmI32) => { let mut reqSize = 0n let mut reqResult = 0n let mut origSize = heapSize + let nbytes = nunits << logUnitSize + // If the size has not been initialized, do so. if (heapSize == 0n) { heapSize = memorySize() * _PAGE_SIZE - _HEAP_START @@ -131,8 +183,7 @@ let growHeap = (nbytes: WasmI32) => { // More bytes requested than the initial heap size, // so we need to request more anyway. reqSize = nbytes - heapSize - reqSize = reqSize >>> 16n - reqSize += 1n + reqSize = (reqSize + _PAGE_SIZE - 1n) >>> 16n reqResult = memoryGrow(reqSize) if (reqResult == -1n) { -1n @@ -161,49 +212,105 @@ let growHeap = (nbytes: WasmI32) => { } } +let removeFromFreeList = (blockPtr: WasmI32) => { + let blockSize = headerGetSize(blockPtr) + let blockFooterPtr = blockPtr + blockSize * _UNIT_SIZE - _HEADER_FOOTER_SIZE + let nextPtr = footerGetNext(blockFooterPtr) + + let prevPtr = headerGetPrevious(blockPtr) + if (prevPtr == 1n) { + // this block was the start of the free list + if (blockSize == 1n) { + smallBlockFreePtr = nextPtr + } else { + largeBlockFreePtr = nextPtr + } + + headerSetPrevious(nextPtr, prevPtr) + } else { + let prevSize = headerGetSize(prevPtr) + let prevFooterPtr = prevPtr + prevSize * _UNIT_SIZE - _HEADER_FOOTER_SIZE + footerSetNext(prevFooterPtr, nextPtr) + headerSetPrevious(nextPtr, prevPtr) + } +} + /** * Frees the given allocated pointer. * * @param ap: The pointer to free */ provide let free = (ap: WasmI32) => { - let mut blockPtr = ap - 8n // 8 bytes for malloc header - let mut p = freePtr - - // Edge case: for the first free (called by morecore), the free pointer - // is actually already pointing to this node, so we don't do anything. - if (blockPtr != freePtr) { - // Find the location to insert this block into the free list - while (true) { - let nextp = getNext(p) - if ( - blockPtr > p && blockPtr < nextp || - p >= nextp && (blockPtr > p || blockPtr < nextp) - ) { - break - } - p = nextp + let mut blockPtr = ap - _HEADER_FOOTER_SIZE + let mut blockSize = headerGetSize(blockPtr) + + let nextBlockPtr = blockPtr + blockSize * _UNIT_SIZE + if (headerGetPrevious(nextBlockPtr) > 0n) { + // adjacent block is free, so merge + removeFromFreeList(nextBlockPtr) + + let nextBlockSize = headerGetSize(nextBlockPtr) + blockSize += nextBlockSize + headerSetSize(blockPtr, blockSize) + + let footerPtr = blockPtr + blockSize * _UNIT_SIZE - _HEADER_FOOTER_SIZE + footerSetSize(footerPtr, blockSize) + } + + let prevBlockFooterPtr = blockPtr - _HEADER_FOOTER_SIZE + if (footerGetNext(prevBlockFooterPtr) > 0n) { + // (prev) adjacent block is free, so merge + let prevBlockSize = footerGetSize(prevBlockFooterPtr) + let prevBlockPtr = blockPtr - prevBlockSize * _UNIT_SIZE + + if (prevBlockSize == 1n) { + // Since we merged, this block is already a part of the free list. If + // the old block was size 1, it needs to be switched to the large list. + removeFromFreeList(prevBlockPtr) } - // Merge the block into the adjacent free list entry above, if needed - let blockPtrSize = getSize(blockPtr) - let next = getNext(p) - if (blockPtr + blockPtrSize == next) { - setSize(blockPtr, blockPtrSize + getSize(next)) - setNext(blockPtr, getNext(next)) - } else { - setNext(blockPtr, next) + blockPtr = prevBlockPtr + + blockSize += prevBlockSize + headerSetSize(blockPtr, blockSize) + + let footerPtr = blockPtr + blockSize * _UNIT_SIZE - _HEADER_FOOTER_SIZE + footerSetSize(footerPtr, blockSize) + footerSetNext(footerPtr, footerGetNext(prevBlockFooterPtr)) + + if (prevBlockSize == 1n) { + if (largeBlockFreePtr != 1n) { + headerSetPrevious(largeBlockFreePtr, blockPtr) + } + + let footerPtr = blockPtr + blockSize * _UNIT_SIZE - _HEADER_FOOTER_SIZE + footerSetNext(footerPtr, largeBlockFreePtr) + headerSetPrevious(blockPtr, 1n) + + largeBlockFreePtr = blockPtr } - // Merge the previous (adjacent) free list entry into this block, if needed - let pSize = getSize(p) - if (p + pSize == blockPtr) { - setSize(p, pSize + getSize(blockPtr)) - setNext(p, getNext(blockPtr)) + } else { + if (blockSize == 1n) { + if (smallBlockFreePtr != 1n) { + headerSetPrevious(smallBlockFreePtr, blockPtr) + } + + let footerPtr = blockPtr + _UNIT_SIZE - _HEADER_FOOTER_SIZE + footerSetNext(footerPtr, smallBlockFreePtr) + headerSetPrevious(blockPtr, 1n) + + smallBlockFreePtr = blockPtr } else { - setNext(p, blockPtr) + if (largeBlockFreePtr != 1n) { + headerSetPrevious(largeBlockFreePtr, blockPtr) + } + + let footerPtr = blockPtr + blockSize * _UNIT_SIZE - _HEADER_FOOTER_SIZE + footerSetNext(footerPtr, largeBlockFreePtr) + headerSetPrevious(blockPtr, 1n) + + largeBlockFreePtr = blockPtr } - // Set the free list head to this block - freePtr = p } } @@ -215,25 +322,48 @@ provide let free = (ap: WasmI32) => { * @param nbytes: The number of bytes to try to grow the heap by * @returns A pointer to the start of the free list if successful or -1 otherwise */ -let morecore = (nbytes: WasmI32) => { +let morecore = (nunits: WasmI32) => { let origSize = heapSize - let mut cp = growHeap(nbytes) + + let cp = growHeap(nunits + 1n) // include an extra unit for 4 headers/footers // If there was an error, fail if (cp == -1n) { Exception.panic("OutOfMemory: Maximum memory size exceeded") } else { - // Set the size of the new block to the amount the - // heap was grown. + // Set up the block. We'll add dummy headers/footers before and after the + // block to avoid unnecessary bounds checks elsewhere in the code. let grownAmount = heapSize - origSize - setSize(cp, grownAmount) + let units = (grownAmount >>> logUnitSize) - 1n + + let dummyFooter = cp + footerSetSize(dummyFooter, 0n) + footerSetNext(dummyFooter, 0n) + + let blockHeader = dummyFooter + _HEADER_FOOTER_SIZE + headerSetSize(blockHeader, units) + headerSetPrevious(blockHeader, 0n) + + let blockFooter = blockHeader + units * _UNIT_SIZE - _HEADER_FOOTER_SIZE + footerSetSize(blockFooter, units) + footerSetNext(blockFooter, 0n) + + let dummyHeader = blockFooter + _HEADER_FOOTER_SIZE + headerSetSize(dummyHeader, 0n) + headerSetPrevious(dummyHeader, 0n) + // Call free() with the new block to add it to the free list. - free(cp + 8n) + free(blockHeader + _HEADER_FOOTER_SIZE) + // Return the free list pointer. - freePtr + largeBlockFreePtr } } +let roundBytesToUnits = bytes => { + (bytes + _UNIT_SIZE - 1n) >>> logUnitSize +} + /** * Allocates the requested number of bytes, returning a pointer. * @@ -241,70 +371,74 @@ let morecore = (nbytes: WasmI32) => { * @returns The pointer to the allocated region (8-byte aligned) or -1 if the allocation failed */ provide let malloc = (nbytes: WasmI32) => { - let mut nbytes = nbytes - let mut prevp = freePtr + let mut nunits = roundBytesToUnits(nbytes + _HEADER_FOOTER_SIZE * 2n) - // Set nbytes to the next multiple of mallocHeaderSize greater - // than the given size - let mut nunits = (nbytes + mallocHeaderSize - 1n) / mallocHeaderSize + 1n - nbytes = nunits << logMallocHeaderSize // multiply by header size + // Fast path for small blocks + if (nunits == 1n && smallBlockFreePtr != 1n) { + let blockPtr = smallBlockFreePtr + headerSetPrevious(blockPtr, 0n) + let footer = blockPtr + _UNIT_SIZE - _HEADER_FOOTER_SIZE + let next = footerGetNext(footer) + footerSetNext(footer, 0n) - // Handle initialization - if (heapSize == 0n) { - WasmI32.store(_BASE, _BASE, _NEXT_OFFSET) - freePtr = _BASE - prevp = _BASE - WasmI32.store(_BASE, 0n, _SIZE_OFFSET) + headerSetPrevious(next, 1n) + smallBlockFreePtr = next + + return blockPtr + _HEADER_FOOTER_SIZE } - let mut ret = -1n - - // Search the freelist for any blocks large enough. - for (let mut p = getNext(prevp);; { - prevp = p - p = getNext(p) - }) { - let size = getSize(p) - if (size >= nbytes) { - // If this block is big enough, allocate from it. - if (size == nbytes) { - // It's exactly the right size! - setNext(prevp, getNext(p)) - } else { - // Shrink it as needed - let newSize = size - nbytes - setSize(p, newSize) - p += newSize - setSize(p, nbytes) - } - // Update the pointer to the free list. - freePtr = prevp + // Find a large enough block + let mut freeBlockPtr = largeBlockFreePtr + while (true) { + // Free list is empty; grow the heap + if (freeBlockPtr == 1n) { + freeBlockPtr = morecore(nunits) + } + + let blockSize = headerGetSize(freeBlockPtr) + let footerPtr = freeBlockPtr + blockSize * _UNIT_SIZE - _HEADER_FOOTER_SIZE - // Return region past the header - ret = p + 8n - break + // Perfectly sized block, or one unit larger to avoid leaving size 1 blocks + // in the large block free list + if (blockSize == nunits || blockSize == nunits + 1n) { + let blockPtr = freeBlockPtr + + removeFromFreeList(blockPtr) + headerSetPrevious(blockPtr, 0n) + footerSetNext(footerPtr, 0n) + + return blockPtr + _HEADER_FOOTER_SIZE } - // We've reached the end of the free list. Time to grow the heap. - if (p == freePtr) { - // Attempt to grow the heap - p = morecore(nbytes) - // If growing the heap failed, return -1. - if (p == -1n) { - ret = -1n - break - } + // Take a chunk of this larger block + if (blockSize > nunits) { + let blockPtr = freeBlockPtr + + let newSize = blockSize - nunits + headerSetSize(blockPtr, newSize) + let newFooterPtr = blockPtr + newSize * _UNIT_SIZE - _HEADER_FOOTER_SIZE + footerSetSize(newFooterPtr, newSize) + footerSetNext(newFooterPtr, footerGetNext(footerPtr)) + + let newBlockPtr = newFooterPtr + _HEADER_FOOTER_SIZE + headerSetSize(newBlockPtr, nunits) + headerSetPrevious(newBlockPtr, 0n) + footerSetSize(footerPtr, nunits) + footerSetNext(footerPtr, 0n) + + return newBlockPtr + _HEADER_FOOTER_SIZE } + + freeBlockPtr = footerGetNext(footerPtr) } - ret + + return -1n } /** - * Returns the current free list pointer. - * Used for debugging. - * - * @returns The free list pointer + * Leaks all memory in all free lists; used for testing. */ -provide let getFreePtr = () => { - freePtr +provide let leakAll = () => { + smallBlockFreePtr = 1n + largeBlockFreePtr = 1n } diff --git a/stdlib/runtime/malloc.md b/stdlib/runtime/malloc.md index bfda44fb30..5fa38e882d 100644 --- a/stdlib/runtime/malloc.md +++ b/stdlib/runtime/malloc.md @@ -46,18 +46,11 @@ Returns: |----|-----------| |`WasmI32`|The pointer to the allocated region (8-byte aligned) or -1 if the allocation failed| -### Malloc.**getFreePtr** +### Malloc.**leakAll** ```grain -getFreePtr : () => WasmI32 +leakAll : () => Void ``` -Returns the current free list pointer. -Used for debugging. - -Returns: - -|type|description| -|----|-----------| -|`WasmI32`|The free list pointer| +Leaks all memory in all free lists; used for testing. diff --git a/stdlib/wasi/process.gr b/stdlib/wasi/process.gr index b977af5180..f46fdcc9da 100644 --- a/stdlib/wasi/process.gr +++ b/stdlib/wasi/process.gr @@ -145,16 +145,17 @@ provide let argv = () => { } let argc = WasmI32.load(argcPtr, 0n) - Memory.free(argcPtr) let argsLength = argc * 4n let arr = allocateArray(argc) if (WasmI32.eqz(argsLength)) { + Memory.free(argcPtr) return Ok(WasmI32.toGrain(arr): Array) } let argvBufSize = WasmI32.load(argvBufSizePtr, 0n) + Memory.free(argcPtr) let argvPtr = Memory.malloc(argc * 4n) let argvBufPtr = Memory.malloc(argvBufSize)