From 39aec8a5c77a208d63eba504cc2be81a54a6275a Mon Sep 17 00:00:00 2001
From: Emiel Por
Date: Wed, 27 Nov 2024 13:53:24 -0800
Subject: [PATCH 01/34] Add a hash map for fixed-sized string keys.

---
 catkit_core/SharedHashMap.h | 180 ++++++++++++++++++++++++++++++++++++
 1 file changed, 180 insertions(+)
 create mode 100644 catkit_core/SharedHashMap.h

diff --git a/catkit_core/SharedHashMap.h b/catkit_core/SharedHashMap.h
new file mode 100644
index 00000000..3177792d
--- /dev/null
+++ b/catkit_core/SharedHashMap.h
@@ -0,0 +1,180 @@
+#ifndef SHARED_HASH_MAP_H
+#define SHARED_HASH_MAP_H
+
+#include <atomic>
+#include <cstdint>
+#include <cstring>
+#include <string>
+
+// MurmurHash3 32-bit version
+inline uint32_t murmurhash3(const std::string &key, uint32_t seed = 0)
+{
+	const uint8_t *data = reinterpret_cast<const uint8_t *>(key.data());
+	size_t len = key.size();
+
+	uint32_t h = seed;
+	const uint32_t c1 = 0xcc9e2d51;
+	const uint32_t c2 = 0x1b873593;
+
+	// Partition in blocks of 4 bytes.
+	const int nblocks = len / 4;
+	const uint32_t *blocks = reinterpret_cast<const uint32_t *>(data);
+	for (int i = 0; i < nblocks; i++)
+	{
+		uint32_t k = blocks[i];
+		k *= c1;
+		k = (k << 15) | (k >> 17);
+		k *= c2;
+
+		h ^= k;
+		h = (h << 13) | (h >> 19);
+		h = h * 5 + 0xe6546b64;
+	}
+
+	// Process leftover bytes. The cases intentionally fall through.
+	const uint8_t *tail = data + nblocks * 4;
+	uint32_t k1 = 0;
+
+	switch (len & 3)
+	{
+	case 3:
+		k1 ^= tail[2] << 16;
+	case 2:
+		k1 ^= tail[1] << 8;
+	case 1:
+		k1 ^= tail[0];
+		k1 *= c1;
+		k1 = (k1 << 15) | (k1 >> 17);
+		k1 *= c2;
+		h ^= k1;
+	case 0:
+		// Do nothing.
+		break;
+	}
+
+	h ^= len;
+	h ^= (h >> 16);
+	h *= 0x85ebca6b;
+	h ^= (h >> 13);
+	h *= 0xc2b2ae35;
+	h ^= (h >> 16);
+
+	return h;
+}
+
+// A hash map with the following limitations:
+// * entries cannot be removed.
+// * key is string type of fixed size.
+template <typename Value, std::size_t Size, std::size_t MaxKeyLength>
+class SharedHashMap
+{
+private:
+	enum EntryFlags
+	{
+		UNOCCUPIED = 0,
+		INITIALIZING = 1,
+		OCCUPIED = 2
+	};
+
+	struct Entry
+	{
+		std::atomic<EntryFlags> flags = EntryFlags::UNOCCUPIED;
+		char key[MaxKeyLength];
+
+		Value value;
+	};
+
+	Entry m_Data[Size];
+
+	size_t hash(const std::string &key) const
+	{
+		return murmurhash3(key) % Size;
+	}
+
+public:
+	bool insert(const std::string &key, const Value &value)
+	{
+		if (key.size() >= MaxKeyLength)
+		{
+			// Key is too long to fit in the fixed-size buffer.
+			return false;
+		}
+
+		size_t index = hash(key);
+
+		for (size_t i = 0; i < Size; ++i)
+		{
+			size_t probe = (index + i) % Size;
+
+			// Try to claim this entry. The expected value must be an lvalue;
+			// on failure it receives the flags that were actually stored.
+			EntryFlags flags = EntryFlags::UNOCCUPIED;
+			m_Data[probe].flags.compare_exchange_strong(flags, EntryFlags::INITIALIZING);
+
+			// If this entry is still initializing, do a spin-wait until it's occupied.
+			// This should almost never be necessary and should only last a short while if it does.
+			while (flags == EntryFlags::INITIALIZING)
+			{
+				flags = m_Data[probe].flags.load();
+			}
+
+			if (flags == EntryFlags::OCCUPIED)
+			{
+				// Check if the key is our key.
+				if (std::strcmp(m_Data[probe].key, key.c_str()) == 0)
+				{
+					// Key already exists.
+					return false;
+				}
+			}
+
+			if (flags == EntryFlags::UNOCCUPIED)
+			{
+				// We won the claim above, so we can initialize the entry.
+				// Copy key ensuring null-termination.
+				std::strncpy(m_Data[probe].key, key.c_str(), MaxKeyLength - 1);
+				m_Data[probe].key[MaxKeyLength - 1] = '\0';
+
+				// Copy the value.
+				m_Data[probe].value = value;
+
+				// Make occupied.
+				m_Data[probe].flags = EntryFlags::OCCUPIED;
+
+				return true;
+			}
+		}
+
+		// Map is full.
+		return false;
+	}
+
+	const Value* find(const std::string &key) const
+	{
+		if (key.size() >= MaxKeyLength)
+		{
+			// Key is too long to fit in the fixed-size buffer.
+			return nullptr;
+		}
+
+		size_t index = hash(key);
+
+		for (size_t i = 0; i < Size; ++i)
+		{
+			size_t probe = (index + i) % Size;
+
+			EntryFlags flags = m_Data[probe].flags.load();
+
+			if (flags == EntryFlags::OCCUPIED && std::strcmp(m_Data[probe].key, key.c_str()) == 0)
+			{
+				return &m_Data[probe].value;
+			}
+
+			if (flags != EntryFlags::OCCUPIED)
+			{
+				break;
+			}
+		}
+
+		// Key not found.
+		return nullptr;
+	}
+};
+
+#endif // SHARED_HASH_MAP_H

From 09dd48a10521c738c960edde5a780b27292af5e0 Mon Sep 17 00:00:00 2001
From: Emiel Por
Date: Wed, 27 Nov 2024 13:54:02 -0800
Subject: [PATCH 02/34] Add a pool allocator that can work from shared memory.

---
 catkit_core/PoolAllocator.h | 70 +++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 catkit_core/PoolAllocator.h

diff --git a/catkit_core/PoolAllocator.h b/catkit_core/PoolAllocator.h
new file mode 100644
index 00000000..7e09045c
--- /dev/null
+++ b/catkit_core/PoolAllocator.h
@@ -0,0 +1,70 @@
+#ifndef POOL_ALLOCATOR_H
+#define POOL_ALLOCATOR_H
+
+#include <atomic>
+#include <cstdint>
+
+// A simple lock-free pool allocator.
+template <typename Value, std::size_t Size>
+class PoolAllocator
+{
+private:
+	Value m_Pool[Size];
+
+	std::atomic_int32_t m_Head;
+	std::atomic_int32_t m_Next[Size];
+public:
+	PoolAllocator()
+		: m_Head(0)
+	{
+		// Check that size is smaller than maximum value of int32_t - 1.
+		static_assert(Size < INT32_MAX - 1, "Size must be smaller than INT32_MAX - 1.");
+
+		// Initialize the linked list.
+		for (size_t i = 0; i < Size; ++i)
+		{
+			m_Next[i] = i + 1;
+		}
+	}
+
+	Value *Allocate()
+	{
+		std::int32_t head;
+		std::int32_t next;
+
+		// Pop the first element from the linked list.
+		do
+		{
+			head = m_Head.load(std::memory_order_relaxed);
+			next = m_Next[head].load(std::memory_order_relaxed);
+		} while (!m_Head.compare_exchange_weak(head, next));
+
+		// Return the popped element.
+		return &m_Pool[head];
+	}
+
+	void Deallocate(Value *element)
+	{
+		// Ignore null pointers.
+		if (element == nullptr)
+		{
+			return;
+		}
+
+		// Check that the element is within the pool.
+		std::int32_t index = element - m_Pool;
+
+		if (index < 0 || index >= Size)
+		{
+			return;
+		}
+
+		// Push the element back on the front of the linked list.
+		do
+		{
+			m_Next[index] = m_Head.load(std::memory_order_relaxed);
+		} while (!m_Head.compare_exchange_weak(m_Next[index], index));
+	}
+};
+
+#endif // POOL_ALLOCATOR_H

From a744542d34c2f15bd8fbfd60167a8ef1f09b6f7c Mon Sep 17 00:00:00 2001
From: Emiel Por
Date: Wed, 27 Nov 2024 14:22:24 -0800
Subject: [PATCH 03/34] Make pool allocator only manage an outside buffer.

---
 catkit_core/PoolAllocator.h | 49 +++++++++++++++++++------------------
 1 file changed, 25 insertions(+), 24 deletions(-)

diff --git a/catkit_core/PoolAllocator.h b/catkit_core/PoolAllocator.h
index 7e09045c..15eaee73 100644
--- a/catkit_core/PoolAllocator.h
+++ b/catkit_core/PoolAllocator.h
@@ -5,56 +5,57 @@
 #include <cstdint>
 
 // A simple lock-free pool allocator.
-template <typename Value, std::size_t Size>
+template <std::size_t Size>
 class PoolAllocator
 {
 private:
-	Value m_Pool[Size];
-
-	std::atomic_int32_t m_Head;
-	std::atomic_int32_t m_Next[Size];
+	std::atomic_size_t m_Head;
+	std::atomic_size_t m_Next[Size];
 public:
 	PoolAllocator()
		: m_Head(0)
 	{
-		// Check that size is smaller than maximum value of int32_t - 1.
-		static_assert(Size < INT32_MAX - 1, "Size must be smaller than INT32_MAX - 1.");
-
 		// Initialize the linked list.
for (size_t i = 0; i < Size; ++i) { - m_Next[i] = i + 1; + if (i == Size - 1) + { + m_Next[i] = -1; + } + else + { + m_Next[i] = i + 1; + } } } - Value *Allocate() + size_t Allocate() { - std::int32_t head; - std::int32_t next; + std::size_t head; + std::size_t next; // Pop the first element from the linked list. do { head = m_Head.load(std::memory_order_relaxed); + + // Check if the pool is empty. + if (head == -1) + { + return nullptr; + } + next = m_Next[head].load(std::memory_order_relaxed); } while (!m_Head.compare_exchange_weak(head, next)); // Return the popped element. - return &m_Pool[head]; + return head; } - void Deallocate(Value *element) + void Deallocate(size_t index) { - // Ignore null pointers. - if (element == nullptr) - { - return; - } - - // Check that the element is within the pool. - std::int32_t index = element - m_Pool; - - if (index < 0 || index >= Size) + // Check if the element is within the pool bounds. + if (index >= Size) { return; } From 92ad8da2ba8572ad9f05bf3d8df895d38d668bfb Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Fri, 29 Nov 2024 10:39:01 -0800 Subject: [PATCH 04/34] Put implementation into inl file. --- catkit_core/PoolAllocator.h | 58 +++------------------------------- catkit_core/PoolAllocator.inl | 59 +++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 53 deletions(-) create mode 100644 catkit_core/PoolAllocator.inl diff --git a/catkit_core/PoolAllocator.h b/catkit_core/PoolAllocator.h index 15eaee73..92524f0f 100644 --- a/catkit_core/PoolAllocator.h +++ b/catkit_core/PoolAllocator.h @@ -12,60 +12,12 @@ class PoolAllocator std::atomic_size_t m_Head; std::atomic_size_t m_Next[Size]; public: - PoolAllocator() - : m_Head(0); - { - // Initialize the linked list. - for (size_t i = 0; i < Size; ++i) - { - if (i == Size - 1) - { - m_Next[i] = -1; - } - else - { - m_Next[i] = i + 1; - } - } - } + PoolAllocator(); - size_t Allocate() - { - std::size_t head; - std::size_t next; - - // Pop the first element from the linked list. - do - { - head = m_Head.load(std::memory_order_relaxed); - - // Check if the pool is empty. - if (head == -1) - { - return nullptr; - } - - next = m_Next[head].load(std::memory_order_relaxed); - } while (!m_Head.compare_exchange_weak(head, next)); - - // Return the popped element. - return head; - } - - void Deallocate(size_t index) - { - // Check if the element is within the pool bounds. - if (index >= Size) - { - return; - } - - // Push the element back on the front of the linked list. - do - { - m_Next[index] = m_Head.load(std::memory_order_relaxed); - } while (!m_Head.compare_exchange_weak(m_Next[index], index)); - } + size_t Allocate(); + void Deallocate(size_t index); }; +#include "PoolAllocator.inl" + #endif // POOL_ALLOCATOR_H diff --git a/catkit_core/PoolAllocator.inl b/catkit_core/PoolAllocator.inl new file mode 100644 index 00000000..a4548f39 --- /dev/null +++ b/catkit_core/PoolAllocator.inl @@ -0,0 +1,59 @@ +#include "PoolAllocator.h" + +template +PoolAllocator::PoolAllocator() + : m_Head(0) +{ + // Initialize the linked list. + for (size_t i = 0; i < Size; ++i) + { + if (i == Size - 1) + { + m_Next[i] = -1; + } + else + { + m_Next[i] = i + 1; + } + } +} + +template +std::size_t PoolAllocator::Allocate() +{ + std::size_t head; + std::size_t next; + + // Pop the first element from the linked list. + do + { + head = m_Head.load(std::memory_order_relaxed); + + // Check if the pool is empty. 
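+		// (head is a std::size_t, so the list's -1 tail sentinel compares as SIZE_MAX here.)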
+ if (head == -1) + { + return nullptr; + } + + next = m_Next[head].load(std::memory_order_relaxed); + } while (!m_Head.compare_exchange_weak(head, next)); + + // Return the popped element. + return head; +} + +template +PoolAllocator::Deallocate(std::size_t index) +{ + // Check if the element is within the pool bounds. + if (index >= Size) + { + return; + } + + // Push the element back on the front of the linked list. + do + { + m_Next[index] = m_Head.load(std::memory_order_relaxed); + } while (!m_Head.compare_exchange_weak(m_Next[index], index)); +} \ No newline at end of file From 7491502dcd24ce9541e1bbd500181bd73ba41860 Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Sun, 1 Dec 2024 18:19:17 -0800 Subject: [PATCH 05/34] Fix typos. --- catkit_core/PoolAllocator.inl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/catkit_core/PoolAllocator.inl b/catkit_core/PoolAllocator.inl index a4548f39..6e02b1f9 100644 --- a/catkit_core/PoolAllocator.inl +++ b/catkit_core/PoolAllocator.inl @@ -32,7 +32,7 @@ std::size_t PoolAllocator::Allocate() // Check if the pool is empty. if (head == -1) { - return nullptr; + return -1; } next = m_Next[head].load(std::memory_order_relaxed); @@ -43,7 +43,7 @@ std::size_t PoolAllocator::Allocate() } template -PoolAllocator::Deallocate(std::size_t index) +void PoolAllocator::Deallocate(std::size_t index) { // Check if the element is within the pool bounds. if (index >= Size) From ca90158df0abac2c29af770863577a7694d71625 Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Thu, 5 Dec 2024 17:00:16 -0800 Subject: [PATCH 06/34] Add a lock-free free list allocator. --- catkit_core/FreeListAllocator.h | 113 ++++++++ catkit_core/FreeListAllocator.inl | 449 ++++++++++++++++++++++++++++++ 2 files changed, 562 insertions(+) create mode 100644 catkit_core/FreeListAllocator.h create mode 100644 catkit_core/FreeListAllocator.inl diff --git a/catkit_core/FreeListAllocator.h b/catkit_core/FreeListAllocator.h new file mode 100644 index 00000000..695af672 --- /dev/null +++ b/catkit_core/FreeListAllocator.h @@ -0,0 +1,113 @@ +#ifndef FREE_LIST_ALLOCATOR_H +#define FREE_LIST_ALLOCATOR_H + +#include "PoolAllocator.h" + +#include +#include + +// A simple lock-free free list allocator. +template +class FreeListAllocator +{ +public: + using BlockHandle = std::int32_t; + using Offset = std::uint64_t; + using Size = std::uint64_t; + + static const BlockHandle INVALID_HANDLE = -1; + + // A unique descriptor of the block. + class BlockDescriptor + { + public: + BlockDescriptor() + { + } + + BlockDescriptor(Offset offset, Size size, bool is_free) + { + Set(offset, size, is_free); + } + + void Set(const Offset &offset, const Size &size, const bool &is_free) + { + m_OffsetAndFreeFlag = (offset & ~_FREE_FLAG) | (_FREE_FLAG * is_free); + m_Size = size; + } + + Offset GetOffset() const + { + return m_OffsetAndFreeFlag & ~_FREE_FLAG; + } + + void SetOffset(const Offset &new_offset) + { + m_OffsetAndFreeFlag = new_offset | (m_OffsetAndFreeFlag & _FREE_FLAG); + } + + Size GetSize() const + { + return m_Size; + } + + void SetSize(const Size &new_size) + { + m_Size = new_size; + } + + bool IsFree() const + { + return m_OffsetAndFreeFlag & _FREE_FLAG; + } + + void SetFree(const bool &is_free) + { + m_OffsetAndFreeFlag = (m_OffsetAndFreeFlag & ~_FREE_FLAG) | (_FREE_FLAG * is_free); + } + + private: + Offset m_OffsetAndFreeFlag; + Size m_Size; + + static const Offset _FREE_FLAG = 0x8000000000000000; + }; + + // Check that the BlockDescriptor is lock-free atomic. 
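+	// (The offset-plus-free-flag and the size are two 64-bit words, so the whole
+	// descriptor can be swapped as a single std::atomic value.)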
+ static_assert(std::atomic::is_always_lock_free); + + struct Block + { + std::atomic descriptor; + std::atomic next; + }; + + FreeListAllocator(std::size_t buffer_size); + ~FreeListAllocator(); + + BlockHandle Allocate(std::size_t size); + void Deallocate(BlockHandle index); + + std::size_t GetOffset(BlockHandle index); + + void PrintState(); + +private: + PoolAllocator m_BlockAllocator; + Block m_Blocks[MaxNumBlocks]; + + std::atomic m_Head; + + BlockHandle FindFirstFreeBlock(std::size_t size); + + void InsertBlockSorted(BlockHandle index); + bool RemoveBlock(BlockHandle index); + + bool MarkBlockAsFree(BlockHandle index, bool mark_free); + + bool TryCoalesceBlocks(BlockHandle a, BlockHandle b, bool owner_of_a); +}; + +#include "FreeListAllocator.inl" + +#endif // FREE_LIST_ALLOCATOR_H diff --git a/catkit_core/FreeListAllocator.inl b/catkit_core/FreeListAllocator.inl new file mode 100644 index 00000000..8524c395 --- /dev/null +++ b/catkit_core/FreeListAllocator.inl @@ -0,0 +1,449 @@ +#include "FreeListAllocator.h" + +#include + +const std::size_t MAX_ATTEMPTS = 5; + +template +FreeListAllocator::FreeListAllocator(std::size_t buffer_size) +{ + // Initialize the free list. + m_Head = m_BlockAllocator.Allocate(); + + m_Blocks[m_Head].descriptor = BlockDescriptor(0, buffer_size, true); + m_Blocks[m_Head].next = -1; +} + +template +FreeListAllocator::~FreeListAllocator() +{ +} + +template +FreeListAllocator::BlockHandle FreeListAllocator::Allocate(std::size_t size) +{ + // Round up the size to the nearest multiple of the alignment. + size = (size + Alignment - 1) & ~(Alignment - 1); + + // std::cout << "Allocating " << size << std::endl; + + for (size_t i = 0; i < MAX_ATTEMPTS; ++i) + { + BlockHandle index = FindFirstFreeBlock(size); + Block &free_block = m_Blocks[index]; + + if (index == -1) + { + // std::cout << "No free block found." << std::endl; + return -1; + } + + BlockDescriptor old_descriptor; + BlockDescriptor new_descriptor; + + // Reduce the size of the free block. + do + { + old_descriptor = free_block.descriptor.load(); + + // If the block is too small or not free, we need to try again. + if (old_descriptor.GetSize() < size || !old_descriptor.IsFree()) + { + // Break out of the nested loop and try again. + // std::cout << "Block is too small or not free. Size of block is " << old_descriptor.GetSize() << std::endl; + break; + } + + if (old_descriptor.GetSize() == size) + { + // The block is exactly the right size. + // std::cout << "Block is exactly the right size." << std::endl; + + // Mark the block as allocated. + if (MarkBlockAsFree(index, false)) + { + // Remove the block from the free list. + // This is guaranteed + RemoveBlock(index); + + // Return the block. + return index; + } + else + { + // Try again. + continue; + } + } + + // Reduce the size of the block by the requested size. + new_descriptor = old_descriptor; + new_descriptor.SetSize(old_descriptor.GetSize() - size); + new_descriptor.SetOffset(old_descriptor.GetOffset() + size); + } while (!free_block.descriptor.compare_exchange_weak(old_descriptor, new_descriptor)); + + if (old_descriptor.GetSize() < size || !old_descriptor.IsFree()) + { + // Try again. 
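+			// (The descriptor changed underneath us: another thread shrank or claimed this block.)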
+ continue; + } + + // std::cout << "Reduced the size of the free block: " << old_descriptor.GetSize() << ", " << new_descriptor.GetSize() << std::endl; + // std::cout << "Old descriptor offset: " << old_descriptor.GetOffset() << std::endl; + // std::cout << "Old descriptor size: " << old_descriptor.GetSize() << std::endl; + // std::cout << "New size: " << size << std::endl; + + // We now have a block that is large enough to allocate the requested size. + // Add a new block for the remaining free space. + BlockHandle allocated_block_handle = m_BlockAllocator.Allocate(); + Block &allocated_block = m_Blocks[allocated_block_handle]; + + allocated_block.descriptor = BlockDescriptor(old_descriptor.GetOffset(), size, false); + allocated_block.next = -1; + + BlockDescriptor descriptor = allocated_block.descriptor.load(); + + // std::cout << "Allocated block is " << descriptor.GetOffset() << ", " << descriptor.GetSize() << std::endl; + + // Return the allocated block. + return allocated_block_handle; + } + + return -1; +} + +template +void FreeListAllocator::Deallocate(BlockHandle index) +{ + if (index == -1) + return; + + // std::cout << "Deallocating block " << index << std::endl; + Block &block = m_Blocks[index]; + + bool owns_index = true; + + // Try to coalesce the block with its neighbors. + while (true) + { + BlockHandle prev = -1; + BlockHandle next = m_Head.load(); + + // std::cout << "Finding the prev and next blocks." << std::endl; + + while (next != -1 && m_Blocks[next].descriptor.load().GetOffset() < block.descriptor.load().GetOffset()) + { + prev = next; + next = m_Blocks[next].next.load(); + } + + // Prev and next are the blocks that are adjacent to the block we are deallocating. + // Try to coalesce the block with its neighbors. + + if (TryCoalesceBlocks(index, prev, owns_index)) + { + // The coalescense attempt was successful. + // The index block is no longer valid. Deallocate it and set the prev block to us. + + if (!owns_index) + RemoveBlock(index); + + m_BlockAllocator.Deallocate(index); + + index = prev; + owns_index = false; + + continue; + } + + if (TryCoalesceBlocks(index, next, owns_index)) + { + // The coalescense attempt was successful. + // The next block is no longer valid. Deallocate it. + + RemoveBlock(index); + m_BlockAllocator.Deallocate(index); + + index = next; + owns_index = false; + + continue; + } + + break; + } + + // If we didn't coalesce the block with its neighbors, add it to the free list. + if (owns_index) + { + InsertBlockSorted(index); + MarkBlockAsFree(index, true); + } +} + +// Try to coalesce two blocks, one of which is owned by us. +// Return whether the coallescing was successful. +template +bool FreeListAllocator::TryCoalesceBlocks(BlockHandle a, BlockHandle b, bool owner_of_a) +{ + // std::cout << "Attempting to coalesce blocks " << a << " and " << b << std::endl; + + if (a == -1 || b == -1) + return false; + + if (!owner_of_a) + { + // std::cout << "Trying to own block A." << std::endl; + // Try to own block A. + if (!MarkBlockAsFree(a, false)) + { + // The block was not free, so we cannot start owning it. + // std::cout << "Starting to own block A failed." << std::endl; + return false; + } + } + + BlockDescriptor descriptor_a = m_Blocks[a].descriptor.load(); + BlockDescriptor descriptor_b = m_Blocks[b].descriptor.load(); + BlockDescriptor new_descriptor = descriptor_b; + + if (!descriptor_b.IsFree()) + { + // The B block was not free and as such cannot be coalesced. + // std::cout << "The B block was not free." 
<< std::endl; + + // Return the A block to its original state and return. + // Note: since we're the owner, this cannot fail. + if (!owner_of_a) + MarkBlockAsFree(a, true); + + return false; + } + + if (descriptor_a.GetOffset() < descriptor_b.GetOffset()) + { + // std::cout << "A block is before B block." << std::endl; + + // The B block is after the A block. + if (descriptor_a.GetOffset() + descriptor_a.GetSize() != descriptor_b.GetOffset()) + { + // std::cout << "The blocks are not adjacent." << std::endl; + + // The blocks are not adjacent. Return the A block to its original state and return. + // Note: since we're the owner, this cannot fail. + if (!owner_of_a) + MarkBlockAsFree(a, true); + + return false; + } + + // Set the new descriptor of the B block. + new_descriptor.SetOffset(descriptor_a.GetOffset()); + new_descriptor.SetSize(descriptor_a.GetSize() + descriptor_b.GetSize()); + } + else + { + // std::cout << "B block is before A block." << std::endl; + + // The B block is before the A block. + if (descriptor_b.GetOffset() + descriptor_b.GetSize() != descriptor_a.GetOffset()) + { + // std::cout << "The blocks are not adjacent: " << descriptor_b.GetOffset() + descriptor_b.GetSize() << " != " << descriptor_a.GetOffset() << std::endl; + + // The blocks are not adjacent. Return the A block to its original state and return. + // Note: since we're the owner, this cannot fail. + if (!owner_of_a) + MarkBlockAsFree(a, true); + + return false; + } + + // Set the new size of the B block. + new_descriptor.SetSize(descriptor_a.GetSize() + descriptor_b.GetSize()); + } + + // std::cout << "Trying to set the new descriptor of the B block, with " << new_descriptor.GetOffset() << " and " << new_descriptor.GetSize() << std::endl; + + // Try to set the new descriptor of the B block. + if (!m_Blocks[b].descriptor.compare_exchange_strong(descriptor_b, new_descriptor)) + { + // The B block was changed by someone else. Return the A block to its original state and return. + // Note: since we're the owner, this cannot fail. + if (!owner_of_a) + MarkBlockAsFree(a, true); + + return false; + } + + // std::cout << "Succesfully coalesced blocks " << a << " and " << b << std::endl; + + return true; +} + +template +FreeListAllocator::BlockHandle FreeListAllocator::FindFirstFreeBlock(std::size_t size) +{ + BlockHandle current = m_Head.load(); + + while (current != -1) + { + Block &block = m_Blocks[current]; + BlockDescriptor descriptor = block.descriptor.load(); + + // Also check the free flag. The block might be on the free list but temporarily reserved. + if (descriptor.GetSize() >= size && descriptor.IsFree()) + { + return current; + } + + current = block.next.load(); + } + + return -1; +} + +template +std::size_t FreeListAllocator::GetOffset(BlockHandle index) +{ + return m_Blocks[index].descriptor.load().GetOffset(); +} + +template +void FreeListAllocator::InsertBlockSorted(BlockHandle index) +{ + BlockHandle previous = -1; + BlockHandle current; + + do + { + current = m_Head.load(); + + while (current != -1 && m_Blocks[current].descriptor.load().GetOffset() < m_Blocks[index].descriptor.load().GetOffset()) + { + previous = current; + current = m_Blocks[current].next; + } + + if (current == index) + { + // The block is already on the free list. + // std::cout << "Block " << index << " is already on the free list." << std::endl; + return; + } + + m_Blocks[index].next = current; + + if (previous == -1) + { + // std::cout << "Attempting to insert the block at the head." 
<< std::endl; + + if (m_Head.compare_exchange_weak(current, index)) + { + // Successfully inserted the block. + // std::cout << "Successfully inserted the block." << std::endl; + return; + } + } + else + { + // std::cout << "Attempting to insert the block in the middle." << std::endl; + + if (m_Blocks[previous].next.compare_exchange_weak(current, index)) + { + // Successfully inserted the block. + // std::cout << "Successfully inserted the block." << std::endl; + return; + } + } + } while (true); +} + +template +bool FreeListAllocator::RemoveBlock(BlockHandle index) +{ + BlockHandle previous = -1; + BlockHandle current; + + // std::cout << "Removing block " << index << std::endl; + + do + { + current = m_Head.load(); + + // Find the previous block. + while (current != index && current != -1) + { + previous = current; + current = m_Blocks[current].next; + } + + if (current == -1) + { + // The block was not on the free list, even though it was supposed to be free. + // std::cout << "Block was not on the free list." << std::endl; + return false; + } + + if (previous == -1) + { + if (m_Head.compare_exchange_weak(current, m_Blocks[index].next)) + { + // Successfully removed the block. + return true; + } + } + else + { + if (m_Blocks[previous].next.compare_exchange_weak(current, m_Blocks[index].next)) + { + // Successfully removed the block. + return true; + } + } + } while (true); +} + +template +bool FreeListAllocator::MarkBlockAsFree(BlockHandle handle, bool mark_free) +{ + // std::cout << "Marking block " << handle << " as " << (mark_free ? "free" : "allocated") << std::endl; + + BlockDescriptor descriptor = m_Blocks[handle].descriptor.load(); + + if (descriptor.IsFree() == mark_free) + { + // The block is already in the desired state. + // std::cout << "The block is already in the desired state." << std::endl; + return false; + } + + BlockDescriptor new_descriptor = descriptor; + new_descriptor.SetFree(mark_free); + + if (!m_Blocks[handle].descriptor.compare_exchange_strong(descriptor, new_descriptor)) + { + // The block was changed in the meantime and we were unsuccessful. + // std::cout << "The block was changed in the meantime." << std::endl; + return false; + } + + // std::cout << "Successfully marked the block." << std::endl; + + return true; +} + +template +void FreeListAllocator::PrintState() +{ + BlockHandle current = m_Head; + + while (current != -1) + { + Block &block = m_Blocks[current]; + BlockDescriptor descriptor = block.descriptor.load(); + + std::cout << "Free block " << current << " has (offset, size) = (" << descriptor.GetOffset() << ", " << descriptor.GetSize() << ")." << std::endl; + + current = block.next; + } +} From 7b7f19155e64cb99ed9dea82037dd7bafbf683cd Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Fri, 6 Dec 2024 09:26:45 -0800 Subject: [PATCH 07/34] Check for coalecense requirements before owning block A. 
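Previously, Deallocate() would start owning block A (flipping its free flag
with a CAS) before checking whether A and B were adjacent at all, or whether
B was even free. Doing those pre-checks on plain loaded descriptors first
avoids bouncing A's descriptor through a CAS on every doomed coalescing
attempt; the final compare-exchange on B's descriptor still re-validates the
state before committing.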
--- catkit_core/FreeListAllocator.h | 1 + catkit_core/FreeListAllocator.inl | 95 ++++++++++++++++--------------- 2 files changed, 51 insertions(+), 45 deletions(-) diff --git a/catkit_core/FreeListAllocator.h b/catkit_core/FreeListAllocator.h index 695af672..d34bb9df 100644 --- a/catkit_core/FreeListAllocator.h +++ b/catkit_core/FreeListAllocator.h @@ -91,6 +91,7 @@ class FreeListAllocator std::size_t GetOffset(BlockHandle index); void PrintState(); + size_t GetNumFreeBlocks() const; private: PoolAllocator m_BlockAllocator; diff --git a/catkit_core/FreeListAllocator.inl b/catkit_core/FreeListAllocator.inl index 8524c395..36d92801 100644 --- a/catkit_core/FreeListAllocator.inl +++ b/catkit_core/FreeListAllocator.inl @@ -1,5 +1,5 @@ #include "FreeListAllocator.h" - +#include "Timing.h" #include const std::size_t MAX_ATTEMPTS = 5; @@ -63,7 +63,7 @@ FreeListAllocator::BlockHandle FreeListAllocator::TryCoalesceBlocks(BlockHandle a if (a == -1 || b == -1) return false; - if (!owner_of_a) + BlockDescriptor descriptor_a = m_Blocks[a].descriptor.load(); + BlockDescriptor descriptor_b = m_Blocks[b].descriptor.load(); + + // Perform a pre-check on the blocks. + if (descriptor_a.GetOffset() < descriptor_b.GetOffset()) { - // std::cout << "Trying to own block A." << std::endl; - // Try to own block A. - if (!MarkBlockAsFree(a, false)) + if (descriptor_a.GetOffset() + descriptor_a.GetSize() != descriptor_b.GetOffset()) { - // The block was not free, so we cannot start owning it. - // std::cout << "Starting to own block A failed." << std::endl; + // The blocks are not adjacent. + // std::cout << "The blocks are not adjacent." << std::endl; + return false; + } + } + else + { + if (descriptor_b.GetOffset() + descriptor_b.GetSize() != descriptor_a.GetOffset()) + { + // The blocks are not adjacent. + // std::cout << "The blocks are not adjacent." << std::endl; return false; } } - - BlockDescriptor descriptor_a = m_Blocks[a].descriptor.load(); - BlockDescriptor descriptor_b = m_Blocks[b].descriptor.load(); - BlockDescriptor new_descriptor = descriptor_b; if (!descriptor_b.IsFree()) { // The B block was not free and as such cannot be coalesced. // std::cout << "The B block was not free." << std::endl; - // Return the A block to its original state and return. - // Note: since we're the owner, this cannot fail. - if (!owner_of_a) - MarkBlockAsFree(a, true); - return false; } - if (descriptor_a.GetOffset() < descriptor_b.GetOffset()) + // We are in principle good to coallesce the blocks. + // Start by owning the A block if we don't already. + if (!owner_of_a) { - // std::cout << "A block is before B block." << std::endl; + BlockDescriptor descriptor_a_old = descriptor_a; + descriptor_a.SetFree(false); - // The B block is after the A block. - if (descriptor_a.GetOffset() + descriptor_a.GetSize() != descriptor_b.GetOffset()) + // Try to own block A. + if (!m_Blocks[a].descriptor.compare_exchange_strong(descriptor_a_old, descriptor_a)) { - // std::cout << "The blocks are not adjacent." << std::endl; - - // The blocks are not adjacent. Return the A block to its original state and return. - // Note: since we're the owner, this cannot fail. - if (!owner_of_a) - MarkBlockAsFree(a, true); - + // The block was changed by someone else. We cannot own it. + // std::cout << "Starting to own block A failed." << std::endl; return false; } + } + + BlockDescriptor new_descriptor = descriptor_b; - // Set the new descriptor of the B block. 
+ if (descriptor_a.GetOffset() < descriptor_b.GetOffset()) + { + // The B block is after the A block. new_descriptor.SetOffset(descriptor_a.GetOffset()); new_descriptor.SetSize(descriptor_a.GetSize() + descriptor_b.GetSize()); } else { - // std::cout << "B block is before A block." << std::endl; - // The B block is before the A block. - if (descriptor_b.GetOffset() + descriptor_b.GetSize() != descriptor_a.GetOffset()) - { - // std::cout << "The blocks are not adjacent: " << descriptor_b.GetOffset() + descriptor_b.GetSize() << " != " << descriptor_a.GetOffset() << std::endl; - - // The blocks are not adjacent. Return the A block to its original state and return. - // Note: since we're the owner, this cannot fail. - if (!owner_of_a) - MarkBlockAsFree(a, true); - - return false; - } - - // Set the new size of the B block. new_descriptor.SetSize(descriptor_a.GetSize() + descriptor_b.GetSize()); } // std::cout << "Trying to set the new descriptor of the B block, with " << new_descriptor.GetOffset() << " and " << new_descriptor.GetSize() << std::endl; // Try to set the new descriptor of the B block. - if (!m_Blocks[b].descriptor.compare_exchange_strong(descriptor_b, new_descriptor)) + if (!m_Blocks[b].descriptor.compare_exchange_weak(descriptor_b, new_descriptor)) { // The B block was changed by someone else. Return the A block to its original state and return. // Note: since we're the owner, this cannot fail. @@ -447,3 +437,18 @@ void FreeListAllocator::PrintState() current = block.next; } } + +template +size_t FreeListAllocator::GetNumFreeBlocks() const +{ + size_t count = 0; + BlockHandle current = m_Head; + + while (current != -1) + { + ++count; + current = m_Blocks[current].next; + } + + return count; +} From abe8d2fb3f3ae3b27d90ccb1079e1dbc272b6149 Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Fri, 6 Dec 2024 09:27:47 -0800 Subject: [PATCH 08/34] Add benchmarks for FreeListAllocator. 
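These are single-threaded ports of two classic allocator micro-benchmarks:
Linux Scalability performs N fixed-size allocations followed by N frees, and
Larson keeps a table of M live blocks and repeatedly replaces a randomly
chosen slot with a new allocation of random size. Both report wall-clock time
and throughput in ops/s.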
--- benchmarks/CMakeLists.txt | 6 ++ benchmarks/free_list_allocator.cpp | 91 ++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) create mode 100644 benchmarks/free_list_allocator.cpp diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index bc716bd2..695b327e 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -24,7 +24,13 @@ add_executable(timestamp timestamp.cpp) target_include_directories(timestamp PUBLIC ../catkit_core) target_link_libraries(timestamp PUBLIC catkit_core) +# Free list allocator benchmark +add_executable(free_list_allocator free_list_allocator.cpp) +target_include_directories(free_list_allocator PUBLIC ../catkit_core) +target_link_libraries(free_list_allocator PUBLIC catkit_core) + # Add install files install(TARGETS datastream_latency DESTINATION bin) install(TARGETS datastream_submit DESTINATION bin) install(TARGETS timestamp DESTINATION bin) +install(TARGETS free_list_allocator DESTINATION bin) diff --git a/benchmarks/free_list_allocator.cpp b/benchmarks/free_list_allocator.cpp new file mode 100644 index 00000000..5ffd62be --- /dev/null +++ b/benchmarks/free_list_allocator.cpp @@ -0,0 +1,91 @@ +#include "FreeListAllocator.h" +#include "Timing.h" +#include + +void benchmark_linux_scalability() +{ + typedef FreeListAllocator<16384, 32> Allocator; + + const size_t N = 10000000; + + auto *handles = new Allocator::BlockHandle[N]; + + Allocator allocator(size_t(32) * N); + + auto start = GetTimeStamp(); + + for (size_t i = 0; i < N; ++i) + { + handles[i] = allocator.Allocate(16); + } + + for (size_t i = 0; i < N; ++i) + { + allocator.Deallocate(handles[i]); + } + + auto end = GetTimeStamp(); + + std::cout << "Linux Scalability:" << std::endl; + std::cout << "Time: " << (end - start) / 1e9 << " sec" << std::endl; + std::cout << "Throughput: " << 2 * N / ((end - start) / 1e9) << " ops/s" << std::endl; + + delete[] handles; +} + +void benchmark_larson() +{ + const size_t ALIGNMENT = 32; + typedef FreeListAllocator<16384, ALIGNMENT> Allocator; + + const size_t N = 10000000; + const size_t M = 1000; + const size_t MIN_SIZE = 16; + const size_t MAX_SIZE = 128; + + auto *handles = new Allocator::BlockHandle[M]; + for (size_t i = 0; i < M; ++i) + { + handles[i] = -1; + } + + auto *indices = new size_t[N]; + auto *sizes = new size_t[N]; + for (size_t i = 0; i < N; ++i) + { + indices[i] = rand() % M; + sizes[i] = (MIN_SIZE + (rand() % (MAX_SIZE - MIN_SIZE))) * ALIGNMENT; + } + + Allocator allocator(size_t(1024) * 1024 * 1024); + + auto start = GetTimeStamp(); + + for (size_t i = 0; i < N; ++i) + { + size_t index = indices[i]; + size_t size = sizes[i]; + + if (handles[index] != -1) + { + allocator.Deallocate(handles[index]); + } + + handles[index] = allocator.Allocate(size); + } + + auto end = GetTimeStamp(); + std::cout << "Larson benchmark:" << std::endl; + std::cout << "Time: " << (end - start) / 1e9 << " sec" << std::endl; + std::cout << "Throughput: " << (N * 2 - M) / ((end - start) / 1e9) << " ops/s" << std::endl; + + delete[] handles; +} + +int main(int argc, char **argv) +{ + benchmark_linux_scalability(); + benchmark_larson(); + + return 0; +} From 9684a46f3e3ec82ab6ed9126fb4d27cf0cfe0576 Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Fri, 6 Dec 2024 13:44:59 -0800 Subject: [PATCH 09/34] Add threadtest benchmark. 
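Threadtest alternates batches of N small allocations with N matching frees,
repeated M times. (Despite its name, the benchmark currently runs on a single
thread; the name refers to the workload pattern it reproduces.)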
--- benchmarks/free_list_allocator.cpp | 36 ++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/benchmarks/free_list_allocator.cpp b/benchmarks/free_list_allocator.cpp index 5ffd62be..c56358c1 100644 --- a/benchmarks/free_list_allocator.cpp +++ b/benchmarks/free_list_allocator.cpp @@ -33,6 +33,41 @@ void benchmark_linux_scalability() delete[] handles; } +void benchmark_threadtest() +{ + typedef FreeListAllocator<16384, 32> Allocator; + + const size_t N = 100; + const size_t M = 100000; + + auto *handles = new Allocator::BlockHandle[M]; + + Allocator allocator(size_t(32) * M); + + auto start = GetTimeStamp(); + + for (size_t i = 0; i < M; ++i) + { + for (size_t j = 0; j < N; ++j) + { + handles[j] = allocator.Allocate(16); + } + + for (size_t j = 0; j < N; ++j) + { + allocator.Deallocate(handles[j]); + } + } + + auto end = GetTimeStamp(); + + std::cout << "Threadtest:" << std::endl; + std::cout << "Time: " << (end - start) / 1e9 << " sec" << std::endl; + std::cout << "Throughput: " << 2 * N * M / ((end - start) / 1e9) << " ops/s" << std::endl; + + delete[] handles; +} + void benchmark_larson() { const size_t ALIGNMENT = 32; @@ -85,6 +120,7 @@ void benchmark_larson() int main(int argc, char **argv) { benchmark_linux_scalability(); + benchmark_threadtest(); benchmark_larson(); return 0; From b77b24c66139c6c0a44abf2cd738f15f399d3b36 Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Fri, 6 Dec 2024 13:45:13 -0800 Subject: [PATCH 10/34] Add benchmark for pool allocator. --- benchmarks/CMakeLists.txt | 6 +++++ benchmarks/pool_allocator.cpp | 41 +++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 benchmarks/pool_allocator.cpp diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 695b327e..7bbce84d 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -29,8 +29,14 @@ add_executable(free_list_allocator free_list_allocator.cpp) target_include_directories(free_list_allocator PUBLIC ../catkit_core) target_link_libraries(free_list_allocator PUBLIC catkit_core) +# Pool allocator benchmark +add_executable(pool_allocator pool_allocator.cpp) +target_include_directories(pool_allocator PUBLIC ../catkit_core) +target_link_libraries(pool_allocator PUBLIC catkit_core) + # Add install files install(TARGETS datastream_latency DESTINATION bin) install(TARGETS datastream_submit DESTINATION bin) install(TARGETS timestamp DESTINATION bin) install(TARGETS free_list_allocator DESTINATION bin) +install(TARGETS pool_allocator DESTINATION bin) diff --git a/benchmarks/pool_allocator.cpp b/benchmarks/pool_allocator.cpp new file mode 100644 index 00000000..1e06ea67 --- /dev/null +++ b/benchmarks/pool_allocator.cpp @@ -0,0 +1,41 @@ +#include "PoolAllocator.h" +#include "Timing.h" +#include + +void benchmark_linux_scalability() +{ + typedef PoolAllocator<16384> Allocator; + + const size_t N = 10000000; + + auto *handles = new size_t[N]; + + Allocator allocator; + + auto start = GetTimeStamp(); + + for (size_t i = 0; i < N; ++i) + { + handles[i] = allocator.Allocate(); + } + + for (size_t i = 0; i < N; ++i) + { + allocator.Deallocate(handles[i]); + } + + auto end = GetTimeStamp(); + + std::cout << "Linux Scalability:" << std::endl; + std::cout << "Time: " << (end - start) / 1e9 << " sec" << std::endl; + std::cout << "Throughput: " << 2 * N / ((end - start) / 1e9) << " ops/s" << std::endl; + + delete[] handles; +} + +int main(int argc, char **argv) +{ + benchmark_linux_scalability(); + + return 0; +} From 
bbcb88675087a5217b81008bfbae7b3feb0bf3e9 Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Fri, 6 Dec 2024 13:46:19 -0800 Subject: [PATCH 11/34] Fix for expected cannot be atomic. --- catkit_core/PoolAllocator.inl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/catkit_core/PoolAllocator.inl b/catkit_core/PoolAllocator.inl index 6e02b1f9..4ef00b3f 100644 --- a/catkit_core/PoolAllocator.inl +++ b/catkit_core/PoolAllocator.inl @@ -51,9 +51,12 @@ void PoolAllocator::Deallocate(std::size_t index) return; } + std::size_t head; + // Push the element back on the front of the linked list. do { - m_Next[index] = m_Head.load(std::memory_order_relaxed); - } while (!m_Head.compare_exchange_weak(m_Next[index], index)); -} \ No newline at end of file + head = m_Head.load(std::memory_order_relaxed); + m_Next[index] = head; + } while (!m_Head.compare_exchange_weak(head, index)); +} From 35542f6b160956703bdc16d636036a477c91b497 Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Fri, 6 Dec 2024 15:07:22 -0800 Subject: [PATCH 12/34] Fix typename compiler warning. --- catkit_core/FreeListAllocator.inl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/catkit_core/FreeListAllocator.inl b/catkit_core/FreeListAllocator.inl index 36d92801..aec9e0fd 100644 --- a/catkit_core/FreeListAllocator.inl +++ b/catkit_core/FreeListAllocator.inl @@ -20,7 +20,7 @@ FreeListAllocator::~FreeListAllocator() } template -FreeListAllocator::BlockHandle FreeListAllocator::Allocate(std::size_t size) +typename FreeListAllocator::BlockHandle FreeListAllocator::Allocate(std::size_t size) { // Round up the size to the nearest multiple of the alignment. size = (size + Alignment - 1) & ~(Alignment - 1); @@ -271,7 +271,7 @@ bool FreeListAllocator::TryCoalesceBlocks(BlockHandle a } template -FreeListAllocator::BlockHandle FreeListAllocator::FindFirstFreeBlock(std::size_t size) +typename FreeListAllocator::BlockHandle FreeListAllocator::FindFirstFreeBlock(std::size_t size) { BlockHandle current = m_Head.load(); From 2fd43a45b7828afeef473774c4d567ccb5ccc35c Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Fri, 6 Dec 2024 15:07:35 -0800 Subject: [PATCH 13/34] Add missing include. --- catkit_core/PoolAllocator.h | 1 + 1 file changed, 1 insertion(+) diff --git a/catkit_core/PoolAllocator.h b/catkit_core/PoolAllocator.h index 92524f0f..36176cd2 100644 --- a/catkit_core/PoolAllocator.h +++ b/catkit_core/PoolAllocator.h @@ -3,6 +3,7 @@ #include #include +#include // A simple lock-free pool allocator. template From c61018774aa03d6457dbce574e9c9f1426c84c48 Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Fri, 6 Dec 2024 16:42:57 -0800 Subject: [PATCH 14/34] Use 32bit offset and size. --- catkit_core/FreeListAllocator.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/catkit_core/FreeListAllocator.h b/catkit_core/FreeListAllocator.h index d34bb9df..90adf001 100644 --- a/catkit_core/FreeListAllocator.h +++ b/catkit_core/FreeListAllocator.h @@ -12,8 +12,8 @@ class FreeListAllocator { public: using BlockHandle = std::int32_t; - using Offset = std::uint64_t; - using Size = std::uint64_t; + using Offset = std::uint32_t; + using Size = std::uint32_t; static const BlockHandle INVALID_HANDLE = -1; @@ -70,7 +70,7 @@ class FreeListAllocator Offset m_OffsetAndFreeFlag; Size m_Size; - static const Offset _FREE_FLAG = 0x8000000000000000; + static const Offset _FREE_FLAG = 0x80000000; }; // Check that the BlockDescriptor is lock-free atomic. 
From b7543ce5a3d09a69bc4922ccf7f894279ec806c9 Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Fri, 6 Dec 2024 16:43:18 -0800 Subject: [PATCH 15/34] Put free-flag on the size (since it's usually smaller). --- catkit_core/FreeListAllocator.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/catkit_core/FreeListAllocator.h b/catkit_core/FreeListAllocator.h index 90adf001..f03f58a7 100644 --- a/catkit_core/FreeListAllocator.h +++ b/catkit_core/FreeListAllocator.h @@ -32,43 +32,43 @@ class FreeListAllocator void Set(const Offset &offset, const Size &size, const bool &is_free) { - m_OffsetAndFreeFlag = (offset & ~_FREE_FLAG) | (_FREE_FLAG * is_free); - m_Size = size; + m_Offset = offset; + m_SizeAndFreeFlag = (offset & ~_FREE_FLAG) | (_FREE_FLAG * is_free); } Offset GetOffset() const { - return m_OffsetAndFreeFlag & ~_FREE_FLAG; + return m_Offset; } void SetOffset(const Offset &new_offset) { - m_OffsetAndFreeFlag = new_offset | (m_OffsetAndFreeFlag & _FREE_FLAG); + m_Offset = new_offset; } Size GetSize() const { - return m_Size; + return m_SizeAndFreeFlag & ~_FREE_FLAG; } void SetSize(const Size &new_size) { - m_Size = new_size; + m_SizeAndFreeFlag = (new_size & ~_FREE_FLAG) | (m_SizeAndFreeFlag & _FREE_FLAG); } bool IsFree() const { - return m_OffsetAndFreeFlag & _FREE_FLAG; + return m_SizeAndFreeFlag & _FREE_FLAG; } void SetFree(const bool &is_free) { - m_OffsetAndFreeFlag = (m_OffsetAndFreeFlag & ~_FREE_FLAG) | (_FREE_FLAG * is_free); + m_SizeAndFreeFlag = (m_SizeAndFreeFlag & ~_FREE_FLAG) | (_FREE_FLAG * is_free); } private: - Offset m_OffsetAndFreeFlag; - Size m_Size; + Offset m_Offset; + Size m_SizeAndFreeFlag; static const Offset _FREE_FLAG = 0x80000000; }; From 7cd9c22bd7a93959b71d8bc5e8cbe39026931f0c Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Fri, 6 Dec 2024 16:54:48 -0800 Subject: [PATCH 16/34] Fix typo. --- catkit_core/FreeListAllocator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catkit_core/FreeListAllocator.h b/catkit_core/FreeListAllocator.h index f03f58a7..519fe464 100644 --- a/catkit_core/FreeListAllocator.h +++ b/catkit_core/FreeListAllocator.h @@ -33,7 +33,7 @@ class FreeListAllocator void Set(const Offset &offset, const Size &size, const bool &is_free) { m_Offset = offset; - m_SizeAndFreeFlag = (offset & ~_FREE_FLAG) | (_FREE_FLAG * is_free); + m_SizeAndFreeFlag = (size & ~_FREE_FLAG) | (_FREE_FLAG * is_free); } Offset GetOffset() const From 53c33769fb402cdb570e683d8a664b427d0db2fe Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Fri, 6 Dec 2024 18:09:07 -0800 Subject: [PATCH 17/34] Use an external buffer. 
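The pool allocator now manages storage it does not own: the caller supplies
the buffer (typically a mapped shared-memory segment) and receives slot
indices instead of pointers. Intended usage, sketched below with a heap
buffer standing in for the shared mapping and a caller-chosen capacity:

	char *buffer = new char[PoolAllocator::CalculateBufferSize(capacity)];

	PoolAllocator allocator(buffer, capacity);
	allocator.Initialize();  // done once, by the creating process

	std::size_t slot = allocator.Allocate();  // -1 (wrapped to SIZE_MAX) signals exhaustion
	allocator.Deallocate(slot);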
--- benchmarks/pool_allocator.cpp | 11 +++-- catkit_core/CMakeLists.txt | 1 + catkit_core/FreeListAllocator.h | 2 +- catkit_core/FreeListAllocator.inl | 1 + catkit_core/PoolAllocator.cpp | 68 +++++++++++++++++++++++++++++++ catkit_core/PoolAllocator.h | 17 ++++---- catkit_core/PoolAllocator.inl | 62 ---------------------------- 7 files changed, 88 insertions(+), 74 deletions(-) create mode 100644 catkit_core/PoolAllocator.cpp delete mode 100644 catkit_core/PoolAllocator.inl diff --git a/benchmarks/pool_allocator.cpp b/benchmarks/pool_allocator.cpp index 1e06ea67..bd1d11ab 100644 --- a/benchmarks/pool_allocator.cpp +++ b/benchmarks/pool_allocator.cpp @@ -4,13 +4,15 @@ void benchmark_linux_scalability() { - typedef PoolAllocator<16384> Allocator; - const size_t N = 10000000; + const size_t CAPACITY = 2 * N; - auto *handles = new size_t[N]; + void *buffer = new char[PoolAllocator::CalculateBufferSize(CAPACITY)]; - Allocator allocator; + PoolAllocator allocator(buffer, CAPACITY); + allocator.Initialize(); + + auto *handles = new size_t[N]; auto start = GetTimeStamp(); @@ -31,6 +33,7 @@ void benchmark_linux_scalability() std::cout << "Throughput: " << 2 * N / ((end - start) / 1e9) << " ops/s" << std::endl; delete[] handles; + delete[] buffer; } int main(int argc, char **argv) diff --git a/catkit_core/CMakeLists.txt b/catkit_core/CMakeLists.txt index af141bdd..c4966ded 100644 --- a/catkit_core/CMakeLists.txt +++ b/catkit_core/CMakeLists.txt @@ -33,6 +33,7 @@ add_library(catkit_core STATIC Tracing.cpp Types.cpp Util.cpp + PoolAllocator.cpp proto/core.pb.cc proto/logging.pb.cc proto/testbed.pb.cc diff --git a/catkit_core/FreeListAllocator.h b/catkit_core/FreeListAllocator.h index 519fe464..f14154f6 100644 --- a/catkit_core/FreeListAllocator.h +++ b/catkit_core/FreeListAllocator.h @@ -94,7 +94,7 @@ class FreeListAllocator size_t GetNumFreeBlocks() const; private: - PoolAllocator m_BlockAllocator; + PoolAllocator m_BlockAllocator; Block m_Blocks[MaxNumBlocks]; std::atomic m_Head; diff --git a/catkit_core/FreeListAllocator.inl b/catkit_core/FreeListAllocator.inl index aec9e0fd..d7f6d07c 100644 --- a/catkit_core/FreeListAllocator.inl +++ b/catkit_core/FreeListAllocator.inl @@ -6,6 +6,7 @@ const std::size_t MAX_ATTEMPTS = 5; template FreeListAllocator::FreeListAllocator(std::size_t buffer_size) + : m_BlockAllocator(nullptr, MaxNumBlocks) { // Initialize the free list. m_Head = m_BlockAllocator.Allocate(); diff --git a/catkit_core/PoolAllocator.cpp b/catkit_core/PoolAllocator.cpp new file mode 100644 index 00000000..cda70157 --- /dev/null +++ b/catkit_core/PoolAllocator.cpp @@ -0,0 +1,68 @@ +#include "PoolAllocator.h" + +PoolAllocator::PoolAllocator(void *buffer, std::size_t capacity) + : m_Head(static_cast(buffer)), m_Next(static_cast(buffer) + 1), m_Capacity(capacity) +{ +} + +void PoolAllocator::Initialize() +{ + // Initialize the linked list. + for (std::size_t i = 0; i < m_Capacity; ++i) + { + if (i == m_Capacity - 1) + { + m_Next[i] = -1; + } + else + { + m_Next[i] = i + 1; + } + } +} + +std::size_t PoolAllocator::CalculateBufferSize(std::size_t capacity) +{ + return sizeof(std::atomic_size_t) * (capacity + 1); +} + +std::size_t PoolAllocator::Allocate() +{ + std::size_t head; + std::size_t next; + + // Pop the first element from the linked list. + do + { + head = m_Head->load(std::memory_order_relaxed); + + // Check if the pool is empty. 
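+		// (An empty free list means every slot is currently handed out.)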
+ if (head == -1) + { + return -1; + } + + next = m_Next[head].load(std::memory_order_relaxed); + } while (!m_Head->compare_exchange_weak(head, next)); + + // Return the popped element. + return head; +} + +void PoolAllocator::Deallocate(std::size_t index) +{ + // Check if the element is within the pool bounds. + if (index >= m_Capacity) + { + return; + } + + std::size_t head; + + // Push the element back on the front of the linked list. + do + { + head = m_Head->load(std::memory_order_relaxed); + m_Next[index] = head; + } while (!m_Head->compare_exchange_weak(head, index)); +} diff --git a/catkit_core/PoolAllocator.h b/catkit_core/PoolAllocator.h index 36176cd2..9809f3af 100644 --- a/catkit_core/PoolAllocator.h +++ b/catkit_core/PoolAllocator.h @@ -6,19 +6,22 @@ #include // A simple lock-free pool allocator. -template class PoolAllocator { -private: - std::atomic_size_t m_Head; - std::atomic_size_t m_Next[Size]; public: - PoolAllocator(); + PoolAllocator(void *buffer, size_t capacity); + + void Initialize(); + + static std::size_t CalculateBufferSize(std::size_t capacity); size_t Allocate(); void Deallocate(size_t index); -}; -#include "PoolAllocator.inl" +private: + std::size_t m_Capacity; + std::atomic_size_t *m_Head; + std::atomic_size_t *m_Next; +}; #endif // POOL_ALLOCATOR_H diff --git a/catkit_core/PoolAllocator.inl b/catkit_core/PoolAllocator.inl deleted file mode 100644 index 4ef00b3f..00000000 --- a/catkit_core/PoolAllocator.inl +++ /dev/null @@ -1,62 +0,0 @@ -#include "PoolAllocator.h" - -template -PoolAllocator::PoolAllocator() - : m_Head(0) -{ - // Initialize the linked list. - for (size_t i = 0; i < Size; ++i) - { - if (i == Size - 1) - { - m_Next[i] = -1; - } - else - { - m_Next[i] = i + 1; - } - } -} - -template -std::size_t PoolAllocator::Allocate() -{ - std::size_t head; - std::size_t next; - - // Pop the first element from the linked list. - do - { - head = m_Head.load(std::memory_order_relaxed); - - // Check if the pool is empty. - if (head == -1) - { - return -1; - } - - next = m_Next[head].load(std::memory_order_relaxed); - } while (!m_Head.compare_exchange_weak(head, next)); - - // Return the popped element. - return head; -} - -template -void PoolAllocator::Deallocate(std::size_t index) -{ - // Check if the element is within the pool bounds. - if (index >= Size) - { - return; - } - - std::size_t head; - - // Push the element back on the front of the linked list. - do - { - head = m_Head.load(std::memory_order_relaxed); - m_Next[index] = head; - } while (!m_Head.compare_exchange_weak(head, index)); -} From 267cee0aa8cf4c3c8501ec11161940d33568cf3e Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Wed, 11 Dec 2024 11:59:41 -0800 Subject: [PATCH 18/34] Refactor pool allocator to use an external buffer. 
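All allocator state now lives inside the caller-provided metadata buffer: a
small versioned Header holding the capacity and the free-list head, followed
by the array of next-indices. The creating process calls Initialize(capacity)
once; any other process can then construct a PoolAllocator over the same
memory and use it immediately. Handles become explicit 32-bit values, with
INVALID_HANDLE marking exhaustion instead of a wrapped -1.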
--- benchmarks/pool_allocator.cpp | 6 ++--- catkit_core/PoolAllocator.cpp | 47 +++++++++++++++++++++-------------- catkit_core/PoolAllocator.h | 35 ++++++++++++++++++++------ 3 files changed, 59 insertions(+), 29 deletions(-) diff --git a/benchmarks/pool_allocator.cpp b/benchmarks/pool_allocator.cpp index bd1d11ab..9c642e1c 100644 --- a/benchmarks/pool_allocator.cpp +++ b/benchmarks/pool_allocator.cpp @@ -7,10 +7,10 @@ void benchmark_linux_scalability() const size_t N = 10000000; const size_t CAPACITY = 2 * N; - void *buffer = new char[PoolAllocator::CalculateBufferSize(CAPACITY)]; + char *buffer = new char[PoolAllocator::CalculateMetadataBufferSize(CAPACITY)]; - PoolAllocator allocator(buffer, CAPACITY); - allocator.Initialize(); + PoolAllocator allocator(buffer); + allocator.Initialize(CAPACITY); auto *handles = new size_t[N]; diff --git a/catkit_core/PoolAllocator.cpp b/catkit_core/PoolAllocator.cpp index cda70157..a7c38f55 100644 --- a/catkit_core/PoolAllocator.cpp +++ b/catkit_core/PoolAllocator.cpp @@ -1,18 +1,29 @@ #include "PoolAllocator.h" -PoolAllocator::PoolAllocator(void *buffer, std::size_t capacity) - : m_Head(static_cast(buffer)), m_Next(static_cast(buffer) + 1), m_Capacity(capacity) +const std::uint8_t VERSION[4] = {0, 0, 0, 0}; + +PoolAllocator::PoolAllocator(void *metadata_buffer) + : m_Header(*static_cast
(metadata_buffer)), + m_Capacity(m_Header.capacity), + m_Head(m_Header.head), + m_Next(reinterpret_cast(static_cast(metadata_buffer) + sizeof(Header))) { } -void PoolAllocator::Initialize() +void PoolAllocator::Initialize(std::uint32_t capacity) { + // Set version and capacity. + std::memcpy(m_Header.version, VERSION, sizeof(VERSION)); + m_Capacity = capacity; + // Initialize the linked list. + m_Head.store(0, std::memory_order_relaxed); + for (std::size_t i = 0; i < m_Capacity; ++i) { if (i == m_Capacity - 1) { - m_Next[i] = -1; + m_Next[i] = INVALID_HANDLE; } else { @@ -21,35 +32,36 @@ void PoolAllocator::Initialize() } } -std::size_t PoolAllocator::CalculateBufferSize(std::size_t capacity) +std::size_t PoolAllocator::CalculateMetadataBufferSize(std::uint32_t capacity) { - return sizeof(std::atomic_size_t) * (capacity + 1); + std::size_t size = sizeof(Header); + size += capacity * sizeof(std::atomic); + + return size; } -std::size_t PoolAllocator::Allocate() +PoolAllocator::BlockHandle PoolAllocator::Allocate() { - std::size_t head; - std::size_t next; + BlockHandle head = m_Head.load(std::memory_order_relaxed); + BlockHandle next; // Pop the first element from the linked list. do { - head = m_Head->load(std::memory_order_relaxed); - // Check if the pool is empty. - if (head == -1) + if (head == INVALID_HANDLE) { - return -1; + return INVALID_HANDLE; } next = m_Next[head].load(std::memory_order_relaxed); - } while (!m_Head->compare_exchange_weak(head, next)); + } while (!m_Head.compare_exchange_weak(head, next)); // Return the popped element. return head; } -void PoolAllocator::Deallocate(std::size_t index) +void PoolAllocator::Deallocate(BlockHandle index) { // Check if the element is within the pool bounds. if (index >= m_Capacity) @@ -57,12 +69,11 @@ void PoolAllocator::Deallocate(std::size_t index) return; } - std::size_t head; + BlockHandle head = m_Head.load(std::memory_order_relaxed);; // Push the element back on the front of the linked list. do { - head = m_Head->load(std::memory_order_relaxed); m_Next[index] = head; - } while (!m_Head->compare_exchange_weak(head, index)); + } while (!m_Head.compare_exchange_weak(head, index)); } diff --git a/catkit_core/PoolAllocator.h b/catkit_core/PoolAllocator.h index 9809f3af..4b5397c4 100644 --- a/catkit_core/PoolAllocator.h +++ b/catkit_core/PoolAllocator.h @@ -4,24 +4,43 @@ #include #include #include +#include // A simple lock-free pool allocator. class PoolAllocator { public: - PoolAllocator(void *buffer, size_t capacity); + using BlockHandle = std::uint32_t; + static const BlockHandle INVALID_HANDLE = std::numeric_limits::max(); - void Initialize(); + PoolAllocator(void *metadata_buffer); - static std::size_t CalculateBufferSize(std::size_t capacity); + void Initialize(std::uint32_t capacity); - size_t Allocate(); - void Deallocate(size_t index); + static std::size_t CalculateMetadataBufferSize(std::uint32_t capacity); + + BlockHandle Allocate(); + void Deallocate(BlockHandle index); private: - std::size_t m_Capacity; - std::atomic_size_t *m_Head; - std::atomic_size_t *m_Next; + struct Header + { + std::uint8_t version[4]; + std::uint32_t capacity; + std::atomic head; + }; + + // Ensure a specific memory layout. 
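+	// (The header is shared between processes, so its byte layout must not
+	// vary with the compiler's padding choices.)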
+ static_assert(offsetof(PoolAllocator::Header, version) == 0); + static_assert(offsetof(PoolAllocator::Header, capacity) == 4); + static_assert(offsetof(PoolAllocator::Header, head) == 8); + static_assert(sizeof(PoolAllocator::Header) == 12); + + Header &m_Header; + + std::uint32_t &m_Capacity; + std::atomic &m_Head; + std::atomic *m_Next; }; #endif // POOL_ALLOCATOR_H From f9f77b300c425c0dc0d2306672b0c1f1b51a3818 Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Wed, 11 Dec 2024 12:00:03 -0800 Subject: [PATCH 19/34] Refactor free list allocator to use external buffer. --- benchmarks/free_list_allocator.cpp | 38 ++- catkit_core/CMakeLists.txt | 1 + ...istAllocator.inl => FreeListAllocator.cpp} | 226 ++++++++++++------ catkit_core/FreeListAllocator.h | 115 +++++---- 4 files changed, 229 insertions(+), 151 deletions(-) rename catkit_core/{FreeListAllocator.inl => FreeListAllocator.cpp} (55%) diff --git a/benchmarks/free_list_allocator.cpp b/benchmarks/free_list_allocator.cpp index c56358c1..2ded58ce 100644 --- a/benchmarks/free_list_allocator.cpp +++ b/benchmarks/free_list_allocator.cpp @@ -4,13 +4,20 @@ void benchmark_linux_scalability() { - typedef FreeListAllocator<16384, 32> Allocator; + typedef FreeListAllocator Allocator; const size_t N = 10000000; - auto *handles = new Allocator::BlockHandle[N]; + const size_t NUM_BLOCKS = N * 2; + const size_t ALIGNMENT = 32; + + auto *handles = new FreeListAllocator::BlockHandle[N]; + + size_t buffer_size = FreeListAllocator::ComputeMetadataBufferSize(NUM_BLOCKS); + char *buffer = new char[buffer_size]; - Allocator allocator(size_t(32) * N); + FreeListAllocator allocator(buffer); + allocator.Initialize(NUM_BLOCKS, ALIGNMENT, NUM_BLOCKS * ALIGNMENT); auto start = GetTimeStamp(); @@ -31,18 +38,23 @@ void benchmark_linux_scalability() std::cout << "Throughput: " << 2 * N / ((end - start) / 1e9) << " ops/s" << std::endl; delete[] handles; + delete[] buffer; } void benchmark_threadtest() { - typedef FreeListAllocator<16384, 32> Allocator; - const size_t N = 100; const size_t M = 100000; + const size_t NUM_BLOCKS = N * 2; + const size_t ALIGNMENT = 32; - auto *handles = new Allocator::BlockHandle[M]; + auto *handles = new FreeListAllocator::BlockHandle[M]; - Allocator allocator(size_t(32) * M); + size_t buffer_size = FreeListAllocator::ComputeMetadataBufferSize(NUM_BLOCKS); + char *buffer = new char[buffer_size]; + + FreeListAllocator allocator(buffer); + allocator.Initialize(NUM_BLOCKS, ALIGNMENT, NUM_BLOCKS * ALIGNMENT); auto start = GetTimeStamp(); @@ -71,19 +83,25 @@ void benchmark_threadtest() void benchmark_larson() { const size_t ALIGNMENT = 32; - typedef FreeListAllocator<16384, ALIGNMENT> Allocator; const size_t N = 10000000; const size_t M = 1000; const size_t MIN_SIZE = 16; const size_t MAX_SIZE = 128; + const size_t NUM_BLOCKS = M * 2; - auto *handles = new Allocator::BlockHandle[M]; + auto *handles = new FreeListAllocator::BlockHandle[M]; for (size_t i = 0; i < M; ++i) { handles[i] = -1; } + size_t buffer_size = FreeListAllocator::ComputeMetadataBufferSize(NUM_BLOCKS); + char *buffer = new char[buffer_size]; + + FreeListAllocator allocator(buffer); + allocator.Initialize(NUM_BLOCKS, ALIGNMENT, MAX_SIZE * NUM_BLOCKS); + auto *indices = new size_t[N]; auto *sizes = new size_t[N]; for (size_t i = 0; i < N; ++i) @@ -92,8 +110,6 @@ void benchmark_larson() sizes[i] = (MIN_SIZE + (rand() % (MAX_SIZE - MIN_SIZE))) * ALIGNMENT; } - Allocator allocator(size_t(1024) * 1024 * 1024); - auto start = GetTimeStamp(); for (size_t i = 0; i < N; ++i) diff --git 
a/catkit_core/CMakeLists.txt b/catkit_core/CMakeLists.txt index c4966ded..6258d432 100644 --- a/catkit_core/CMakeLists.txt +++ b/catkit_core/CMakeLists.txt @@ -34,6 +34,7 @@ add_library(catkit_core STATIC Types.cpp Util.cpp PoolAllocator.cpp + FreeListAllocator.cpp proto/core.pb.cc proto/logging.pb.cc proto/testbed.pb.cc diff --git a/catkit_core/FreeListAllocator.inl b/catkit_core/FreeListAllocator.cpp similarity index 55% rename from catkit_core/FreeListAllocator.inl rename to catkit_core/FreeListAllocator.cpp index d7f6d07c..df1ff108 100644 --- a/catkit_core/FreeListAllocator.inl +++ b/catkit_core/FreeListAllocator.cpp @@ -2,41 +2,114 @@ #include "Timing.h" #include +//#define DEBUG_PRINT(a) std::cout << a << std::endl +#define DEBUG_PRINT(a) + const std::size_t MAX_ATTEMPTS = 5; +const std::uint8_t VERSION[4] = {0, 0, 0, 0}; -template -FreeListAllocator::FreeListAllocator(std::size_t buffer_size) - : m_BlockAllocator(nullptr, MaxNumBlocks) +FreeListAllocator::BlockDescriptor::BlockDescriptor() { - // Initialize the free list. - m_Head = m_BlockAllocator.Allocate(); +} - m_Blocks[m_Head].descriptor = BlockDescriptor(0, buffer_size, true); - m_Blocks[m_Head].next = -1; +FreeListAllocator::BlockDescriptor::BlockDescriptor(Offset offset, Size size, bool is_free) +{ + Set(offset, size, is_free); +} + +void FreeListAllocator::BlockDescriptor::Set(const Offset &offset, const Size &size, const bool &is_free) +{ + m_Offset = offset; + m_SizeAndFreeFlag = (size & ~_FREE_FLAG) | (_FREE_FLAG * is_free); +} + +FreeListAllocator::Offset FreeListAllocator::BlockDescriptor::GetOffset() const +{ + return m_Offset; +} + +void FreeListAllocator::BlockDescriptor::SetOffset(const Offset &new_offset) +{ + m_Offset = new_offset; } -template -FreeListAllocator::~FreeListAllocator() +FreeListAllocator::Size FreeListAllocator::BlockDescriptor::GetSize() const { + return m_SizeAndFreeFlag & ~_FREE_FLAG; } -template -typename FreeListAllocator::BlockHandle FreeListAllocator::Allocate(std::size_t size) +void FreeListAllocator::BlockDescriptor::SetSize(const Size &new_size) +{ + m_SizeAndFreeFlag = (new_size & ~_FREE_FLAG) | (m_SizeAndFreeFlag & _FREE_FLAG); +} + +bool FreeListAllocator::BlockDescriptor::IsFree() const +{ + return m_SizeAndFreeFlag & _FREE_FLAG; +} + +void FreeListAllocator::BlockDescriptor::SetFree(const bool &is_free) +{ + m_SizeAndFreeFlag = (m_SizeAndFreeFlag & ~_FREE_FLAG) | (_FREE_FLAG * is_free); +} + +FreeListAllocator::FreeListAllocator(void *metadata_buffer) + : m_Header(*static_cast
(metadata_buffer)), + m_MaxNumBlocks(m_Header.max_num_blocks), + m_Head(m_Header.head), + m_Alignment(m_Header.alignment), + m_BlockAllocator(static_cast(metadata_buffer) + sizeof(Header)), + m_MetadataBuffer(metadata_buffer) +{ + std::size_t block_list_offset = sizeof(Header) + PoolAllocator::CalculateMetadataBufferSize(m_MaxNumBlocks); + m_Blocks = reinterpret_cast(static_cast(m_MetadataBuffer) + block_list_offset); +} + +std::size_t FreeListAllocator::ComputeMetadataBufferSize(std::size_t max_num_blocks) +{ + std::size_t size = sizeof(Header); + size += PoolAllocator::CalculateMetadataBufferSize(max_num_blocks); + size += sizeof(Block) * max_num_blocks; + + return size; +} + +void FreeListAllocator::Initialize(std::size_t max_num_blocks, std::size_t alignment, std::size_t buffer_size) +{ + std::memcpy(m_Header.version, VERSION, sizeof(VERSION)); + m_Header.max_num_blocks = max_num_blocks; + m_Header.alignment = alignment; + m_Header.total_buffer_size = buffer_size; + + // Initialize the internal allocator. + m_BlockAllocator.Initialize(max_num_blocks); + + // Initialize the free list. + m_Head = m_BlockAllocator.Allocate(); + + std::size_t block_list_offset = sizeof(Header) + PoolAllocator::CalculateMetadataBufferSize(m_MaxNumBlocks); + m_Blocks = reinterpret_cast(static_cast(m_MetadataBuffer) + block_list_offset); + + m_Blocks[m_Head].descriptor = BlockDescriptor(0, buffer_size, true); + m_Blocks[m_Head].next = INVALID_HANDLE; +} + +typename FreeListAllocator::BlockHandle FreeListAllocator::Allocate(std::size_t size) { // Round up the size to the nearest multiple of the alignment. - size = (size + Alignment - 1) & ~(Alignment - 1); + size = (size + m_Alignment - 1) & ~(m_Alignment - 1); - // std::cout << "Allocating " << size << std::endl; + DEBUG_PRINT("Allocating " << size); for (size_t i = 0; i < MAX_ATTEMPTS; ++i) { BlockHandle index = FindFirstFreeBlock(size); Block &free_block = m_Blocks[index]; - if (index == -1) + if (index == INVALID_HANDLE) { - // std::cout << "No free block found." << std::endl; - return -1; + DEBUG_PRINT("No free block found."); + return INVALID_HANDLE; } BlockDescriptor old_descriptor; @@ -51,14 +124,14 @@ typename FreeListAllocator::BlockHandle FreeListAllocat if (old_descriptor.GetSize() < size || !old_descriptor.IsFree()) { // Break out of the nested loop and try again. - // std::cout << "Block is too small or not free. Size of block is " << old_descriptor.GetSize() << std::endl; + DEBUG_PRINT("Block is too small or not free. Size of block is " << old_descriptor.GetSize()); break; } if (old_descriptor.GetSize() == size) { // The block is exactly the right size. - // std::cout << "Block is exactly the right size." << std::endl; + DEBUG_PRINT("Block is exactly the right size."); // Mark the block as allocated. 
if (MarkBlockAsFree(index, false)) @@ -89,37 +162,40 @@ typename FreeListAllocator::BlockHandle FreeListAllocat continue; } - // std::cout << "Reduced the size of the free block: " << old_descriptor.GetSize() << ", " << new_descriptor.GetSize() << std::endl; - // std::cout << "Old descriptor offset: " << old_descriptor.GetOffset() << std::endl; - // std::cout << "Old descriptor size: " << old_descriptor.GetSize() << std::endl; - // std::cout << "New size: " << size << std::endl; + DEBUG_PRINT("Reduced the size of the free block: " << old_descriptor.GetSize() << ", " << new_descriptor.GetSize()); + DEBUG_PRINT("Old descriptor offset: " << old_descriptor.GetOffset()); + DEBUG_PRINT("Old descriptor size: " << old_descriptor.GetSize()); + DEBUG_PRINT("New size: " << size); // We now have a block that is large enough to allocate the requested size. // Add a new block for the remaining free space. - BlockHandle allocated_block_handle = m_BlockAllocator.Allocate(); + PoolAllocator::BlockHandle allocated_block_handle = m_BlockAllocator.Allocate(); + DEBUG_PRINT("Allocated block handle is " << allocated_block_handle); + Block &allocated_block = m_Blocks[allocated_block_handle]; allocated_block.descriptor = BlockDescriptor(old_descriptor.GetOffset(), size, false); - allocated_block.next = -1; + allocated_block.next = INVALID_HANDLE; + + DEBUG_PRINT("Done setting the descriptor."); BlockDescriptor descriptor = allocated_block.descriptor.load(); - // std::cout << "Allocated block is " << descriptor.GetOffset() << ", " << descriptor.GetSize() << std::endl; + DEBUG_PRINT("Allocated block is " << descriptor.GetOffset() << ", " << descriptor.GetSize()); // Return the allocated block. return allocated_block_handle; } - return -1; + return INVALID_HANDLE; } -template -void FreeListAllocator::Deallocate(BlockHandle index) +void FreeListAllocator::Deallocate(BlockHandle index) { - if (index == -1) + if (index == INVALID_HANDLE) return; - // std::cout << "Deallocating block " << index << std::endl; + DEBUG_PRINT("Deallocating block " << index); Block &block = m_Blocks[index]; bool owns_index = true; @@ -127,12 +203,12 @@ void FreeListAllocator::Deallocate(BlockHandle index) // Try to coalesce the block with its neighbors. while (true) { - BlockHandle prev = -1; + BlockHandle prev = INVALID_HANDLE; BlockHandle next = m_Head.load(); - // std::cout << "Finding the prev and next blocks." << std::endl; + DEBUG_PRINT("Finding the prev and next blocks."); - while (next != -1 && m_Blocks[next].descriptor.load().GetOffset() < block.descriptor.load().GetOffset()) + while (next != INVALID_HANDLE && m_Blocks[next].descriptor.load().GetOffset() < block.descriptor.load().GetOffset()) { prev = next; next = m_Blocks[next].next.load(); @@ -184,12 +260,11 @@ void FreeListAllocator::Deallocate(BlockHandle index) // Try to coalesce two blocks, one of which is owned by us. // Return whether the coallescing was successful. 
-template -bool FreeListAllocator::TryCoalesceBlocks(BlockHandle a, BlockHandle b, bool owner_of_a) +bool FreeListAllocator::TryCoalesceBlocks(BlockHandle a, BlockHandle b, bool owner_of_a) { - // std::cout << "Attempting to coalesce blocks " << a << " and " << b << std::endl; + DEBUG_PRINT("Attempting to coalesce blocks " << a << " and " << b); - if (a == -1 || b == -1) + if (a == INVALID_HANDLE || b == INVALID_HANDLE) return false; BlockDescriptor descriptor_a = m_Blocks[a].descriptor.load(); @@ -201,7 +276,7 @@ bool FreeListAllocator::TryCoalesceBlocks(BlockHandle a if (descriptor_a.GetOffset() + descriptor_a.GetSize() != descriptor_b.GetOffset()) { // The blocks are not adjacent. - // std::cout << "The blocks are not adjacent." << std::endl; + DEBUG_PRINT("The blocks are not adjacent."); return false; } } @@ -210,7 +285,7 @@ bool FreeListAllocator::TryCoalesceBlocks(BlockHandle a if (descriptor_b.GetOffset() + descriptor_b.GetSize() != descriptor_a.GetOffset()) { // The blocks are not adjacent. - // std::cout << "The blocks are not adjacent." << std::endl; + DEBUG_PRINT("The blocks are not adjacent."); return false; } } @@ -218,7 +293,7 @@ bool FreeListAllocator::TryCoalesceBlocks(BlockHandle a if (!descriptor_b.IsFree()) { // The B block was not free and as such cannot be coalesced. - // std::cout << "The B block was not free." << std::endl; + DEBUG_PRINT("The B block was not free."); return false; } @@ -234,7 +309,7 @@ bool FreeListAllocator::TryCoalesceBlocks(BlockHandle a if (!m_Blocks[a].descriptor.compare_exchange_strong(descriptor_a_old, descriptor_a)) { // The block was changed by someone else. We cannot own it. - // std::cout << "Starting to own block A failed." << std::endl; + DEBUG_PRINT("Starting to own block A failed."); return false; } } @@ -253,7 +328,7 @@ bool FreeListAllocator::TryCoalesceBlocks(BlockHandle a new_descriptor.SetSize(descriptor_a.GetSize() + descriptor_b.GetSize()); } - // std::cout << "Trying to set the new descriptor of the B block, with " << new_descriptor.GetOffset() << " and " << new_descriptor.GetSize() << std::endl; + DEBUG_PRINT("Trying to set the new descriptor of the B block, with " << new_descriptor.GetOffset() << " and " << new_descriptor.GetSize()); // Try to set the new descriptor of the B block. 
if (!m_Blocks[b].descriptor.compare_exchange_weak(descriptor_b, new_descriptor)) @@ -266,17 +341,16 @@ bool FreeListAllocator::TryCoalesceBlocks(BlockHandle a return false; } - // std::cout << "Succesfully coalesced blocks " << a << " and " << b << std::endl; + DEBUG_PRINT("Succesfully coalesced blocks " << a << " and " << b); return true; } -template -typename FreeListAllocator::BlockHandle FreeListAllocator::FindFirstFreeBlock(std::size_t size) +FreeListAllocator::BlockHandle FreeListAllocator::FindFirstFreeBlock(Size size) { BlockHandle current = m_Head.load(); - while (current != -1) + while (current != INVALID_HANDLE) { Block &block = m_Blocks[current]; BlockDescriptor descriptor = block.descriptor.load(); @@ -290,26 +364,24 @@ typename FreeListAllocator::BlockHandle FreeListAllocat current = block.next.load(); } - return -1; + return INVALID_HANDLE; } -template -std::size_t FreeListAllocator::GetOffset(BlockHandle index) +std::size_t FreeListAllocator::GetOffset(BlockHandle index) { return m_Blocks[index].descriptor.load().GetOffset(); } -template -void FreeListAllocator::InsertBlockSorted(BlockHandle index) +void FreeListAllocator::InsertBlockSorted(BlockHandle index) { - BlockHandle previous = -1; + BlockHandle previous = INVALID_HANDLE; BlockHandle current; do { current = m_Head.load(); - while (current != -1 && m_Blocks[current].descriptor.load().GetOffset() < m_Blocks[index].descriptor.load().GetOffset()) + while (current != INVALID_HANDLE && m_Blocks[current].descriptor.load().GetOffset() < m_Blocks[index].descriptor.load().GetOffset()) { previous = current; current = m_Blocks[current].next; @@ -318,64 +390,63 @@ void FreeListAllocator::InsertBlockSorted(BlockHandle i if (current == index) { // The block is already on the free list. - // std::cout << "Block " << index << " is already on the free list." << std::endl; + DEBUG_PRINT("Block " << index << " is already on the free list."); return; } m_Blocks[index].next = current; - if (previous == -1) + if (previous == INVALID_HANDLE) { - // std::cout << "Attempting to insert the block at the head." << std::endl; + DEBUG_PRINT("Attempting to insert the block at the head."); if (m_Head.compare_exchange_weak(current, index)) { // Successfully inserted the block. - // std::cout << "Successfully inserted the block." << std::endl; + DEBUG_PRINT("Successfully inserted the block."); return; } } else { - // std::cout << "Attempting to insert the block in the middle." << std::endl; + DEBUG_PRINT("Attempting to insert the block in the middle."); if (m_Blocks[previous].next.compare_exchange_weak(current, index)) { // Successfully inserted the block. - // std::cout << "Successfully inserted the block." << std::endl; + DEBUG_PRINT("Successfully inserted the block."); return; } } } while (true); } -template -bool FreeListAllocator::RemoveBlock(BlockHandle index) +bool FreeListAllocator::RemoveBlock(BlockHandle index) { - BlockHandle previous = -1; + BlockHandle previous = INVALID_HANDLE; BlockHandle current; - // std::cout << "Removing block " << index << std::endl; + DEBUG_PRINT("Removing block " << index); do { current = m_Head.load(); // Find the previous block. - while (current != index && current != -1) + while (current != index && current != INVALID_HANDLE) { previous = current; current = m_Blocks[current].next; } - if (current == -1) + if (current == INVALID_HANDLE) { // The block was not on the free list, even though it was supposed to be free. - // std::cout << "Block was not on the free list." 
<< std::endl; + DEBUG_PRINT("Block was not on the free list."); return false; } - if (previous == -1) + if (previous == INVALID_HANDLE) { if (m_Head.compare_exchange_weak(current, m_Blocks[index].next)) { @@ -394,17 +465,16 @@ bool FreeListAllocator::RemoveBlock(BlockHandle index) } while (true); } -template -bool FreeListAllocator::MarkBlockAsFree(BlockHandle handle, bool mark_free) +bool FreeListAllocator::MarkBlockAsFree(BlockHandle handle, bool mark_free) { - // std::cout << "Marking block " << handle << " as " << (mark_free ? "free" : "allocated") << std::endl; + DEBUG_PRINT("Marking block " << handle << " as " << (mark_free ? "free" : "allocated")); BlockDescriptor descriptor = m_Blocks[handle].descriptor.load(); if (descriptor.IsFree() == mark_free) { // The block is already in the desired state. - // std::cout << "The block is already in the desired state." << std::endl; + DEBUG_PRINT("The block is already in the desired state."); return false; } @@ -414,21 +484,20 @@ bool FreeListAllocator::MarkBlockAsFree(BlockHandle han if (!m_Blocks[handle].descriptor.compare_exchange_strong(descriptor, new_descriptor)) { // The block was changed in the meantime and we were unsuccessful. - // std::cout << "The block was changed in the meantime." << std::endl; + DEBUG_PRINT("The block was changed in the meantime."); return false; } - // std::cout << "Successfully marked the block." << std::endl; + DEBUG_PRINT("Successfully marked the block."); return true; } -template -void FreeListAllocator::PrintState() +void FreeListAllocator::PrintState() { BlockHandle current = m_Head; - while (current != -1) + while (current != INVALID_HANDLE) { Block &block = m_Blocks[current]; BlockDescriptor descriptor = block.descriptor.load(); @@ -439,13 +508,12 @@ void FreeListAllocator::PrintState() } } -template -size_t FreeListAllocator::GetNumFreeBlocks() const +size_t FreeListAllocator::GetNumFreeBlocks() const { size_t count = 0; BlockHandle current = m_Head; - while (current != -1) + while (current != INVALID_HANDLE) { ++count; current = m_Blocks[current].next; diff --git a/catkit_core/FreeListAllocator.h b/catkit_core/FreeListAllocator.h index f14154f6..942b868f 100644 --- a/catkit_core/FreeListAllocator.h +++ b/catkit_core/FreeListAllocator.h @@ -5,72 +5,56 @@ #include #include +#include // A simple lock-free free list allocator. -template class FreeListAllocator { public: - using BlockHandle = std::int32_t; + using BlockHandle = PoolAllocator::BlockHandle; using Offset = std::uint32_t; using Size = std::uint32_t; - static const BlockHandle INVALID_HANDLE = -1; + static const BlockHandle INVALID_HANDLE = PoolAllocator::INVALID_HANDLE; + FreeListAllocator(void *metadata_buffer); + + static std::size_t ComputeMetadataBufferSize(std::size_t max_num_blocks); + + void Initialize(std::size_t max_num_blocks, std::size_t alignment, std::size_t buffer_size); + + BlockHandle Allocate(std::size_t size); + void Deallocate(BlockHandle index); + + std::size_t GetOffset(BlockHandle index); + + void PrintState(); + size_t GetNumFreeBlocks() const; + +private: // A unique descriptor of the block. 
class BlockDescriptor { public: - BlockDescriptor() - { - } - - BlockDescriptor(Offset offset, Size size, bool is_free) - { - Set(offset, size, is_free); - } - - void Set(const Offset &offset, const Size &size, const bool &is_free) - { - m_Offset = offset; - m_SizeAndFreeFlag = (size & ~_FREE_FLAG) | (_FREE_FLAG * is_free); - } - - Offset GetOffset() const - { - return m_Offset; - } - - void SetOffset(const Offset &new_offset) - { - m_Offset = new_offset; - } - - Size GetSize() const - { - return m_SizeAndFreeFlag & ~_FREE_FLAG; - } - - void SetSize(const Size &new_size) - { - m_SizeAndFreeFlag = (new_size & ~_FREE_FLAG) | (m_SizeAndFreeFlag & _FREE_FLAG); - } - - bool IsFree() const - { - return m_SizeAndFreeFlag & _FREE_FLAG; - } - - void SetFree(const bool &is_free) - { - m_SizeAndFreeFlag = (m_SizeAndFreeFlag & ~_FREE_FLAG) | (_FREE_FLAG * is_free); - } + BlockDescriptor(); + BlockDescriptor(Offset offset, Size size, bool is_free); + + void Set(const Offset &offset, const Size &size, const bool &is_free); + + Offset GetOffset() const; + void SetOffset(const Offset &new_offset); + + Size GetSize() const; + void SetSize(const Size &new_size); + + bool IsFree() const; + void SetFree(const bool &is_free); private: Offset m_Offset; Size m_SizeAndFreeFlag; - static const Offset _FREE_FLAG = 0x80000000; + static constexpr Offset _FREE_FLAG = Offset(1) << 31; }; // Check that the BlockDescriptor is lock-free atomic. @@ -82,24 +66,35 @@ class FreeListAllocator std::atomic next; }; - FreeListAllocator(std::size_t buffer_size); - ~FreeListAllocator(); + struct Header + { + std::uint8_t version[4]; + std::uint32_t max_num_blocks; + Size alignment; + Size total_buffer_size; + std::atomic head; + }; - BlockHandle Allocate(std::size_t size); - void Deallocate(BlockHandle index); + // Ensure a specific memory layout. + static_assert(offsetof(Header, version) == 0); + static_assert(offsetof(Header, max_num_blocks) == 4); + static_assert(offsetof(Header, alignment) == 8); + static_assert(offsetof(Header, total_buffer_size) == 12); + static_assert(offsetof(Header, head) == 16); + static_assert(sizeof(Header) == 20); - std::size_t GetOffset(BlockHandle index); + Header &m_Header; - void PrintState(); - size_t GetNumFreeBlocks() const; + std::uint32_t &m_MaxNumBlocks; + std::uint32_t &m_Alignment; + std::atomic &m_Head; -private: PoolAllocator m_BlockAllocator; - Block m_Blocks[MaxNumBlocks]; + Block *m_Blocks; - std::atomic m_Head; + void *m_MetadataBuffer; - BlockHandle FindFirstFreeBlock(std::size_t size); + BlockHandle FindFirstFreeBlock(Size size); void InsertBlockSorted(BlockHandle index); bool RemoveBlock(BlockHandle index); @@ -109,6 +104,4 @@ class FreeListAllocator bool TryCoalesceBlocks(BlockHandle a, BlockHandle b, bool owner_of_a); }; -#include "FreeListAllocator.inl" - #endif // FREE_LIST_ALLOCATOR_H From 0ff6bcfb5777e9800ace40fffa00a6fdff3ec6f8 Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Wed, 11 Dec 2024 12:06:00 -0800 Subject: [PATCH 20/34] Fix import to use C++ version. --- catkit_core/FreeListAllocator.h | 1 - catkit_core/PoolAllocator.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/catkit_core/FreeListAllocator.h b/catkit_core/FreeListAllocator.h index 942b868f..e035c464 100644 --- a/catkit_core/FreeListAllocator.h +++ b/catkit_core/FreeListAllocator.h @@ -5,7 +5,6 @@ #include #include -#include // A simple lock-free free list allocator. 
class FreeListAllocator diff --git a/catkit_core/PoolAllocator.h b/catkit_core/PoolAllocator.h index 4b5397c4..2558120a 100644 --- a/catkit_core/PoolAllocator.h +++ b/catkit_core/PoolAllocator.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include // A simple lock-free pool allocator. class PoolAllocator From d5ed5af64a07bcbb4b530bc3d6862599f9207787 Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Wed, 11 Dec 2024 14:25:57 -0800 Subject: [PATCH 21/34] Copy using std::copy(). --- benchmarks/pool_allocator.cpp | 2 +- catkit_core/FreeListAllocator.cpp | 4 +++- catkit_core/PoolAllocator.cpp | 4 +++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/benchmarks/pool_allocator.cpp b/benchmarks/pool_allocator.cpp index 9c642e1c..f327cf4e 100644 --- a/benchmarks/pool_allocator.cpp +++ b/benchmarks/pool_allocator.cpp @@ -12,7 +12,7 @@ void benchmark_linux_scalability() PoolAllocator allocator(buffer); allocator.Initialize(CAPACITY); - auto *handles = new size_t[N]; + auto *handles = new PoolAllocator::BlockHandle[N]; auto start = GetTimeStamp(); diff --git a/catkit_core/FreeListAllocator.cpp b/catkit_core/FreeListAllocator.cpp index df1ff108..08ed799b 100644 --- a/catkit_core/FreeListAllocator.cpp +++ b/catkit_core/FreeListAllocator.cpp @@ -1,6 +1,8 @@ #include "FreeListAllocator.h" #include "Timing.h" + #include +#include //#define DEBUG_PRINT(a) std::cout << a << std::endl #define DEBUG_PRINT(a) @@ -76,7 +78,7 @@ std::size_t FreeListAllocator::ComputeMetadataBufferSize(std::size_t max_num_blo void FreeListAllocator::Initialize(std::size_t max_num_blocks, std::size_t alignment, std::size_t buffer_size) { - std::memcpy(m_Header.version, VERSION, sizeof(VERSION)); + std::copy(VERSION, VERSION + sizeof(VERSION), m_Header.version); m_Header.max_num_blocks = max_num_blocks; m_Header.alignment = alignment; m_Header.total_buffer_size = buffer_size; diff --git a/catkit_core/PoolAllocator.cpp b/catkit_core/PoolAllocator.cpp index a7c38f55..6882986d 100644 --- a/catkit_core/PoolAllocator.cpp +++ b/catkit_core/PoolAllocator.cpp @@ -1,5 +1,7 @@ #include "PoolAllocator.h" +#include + const std::uint8_t VERSION[4] = {0, 0, 0, 0}; PoolAllocator::PoolAllocator(void *metadata_buffer) @@ -13,7 +15,7 @@ PoolAllocator::PoolAllocator(void *metadata_buffer) void PoolAllocator::Initialize(std::uint32_t capacity) { // Set version and capacity. - std::memcpy(m_Header.version, VERSION, sizeof(VERSION)); + std::copy(VERSION, VERSION + sizeof(VERSION), m_Header.version); m_Capacity = capacity; // Initialize the linked list. From 10625631f9e71abe755fbbd86ae5f7f1633be3c4 Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Wed, 11 Dec 2024 17:21:47 -0800 Subject: [PATCH 22/34] Rename and fix hash map implementation. --- catkit_core/{SharedHashMap.h => HashMap.h} | 72 ++++++++++++++-------- 1 file changed, 46 insertions(+), 26 deletions(-) rename catkit_core/{SharedHashMap.h => HashMap.h} (65%) diff --git a/catkit_core/SharedHashMap.h b/catkit_core/HashMap.h similarity index 65% rename from catkit_core/SharedHashMap.h rename to catkit_core/HashMap.h index 3177792d..df3af3dd 100644 --- a/catkit_core/SharedHashMap.h +++ b/catkit_core/HashMap.h @@ -1,5 +1,5 @@ -#ifndef SHARED_HASH_MAP_H -#define SHARED_HASH_MAP_H +#ifndef HASH_MAP_H +#define HASH_MAP_H #include #include @@ -48,7 +48,7 @@ uint32_t murmurhash3(const std::string &key, uint32_t seed = 0) k1 *= c2; h ^= k1; case 0: - // Do nothing. + ; // Do nothing. 
} h ^= len; @@ -65,7 +65,7 @@ uint32_t murmurhash3(const std::string &key, uint32_t seed = 0) // * entries cannot be removed. // * key is string type of fixed size. template -class SharedHashMap +class HashMap { private: enum EntryFlags @@ -83,7 +83,7 @@ class SharedHashMap Value value; }; - Entry m_Data[Size]; + Entry *m_Data; size_t hash(const std::string &key) const { @@ -91,14 +91,28 @@ class SharedHashMap } public: - bool insert(const std::string &key, const Value &value) + HashMap(void *buffer) + : m_Data(reinterpret_cast(buffer)) { - if (key.size() >= MaxKeyLength) + } + + static std::size_t CalculateBufferSize() + { + return sizeof(Entry) * Size; + } + + void Initialize() + { + for (size_t i = 0; i < Size; ++i) { - // Key is too long to fit in the fixed-size buffer. - return false; + m_Data[i].flags = EntryFlags::UNOCCUPIED; + + std::fill(m_Data[i].key, m_Data[i].key + MaxKeyLength, '\0'); } + } + bool Insert(const std::string &key, const Value &value) + { size_t index = hash(key); for (size_t i = 0; i < Size; ++i) @@ -106,26 +120,32 @@ class SharedHashMap size_t probe = (index + i) % Size; // Try to use this entry. - auto flags = m_Data[probe].flags.compare_exchange_strong(EntryFlags::UNOCCUPIED, EntryFlags::INITIALIZING); + EntryFlags flags = EntryFlags::UNOCCUPIED; - // If this entry is still initializing, do a spin-wait until it's occupied. - // This should almost never be necessary and should only last a short while if it does. - while (flags == EntryFlags::INITIALIZING) - { - flags = m_Data[probe].flags.load(); - } + bool success = m_Data[probe].flags.compare_exchange_strong(flags, EntryFlags::INITIALIZING, std::memory_order_acq_rel); - if (flags == EntryFlags::OCCUPIED) + if (!success) { - // Check if the key is our key. - if (std::strcmp(m_Data[probe].key, key.c_str()) == 0) + // The entry is either occupied or still initializing. + + // If this entry is still initializing, do a spin-wait until it's occupied. + // This should almost never be necessary and should only last a short while if it does. + while (flags == EntryFlags::INITIALIZING) { - // Key already exists. - return false; + flags = m_Data[probe].flags.load(std::memory_order_acquire); } - } - if (flags == EntryFlags::UNOCCUPIED) + if (flags == EntryFlags::OCCUPIED) + { + // Check if the key is our key. + if (std::strcmp(m_Data[probe].key, key.c_str()) == 0) + { + // Key already exists. + return false; + } + } + } + else { // Copy key ensuring null-termination. std::strncpy(m_Data[probe].key, key.c_str(), MaxKeyLength - 1); @@ -135,7 +155,7 @@ class SharedHashMap m_Data[probe].value = value; // Make occupied. - m_Data[probe].flags = EntryFlags::OCCUPIED; + m_Data[probe].flags.store(EntryFlags::OCCUPIED, std::memory_order_release); return true; } @@ -145,7 +165,7 @@ class SharedHashMap return false; } - const Value* find(const std::string &key) const + const Value *Find(const std::string &key) const { if (key.size() >= MaxKeyLength) { @@ -177,4 +197,4 @@ class SharedHashMap } }; -#endif // SHARED_HASH_MAP_H +#endif // HASH_MAP_H From 3120d8423df5f2fbff7386d97bf6021ba8c8a0fd Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Wed, 11 Dec 2024 17:22:32 -0800 Subject: [PATCH 23/34] Convert indentation to tabs. 
---
 catkit_core/FreeListAllocator.cpp |  2 +-
 catkit_core/PoolAllocator.cpp     | 96 +++++++++++++++----------------
 2 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/catkit_core/FreeListAllocator.cpp b/catkit_core/FreeListAllocator.cpp
index 08ed799b..dbb662c8 100644
--- a/catkit_core/FreeListAllocator.cpp
+++ b/catkit_core/FreeListAllocator.cpp
@@ -78,7 +78,7 @@ std::size_t FreeListAllocator::ComputeMetadataBufferSize(std::size_t max_num_blo
 
 void FreeListAllocator::Initialize(std::size_t max_num_blocks, std::size_t alignment, std::size_t buffer_size)
 {
-    std::copy(VERSION, VERSION + sizeof(VERSION), m_Header.version);
+	std::copy(VERSION, VERSION + sizeof(VERSION), m_Header.version);
 	m_Header.max_num_blocks = max_num_blocks;
 	m_Header.alignment = alignment;
 	m_Header.total_buffer_size = buffer_size;
diff --git a/catkit_core/PoolAllocator.cpp b/catkit_core/PoolAllocator.cpp
index 6882986d..a5896020 100644
--- a/catkit_core/PoolAllocator.cpp
+++ b/catkit_core/PoolAllocator.cpp
@@ -5,77 +5,77 @@
 const std::uint8_t VERSION[4] = {0, 0, 0, 0};
 
 PoolAllocator::PoolAllocator(void *metadata_buffer)
-	: m_Header(*static_cast<Header *>(metadata_buffer)),
-      m_Capacity(m_Header.capacity),
-      m_Head(m_Header.head),
-      m_Next(reinterpret_cast<std::atomic<BlockHandle> *>(static_cast<char *>(metadata_buffer) + sizeof(Header)))
+	: m_Header(*static_cast<Header *>(metadata_buffer)),
+	  m_Capacity(m_Header.capacity),
+	  m_Head(m_Header.head),
+	  m_Next(reinterpret_cast<std::atomic<BlockHandle> *>(static_cast<char *>(metadata_buffer) + sizeof(Header)))
 {
 }
 
 void PoolAllocator::Initialize(std::uint32_t capacity)
 {
-    // Set version and capacity.
-    std::copy(VERSION, VERSION + sizeof(VERSION), m_Header.version);
-    m_Capacity = capacity;
+	// Set version and capacity.
+	std::copy(VERSION, VERSION + sizeof(VERSION), m_Header.version);
+	m_Capacity = capacity;
 
-    // Initialize the linked list.
-    m_Head.store(0, std::memory_order_relaxed);
+	// Initialize the linked list.
+	m_Head.store(0, std::memory_order_relaxed);
 
-    for (std::size_t i = 0; i < m_Capacity; ++i)
-    {
-        if (i == m_Capacity - 1)
-        {
-            m_Next[i] = INVALID_HANDLE;
-        }
-        else
-        {
-            m_Next[i] = i + 1;
-        }
-    }
+	for (std::size_t i = 0; i < m_Capacity; ++i)
+	{
+		if (i == m_Capacity - 1)
+		{
+			m_Next[i] = INVALID_HANDLE;
+		}
+		else
+		{
+			m_Next[i] = i + 1;
+		}
+	}
 }
 
 std::size_t PoolAllocator::CalculateMetadataBufferSize(std::uint32_t capacity)
 {
-    std::size_t size = sizeof(Header);
-    size += capacity * sizeof(std::atomic<BlockHandle>);
+	std::size_t size = sizeof(Header);
+	size += capacity * sizeof(std::atomic<BlockHandle>);
 
-    return size;
+	return size;
 }
 
 PoolAllocator::BlockHandle PoolAllocator::Allocate()
 {
-    BlockHandle head = m_Head.load(std::memory_order_relaxed);
-    BlockHandle next;
+	BlockHandle head = m_Head.load(std::memory_order_relaxed);
+	BlockHandle next;
 
-    // Pop the first element from the linked list.
-    do
-    {
-        // Check if the pool is empty.
-        if (head == INVALID_HANDLE)
-        {
-            return INVALID_HANDLE;
-        }
 
-        next = m_Next[head].load(std::memory_order_relaxed);
-    } while (!m_Head.compare_exchange_weak(head, next));
+	// Pop the first element from the linked list.
+	do
+	{
+		// Check if the pool is empty.
+		if (head == INVALID_HANDLE)
+		{
+			return INVALID_HANDLE;
+		}
+
+		next = m_Next[head].load(std::memory_order_relaxed);
+	} while (!m_Head.compare_exchange_weak(head, next));
 
-    // Return the popped element.
-    return head;
+	// Return the popped element.
+	return head;
 }
 
 void PoolAllocator::Deallocate(BlockHandle index)
 {
-    // Check if the element is within the pool bounds.
-    if (index >= m_Capacity)
-    {
-        return;
-    }
+	// Check if the element is within the pool bounds.
+	if (index >= m_Capacity)
+	{
+		return;
+	}
 
-    BlockHandle head = m_Head.load(std::memory_order_relaxed);
+	BlockHandle head = m_Head.load(std::memory_order_relaxed);
 
-    // Push the element back on the front of the linked list.
-    do
-    {
-        m_Next[index] = head;
-    } while (!m_Head.compare_exchange_weak(head, index));
+	// Push the element back on the front of the linked list.
+	do
+	{
+		m_Next[index] = head;
+	} while (!m_Head.compare_exchange_weak(head, index));
 }

From 1fba625f83757b66b1db0f42b5170bed7a1f4489 Mon Sep 17 00:00:00 2001
From: Emiel Por
Date: Wed, 11 Dec 2024 17:22:46 -0800
Subject: [PATCH 24/34] Add benchmark for hash map.
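
The benchmark exercises the HashMap through its external-buffer
interface: the caller allocates CalculateBufferSize() bytes, constructs
the map on top of them and calls Initialize() once, after which each
Insert() and Find() call is timed individually with GetTimeStamp(). A
rough usage sketch (the template arguments and the int value type are
illustrative placeholders, assuming the parameter order
<Value, Size, MaxKeyLength>, not the settings used by the benchmark):

	#include "HashMap.h"

	typedef HashMap<int, 1024, 32> MyHashMap;

	int main()
	{
		// The map does not own this memory; it only interprets it.
		char *buffer = new char[MyHashMap::CalculateBufferSize()];

		MyHashMap map(buffer);
		map.Initialize();

		map.Insert("key0", 42);
		const int *value = map.Find("key0"); // *value == 42 on success

		delete[] buffer;
		return 0;
	}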
--- benchmarks/CMakeLists.txt | 6 +++++ benchmarks/hash_map.cpp | 50 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 benchmarks/hash_map.cpp diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 7bbce84d..a76ebf1e 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -34,9 +34,15 @@ add_executable(pool_allocator pool_allocator.cpp) target_include_directories(pool_allocator PUBLIC ../catkit_core) target_link_libraries(pool_allocator PUBLIC catkit_core) +# Hash map benchmark +add_executable(hash_map hash_map.cpp) +target_include_directories(hash_map PUBLIC ../catkit_core) +target_link_libraries(hash_map PUBLIC catkit_core) + # Add install files install(TARGETS datastream_latency DESTINATION bin) install(TARGETS datastream_submit DESTINATION bin) install(TARGETS timestamp DESTINATION bin) install(TARGETS free_list_allocator DESTINATION bin) install(TARGETS pool_allocator DESTINATION bin) +install(TARGETS hash_map DESTINATION bin) diff --git a/benchmarks/hash_map.cpp b/benchmarks/hash_map.cpp new file mode 100644 index 00000000..60a75708 --- /dev/null +++ b/benchmarks/hash_map.cpp @@ -0,0 +1,50 @@ +#include "HashMap.h" +#include "Timing.h" + +#include + +int main(int argc, char **argv) +{ + typedef HashMap MyHashMap; + + std::size_t buffer_size = MyHashMap::CalculateBufferSize(); + char *buffer = new char[buffer_size]; + + MyHashMap map(buffer); + map.Initialize(); + + std::uint64_t total_time = 0; + std::size_t N = 1000; + + for (size_t i = 0; i < N; ++i) + { + std::string key = "key" + std::to_string(i); + + auto start = GetTimeStamp(); + map.Insert(key, i); + auto end = GetTimeStamp(); + + total_time += end - start; + } + + std::cout << "Insertion time: " << total_time / N << " ns" << std::endl; + + total_time = 0; + + for (size_t i = 0; i < N; ++i) + { + std::string key = "key" + std::to_string(i); + + auto start = GetTimeStamp(); + const int *value = map.Find(key); + auto end = GetTimeStamp(); + + total_time += end - start; + } + + std::cout << "Lookup time: " << total_time / N << " ns" << std::endl; + + delete[] buffer; + + return 0; +} From 1346321ab5a831eb4eb9c906a740ce47b19d2ba1 Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Wed, 11 Dec 2024 18:37:36 -0800 Subject: [PATCH 25/34] Import cstring library for strcmp(). --- catkit_core/HashMap.h | 1 + 1 file changed, 1 insertion(+) diff --git a/catkit_core/HashMap.h b/catkit_core/HashMap.h index df3af3dd..79e98e0a 100644 --- a/catkit_core/HashMap.h +++ b/catkit_core/HashMap.h @@ -5,6 +5,7 @@ #include #include #include +#include // MurmurHash3 32-bit version uint32_t murmurhash3(const std::string &key, uint32_t seed = 0) From 9545c86ccad15e6e97688a00fedbb06f262008fa Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Wed, 11 Dec 2024 19:12:12 -0800 Subject: [PATCH 26/34] Use size_t for number of blocks. 
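
In murmurhash3(), the number of 4-byte blocks is derived from
key.size(), which is a size_t; storing it in an int made the loop
condition a mixed-sign comparison and would truncate the count for
extremely long keys. A minimal illustration of the hazard (hypothetical
code, not part of the patch):

	#include <cstddef>
	#include <string>

	void example(const std::string &key)
	{
		int bad = key.size() / 4;          // mixed-sign; narrows the count
		std::size_t good = key.size() / 4; // same width as size()
	}

The key copy in HashMap::Insert() switches from std::strncpy() to
std::string::copy() at the same time, so the copied length is computed
in size_t as well.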
--- benchmarks/hash_map.cpp | 2 +- catkit_core/HashMap.h | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/benchmarks/hash_map.cpp b/benchmarks/hash_map.cpp index 60a75708..82477c58 100644 --- a/benchmarks/hash_map.cpp +++ b/benchmarks/hash_map.cpp @@ -14,7 +14,7 @@ int main(int argc, char **argv) map.Initialize(); std::uint64_t total_time = 0; - std::size_t N = 1000; + std::size_t N = 5000; for (size_t i = 0; i < N; ++i) { diff --git a/catkit_core/HashMap.h b/catkit_core/HashMap.h index 79e98e0a..cabf698e 100644 --- a/catkit_core/HashMap.h +++ b/catkit_core/HashMap.h @@ -18,9 +18,9 @@ uint32_t murmurhash3(const std::string &key, uint32_t seed = 0) const uint32_t c2 = 0x1b873593; // Partition in blocks of 4 bytes. - const int nblocks = len / 4; + const size_t nblocks = len / 4; const uint32_t* blocks = reinterpret_cast(data); - for (int i = 0; i < nblocks; i++) + for (size_t i = 0; i < nblocks; i++) { uint32_t k = blocks[i]; k *= c1; @@ -149,7 +149,8 @@ class HashMap else { // Copy key ensuring null-termination. - std::strncpy(m_Data[probe].key, key.c_str(), MaxKeyLength - 1); + std::size_t key_length = std::min(key.size(), MaxKeyLength - 1); + key.copy(m_Data[probe].key, key_length); m_Data[probe].key[MaxKeyLength - 1] = '\0'; // Copy m_Data. @@ -180,7 +181,7 @@ class HashMap { size_t probe = (index + i) % Size; - EntryFlags flags = m_Data[probe].flags.load(); + EntryFlags flags = m_Data[probe].flags.load(std::memory_order_acquire); if (flags == EntryFlags::OCCUPIED && std::strcmp(m_Data[probe].key, key.c_str()) == 0) { From 61d1e0ed1490df73954e1a4d2248c993b7d0d022 Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Wed, 11 Dec 2024 19:12:29 -0800 Subject: [PATCH 27/34] Compute time per operation in ns. --- benchmarks/free_list_allocator.cpp | 3 +++ benchmarks/pool_allocator.cpp | 1 + 2 files changed, 4 insertions(+) diff --git a/benchmarks/free_list_allocator.cpp b/benchmarks/free_list_allocator.cpp index 2ded58ce..22a8bb7c 100644 --- a/benchmarks/free_list_allocator.cpp +++ b/benchmarks/free_list_allocator.cpp @@ -36,6 +36,7 @@ void benchmark_linux_scalability() std::cout << "Linux Scalability:" << std::endl; std::cout << "Time: " << (end - start) / 1e9 << " sec" << std::endl; std::cout << "Throughput: " << 2 * N / ((end - start) / 1e9) << " ops/s" << std::endl; + std::cout << "Time per operation: " << (end - start) / (2 * N) << " ns" << std::endl; delete[] handles; delete[] buffer; @@ -76,6 +77,7 @@ void benchmark_threadtest() std::cout << "Threadtest:" << std::endl; std::cout << "Time: " << (end - start) / 1e9 << " sec" << std::endl; std::cout << "Throughput: " << 2 * N * M / ((end - start) / 1e9) << " ops/s" << std::endl; + std::cout << "Time per operation: " << (end - start) / (2 * N * M) << " ns" << std::endl; delete[] handles; } @@ -129,6 +131,7 @@ void benchmark_larson() std::cout << "Larson benchmark:" << std::endl; std::cout << "Time: " << (end - start) / 1e9 << " sec" << std::endl; std::cout << "Throughput: " << (N * 2 - M) / ((end - start) / 1e9) << " ops/s" << std::endl; + std::cout << "Time per operation: " << (end - start) / (2 * N - M) << " ns" << std::endl; delete[] handles; } diff --git a/benchmarks/pool_allocator.cpp b/benchmarks/pool_allocator.cpp index f327cf4e..6dd2df38 100644 --- a/benchmarks/pool_allocator.cpp +++ b/benchmarks/pool_allocator.cpp @@ -31,6 +31,7 @@ void benchmark_linux_scalability() std::cout << "Linux Scalability:" << std::endl; std::cout << "Time: " << (end - start) / 1e9 << " sec" << std::endl; std::cout << "Throughput: " << 
2 * N / ((end - start) / 1e9) << " ops/s" << std::endl; + std::cout << "Time per operation: " << (end - start) / (2 * N) << " ns" << std::endl; delete[] handles; delete[] buffer; From 613d2635412c0fd3dcb8e224869eafdf00cce305 Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Thu, 12 Dec 2024 17:14:49 -0800 Subject: [PATCH 28/34] Add maximum key length check, safe string compare, and byte packing. --- benchmarks/hash_map.cpp | 17 ++++++++++++++--- catkit_core/HashMap.h | 30 +++++++++++++++++++++--------- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/benchmarks/hash_map.cpp b/benchmarks/hash_map.cpp index 82477c58..d4a29c30 100644 --- a/benchmarks/hash_map.cpp +++ b/benchmarks/hash_map.cpp @@ -5,9 +5,10 @@ int main(int argc, char **argv) { - typedef HashMap MyHashMap; + typedef HashMap MyHashMap; std::size_t buffer_size = MyHashMap::CalculateBufferSize(); + std::cout << "Buffer size: " << buffer_size << " bytes" << std::endl; char *buffer = new char[buffer_size]; MyHashMap map(buffer); @@ -21,9 +22,14 @@ int main(int argc, char **argv) std::string key = "key" + std::to_string(i); auto start = GetTimeStamp(); - map.Insert(key, i); + bool success = map.Insert(key, uint16_t(i)); auto end = GetTimeStamp(); + if (!success) + { + std::cout << "Insertion failed." << std::endl; + } + total_time += end - start; } @@ -36,9 +42,14 @@ int main(int argc, char **argv) std::string key = "key" + std::to_string(i); auto start = GetTimeStamp(); - const int *value = map.Find(key); + auto *value = map.Find(key); auto end = GetTimeStamp(); + if (value == nullptr || *value != i) + { + std::cout << "Key not found." << std::endl; + } + total_time += end - start; } diff --git a/catkit_core/HashMap.h b/catkit_core/HashMap.h index cabf698e..4e065375 100644 --- a/catkit_core/HashMap.h +++ b/catkit_core/HashMap.h @@ -6,6 +6,7 @@ #include #include #include +#include // MurmurHash3 32-bit version uint32_t murmurhash3(const std::string &key, uint32_t seed = 0) @@ -69,7 +70,7 @@ template class HashMap { private: - enum EntryFlags + enum EntryFlags : uint8_t { UNOCCUPIED = 0, INITIALIZING = 1, @@ -78,10 +79,10 @@ class HashMap struct Entry { + Value value; + std::atomic flags = EntryFlags::UNOCCUPIED; char key[MaxKeyLength]; - - Value value; }; Entry *m_Data; @@ -114,6 +115,12 @@ class HashMap bool Insert(const std::string &key, const Value &value) { + if (key.size() > MaxKeyLength) + { + // Key is too long to fit in the fixed-size buffer. + return false; + } + size_t index = hash(key); for (size_t i = 0; i < Size; ++i) @@ -139,7 +146,7 @@ class HashMap if (flags == EntryFlags::OCCUPIED) { // Check if the key is our key. - if (std::strcmp(m_Data[probe].key, key.c_str()) == 0) + if (AreKeysTheSame(m_Data[probe].key, key.c_str())) { // Key already exists. return false; @@ -148,10 +155,9 @@ class HashMap } else { - // Copy key ensuring null-termination. - std::size_t key_length = std::min(key.size(), MaxKeyLength - 1); + // Copy key. + std::size_t key_length = std::min(key.size(), MaxKeyLength); key.copy(m_Data[probe].key, key_length); - m_Data[probe].key[MaxKeyLength - 1] = '\0'; // Copy m_Data. m_Data[probe].value = value; @@ -169,7 +175,7 @@ class HashMap const Value *Find(const std::string &key) const { - if (key.size() >= MaxKeyLength) + if (key.size() > MaxKeyLength) { // Key is too long to fit in the fixed-size buffer. 
return nullptr; @@ -183,13 +189,14 @@ class HashMap EntryFlags flags = m_Data[probe].flags.load(std::memory_order_acquire); - if (flags == EntryFlags::OCCUPIED && std::strcmp(m_Data[probe].key, key.c_str()) == 0) + if (flags == EntryFlags::OCCUPIED && AreKeysTheSame(m_Data[probe].key, key.c_str())) { return &m_Data[probe].value; } if (flags != EntryFlags::OCCUPIED) { + // Key not found. break; } } @@ -197,6 +204,11 @@ class HashMap // Key not found. return nullptr; } + + bool AreKeysTheSame(const char *ky1, const char *ky2) const + { + return std::strncmp(ky1, ky2, MaxKeyLength) == 0; + } }; #endif // HASH_MAP_H From 97cd3d0bc7f01cc227f3f87499cda1ee57bbf593 Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Thu, 19 Dec 2024 20:16:47 -0800 Subject: [PATCH 29/34] Do not return a const pointer. The results should be readable. --- catkit_core/HashMap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catkit_core/HashMap.h b/catkit_core/HashMap.h index 4e065375..b54466e8 100644 --- a/catkit_core/HashMap.h +++ b/catkit_core/HashMap.h @@ -173,7 +173,7 @@ class HashMap return false; } - const Value *Find(const std::string &key) const + Value *Find(const std::string &key) const { if (key.size() > MaxKeyLength) { From 25c9c5d03abc84dc716f2232fd31aab5bda14d40 Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Thu, 19 Dec 2024 20:54:55 -0800 Subject: [PATCH 30/34] Use string_view instead of c or c++ strings. --- catkit_core/HashMap.h | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/catkit_core/HashMap.h b/catkit_core/HashMap.h index b54466e8..8ff8bb5f 100644 --- a/catkit_core/HashMap.h +++ b/catkit_core/HashMap.h @@ -4,12 +4,10 @@ #include #include #include -#include -#include -#include +#include // MurmurHash3 32-bit version -uint32_t murmurhash3(const std::string &key, uint32_t seed = 0) +uint32_t murmurhash3(const std::string_view &key, uint32_t seed = 0) { const uint8_t *data = reinterpret_cast(key.data()); size_t len = key.size(); @@ -87,7 +85,7 @@ class HashMap Entry *m_Data; - size_t hash(const std::string &key) const + size_t GetIndex(std::string_view key) const { return murmurhash3(key) % Size; } @@ -113,15 +111,15 @@ class HashMap } } - bool Insert(const std::string &key, const Value &value) + bool Insert(std::string_view key, const Value &value) { - if (key.size() > MaxKeyLength) + if (key.size() >= MaxKeyLength) { // Key is too long to fit in the fixed-size buffer. return false; } - size_t index = hash(key); + size_t index = GetIndex(key); for (size_t i = 0; i < Size; ++i) { @@ -146,7 +144,7 @@ class HashMap if (flags == EntryFlags::OCCUPIED) { // Check if the key is our key. - if (AreKeysTheSame(m_Data[probe].key, key.c_str())) + if (key == m_Data[probe].key) { // Key already exists. return false; @@ -155,9 +153,9 @@ class HashMap } else { - // Copy key. - std::size_t key_length = std::min(key.size(), MaxKeyLength); - key.copy(m_Data[probe].key, key_length); + // Copy key, ensuring null-termination. + key.copy(m_Data[probe].key, key.size() - 1); + m_Data[probe].key[key.size()] = '\0'; // Copy m_Data. m_Data[probe].value = value; @@ -173,15 +171,15 @@ class HashMap return false; } - Value *Find(const std::string &key) const + Value *Find(std::string_view key) const { - if (key.size() > MaxKeyLength) + if (key.size() >= MaxKeyLength) { // Key is too long to fit in the fixed-size buffer. 
return nullptr; } - size_t index = hash(key); + size_t index = GetIndex(key); for (size_t i = 0; i < Size; ++i) { @@ -189,7 +187,7 @@ class HashMap EntryFlags flags = m_Data[probe].flags.load(std::memory_order_acquire); - if (flags == EntryFlags::OCCUPIED && AreKeysTheSame(m_Data[probe].key, key.c_str())) + if (flags == EntryFlags::OCCUPIED && key == m_Data[probe].key) { return &m_Data[probe].value; } @@ -204,11 +202,6 @@ class HashMap // Key not found. return nullptr; } - - bool AreKeysTheSame(const char *ky1, const char *ky2) const - { - return std::strncmp(ky1, ky2, MaxKeyLength) == 0; - } }; #endif // HASH_MAP_H From a0cfe0be31c4c751d2a65a88aad86ad6b1b27ce6 Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Thu, 19 Dec 2024 21:35:45 -0800 Subject: [PATCH 31/34] Fix fence-post counting issue. --- catkit_core/HashMap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catkit_core/HashMap.h b/catkit_core/HashMap.h index 8ff8bb5f..30b88ee9 100644 --- a/catkit_core/HashMap.h +++ b/catkit_core/HashMap.h @@ -154,7 +154,7 @@ class HashMap else { // Copy key, ensuring null-termination. - key.copy(m_Data[probe].key, key.size() - 1); + key.copy(m_Data[probe].key, key.size()); m_Data[probe].key[key.size()] = '\0'; // Copy m_Data. From 820b5657f036053455070cde2bd66b884b08e6cd Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Thu, 19 Dec 2024 21:43:42 -0800 Subject: [PATCH 32/34] Use Open() and Create(). --- benchmarks/free_list_allocator.cpp | 23 ++++++++------------- catkit_core/FreeListAllocator.cpp | 33 ++++++++++++++++++++---------- catkit_core/FreeListAllocator.h | 9 +++++--- 3 files changed, 37 insertions(+), 28 deletions(-) diff --git a/benchmarks/free_list_allocator.cpp b/benchmarks/free_list_allocator.cpp index 22a8bb7c..a66d733a 100644 --- a/benchmarks/free_list_allocator.cpp +++ b/benchmarks/free_list_allocator.cpp @@ -4,8 +4,6 @@ void benchmark_linux_scalability() { - typedef FreeListAllocator Allocator; - const size_t N = 10000000; const size_t NUM_BLOCKS = N * 2; @@ -16,19 +14,18 @@ void benchmark_linux_scalability() size_t buffer_size = FreeListAllocator::ComputeMetadataBufferSize(NUM_BLOCKS); char *buffer = new char[buffer_size]; - FreeListAllocator allocator(buffer); - allocator.Initialize(NUM_BLOCKS, ALIGNMENT, NUM_BLOCKS * ALIGNMENT); + auto allocator = FreeListAllocator::Create(buffer, NUM_BLOCKS, ALIGNMENT, NUM_BLOCKS * ALIGNMENT); auto start = GetTimeStamp(); for (size_t i = 0; i < N; ++i) { - handles[i] = allocator.Allocate(16); + handles[i] = allocator->Allocate(16); } for (size_t i = 0; i < N; ++i) { - allocator.Deallocate(handles[i]); + allocator->Deallocate(handles[i]); } auto end = GetTimeStamp(); @@ -54,8 +51,7 @@ void benchmark_threadtest() size_t buffer_size = FreeListAllocator::ComputeMetadataBufferSize(NUM_BLOCKS); char *buffer = new char[buffer_size]; - FreeListAllocator allocator(buffer); - allocator.Initialize(NUM_BLOCKS, ALIGNMENT, NUM_BLOCKS * ALIGNMENT); + auto allocator = FreeListAllocator::Create(buffer, NUM_BLOCKS, ALIGNMENT, NUM_BLOCKS * ALIGNMENT); auto start = GetTimeStamp(); @@ -63,12 +59,12 @@ void benchmark_threadtest() { for (size_t j = 0; j < N; ++j) { - handles[j] = allocator.Allocate(16); + handles[j] = allocator->Allocate(16); } for (size_t j = 0; j < N; ++j) { - allocator.Deallocate(handles[j]); + allocator->Deallocate(handles[j]); } } @@ -101,8 +97,7 @@ void benchmark_larson() size_t buffer_size = FreeListAllocator::ComputeMetadataBufferSize(NUM_BLOCKS); char *buffer = new char[buffer_size]; - FreeListAllocator allocator(buffer); - 
allocator.Initialize(NUM_BLOCKS, ALIGNMENT, MAX_SIZE * NUM_BLOCKS); + auto allocator = FreeListAllocator::Create(buffer, NUM_BLOCKS, ALIGNMENT, MAX_SIZE * NUM_BLOCKS); auto *indices = new size_t[N]; auto *sizes = new size_t[N]; @@ -121,10 +116,10 @@ void benchmark_larson() if (handles[index] != -1) { - allocator.Deallocate(handles[index]); + allocator->Deallocate(handles[index]); } - handles[index] = allocator.Allocate(size); + handles[index] = allocator->Allocate(size); } auto end = GetTimeStamp(); diff --git a/catkit_core/FreeListAllocator.cpp b/catkit_core/FreeListAllocator.cpp index dbb662c8..7c41dc4c 100644 --- a/catkit_core/FreeListAllocator.cpp +++ b/catkit_core/FreeListAllocator.cpp @@ -76,24 +76,35 @@ std::size_t FreeListAllocator::ComputeMetadataBufferSize(std::size_t max_num_blo return size; } -void FreeListAllocator::Initialize(std::size_t max_num_blocks, std::size_t alignment, std::size_t buffer_size) +std::shared_ptr FreeListAllocator::Open(void *metadata_buffer) { - std::copy(VERSION, VERSION + sizeof(VERSION), m_Header.version); - m_Header.max_num_blocks = max_num_blocks; - m_Header.alignment = alignment; - m_Header.total_buffer_size = buffer_size; + return std::shared_ptr(new FreeListAllocator(metadata_buffer)); +} + +std::shared_ptr FreeListAllocator::Create(void *metadata_buffer, std::size_t max_num_blocks, std::size_t alignment, std::size_t buffer_size) +{ + auto allocator = std::shared_ptr(new FreeListAllocator(metadata_buffer)); + + auto &header = allocator->m_Header; + + std::copy(VERSION, VERSION + sizeof(VERSION), header.version); + header.max_num_blocks = max_num_blocks; + header.alignment = alignment; + header.total_buffer_size = buffer_size; // Initialize the internal allocator. - m_BlockAllocator.Initialize(max_num_blocks); + allocator->m_BlockAllocator.Initialize(max_num_blocks); // Initialize the free list. - m_Head = m_BlockAllocator.Allocate(); + allocator->m_Head = allocator->m_BlockAllocator.Allocate(); - std::size_t block_list_offset = sizeof(Header) + PoolAllocator::CalculateMetadataBufferSize(m_MaxNumBlocks); - m_Blocks = reinterpret_cast(static_cast(m_MetadataBuffer) + block_list_offset); + std::size_t block_list_offset = sizeof(Header) + PoolAllocator::CalculateMetadataBufferSize(max_num_blocks); + allocator->m_Blocks = reinterpret_cast(static_cast(metadata_buffer) + block_list_offset); + + allocator->m_Blocks[allocator->m_Head].descriptor = BlockDescriptor(0, buffer_size, true); + allocator->m_Blocks[allocator->m_Head].next = INVALID_HANDLE; - m_Blocks[m_Head].descriptor = BlockDescriptor(0, buffer_size, true); - m_Blocks[m_Head].next = INVALID_HANDLE; + return allocator; } typename FreeListAllocator::BlockHandle FreeListAllocator::Allocate(std::size_t size) diff --git a/catkit_core/FreeListAllocator.h b/catkit_core/FreeListAllocator.h index e035c464..cc8194f8 100644 --- a/catkit_core/FreeListAllocator.h +++ b/catkit_core/FreeListAllocator.h @@ -5,10 +5,14 @@ #include #include +#include // A simple lock-free free list allocator. 
class FreeListAllocator { +private: + FreeListAllocator(void *metadata_buffer); + public: using BlockHandle = PoolAllocator::BlockHandle; using Offset = std::uint32_t; @@ -16,11 +20,10 @@ class FreeListAllocator static const BlockHandle INVALID_HANDLE = PoolAllocator::INVALID_HANDLE; - FreeListAllocator(void *metadata_buffer); - static std::size_t ComputeMetadataBufferSize(std::size_t max_num_blocks); - void Initialize(std::size_t max_num_blocks, std::size_t alignment, std::size_t buffer_size); + static std::shared_ptr Create(void *metadata_buffer, std::size_t max_num_blocks, std::size_t alignment, std::size_t buffer_size); + static std::shared_ptr Open(void *metadata_buffer); BlockHandle Allocate(std::size_t size); void Deallocate(BlockHandle index); From 8cbc70cb71381cf5b3001750ef72a4fdb2dd897c Mon Sep 17 00:00:00 2001 From: Emiel Por Date: Fri, 20 Dec 2024 01:04:01 -0800 Subject: [PATCH 33/34] Use Open() and Create() functions instead of Initialize(). --- benchmarks/pool_allocator.cpp | 7 ++-- catkit_core/FreeListAllocator.cpp | 67 +++++++++++++++++++------------ catkit_core/FreeListAllocator.h | 11 +++-- catkit_core/PoolAllocator.cpp | 48 ++++++++++++++++------ catkit_core/PoolAllocator.h | 12 ++++-- 5 files changed, 92 insertions(+), 53 deletions(-) diff --git a/benchmarks/pool_allocator.cpp b/benchmarks/pool_allocator.cpp index 6dd2df38..d9357a16 100644 --- a/benchmarks/pool_allocator.cpp +++ b/benchmarks/pool_allocator.cpp @@ -9,8 +9,7 @@ void benchmark_linux_scalability() char *buffer = new char[PoolAllocator::CalculateMetadataBufferSize(CAPACITY)]; - PoolAllocator allocator(buffer); - allocator.Initialize(CAPACITY); + auto allocator = PoolAllocator::Create(buffer, CAPACITY); auto *handles = new PoolAllocator::BlockHandle[N]; @@ -18,12 +17,12 @@ void benchmark_linux_scalability() for (size_t i = 0; i < N; ++i) { - handles[i] = allocator.Allocate(); + handles[i] = allocator->Allocate(); } for (size_t i = 0; i < N; ++i) { - allocator.Deallocate(handles[i]); + allocator->Deallocate(handles[i]); } auto end = GetTimeStamp(); diff --git a/catkit_core/FreeListAllocator.cpp b/catkit_core/FreeListAllocator.cpp index 7c41dc4c..5d005068 100644 --- a/catkit_core/FreeListAllocator.cpp +++ b/catkit_core/FreeListAllocator.cpp @@ -55,16 +55,14 @@ void FreeListAllocator::BlockDescriptor::SetFree(const bool &is_free) m_SizeAndFreeFlag = (m_SizeAndFreeFlag & ~_FREE_FLAG) | (_FREE_FLAG * is_free); } -FreeListAllocator::FreeListAllocator(void *metadata_buffer) - : m_Header(*static_cast
(metadata_buffer)), +FreeListAllocator::FreeListAllocator(Header *header, std::shared_ptr block_allocator, Block *blocks) + : m_Header(*header), + m_BlockAllocator(block_allocator), + m_Blocks(blocks), m_MaxNumBlocks(m_Header.max_num_blocks), m_Head(m_Header.head), - m_Alignment(m_Header.alignment), - m_BlockAllocator(static_cast(metadata_buffer) + sizeof(Header)), - m_MetadataBuffer(metadata_buffer) + m_Alignment(m_Header.alignment) { - std::size_t block_list_offset = sizeof(Header) + PoolAllocator::CalculateMetadataBufferSize(m_MaxNumBlocks); - m_Blocks = reinterpret_cast(static_cast(m_MetadataBuffer) + block_list_offset); } std::size_t FreeListAllocator::ComputeMetadataBufferSize(std::size_t max_num_blocks) @@ -76,35 +74,52 @@ std::size_t FreeListAllocator::ComputeMetadataBufferSize(std::size_t max_num_blo return size; } +void FreeListAllocator::GetMemoryLayout(void *metadata_buffer, std::size_t max_num_blocks, void **block_allocator_memory, Block **blocks) +{ + std::size_t offset = sizeof(Header); + *block_allocator_memory = static_cast(static_cast(metadata_buffer) + offset); + + offset += PoolAllocator::CalculateMetadataBufferSize(max_num_blocks); + *blocks = reinterpret_cast(static_cast(metadata_buffer) + offset); +} + std::shared_ptr FreeListAllocator::Open(void *metadata_buffer) { - return std::shared_ptr(new FreeListAllocator(metadata_buffer)); + Header *header = static_cast
<Header *>(metadata_buffer);
+
+	void *block_allocator_memory;
+	Block *blocks;
+	GetMemoryLayout(metadata_buffer, header->max_num_blocks, &block_allocator_memory, &blocks);
+
+	auto block_allocator = PoolAllocator::Open(block_allocator_memory);
+
+	return std::shared_ptr<FreeListAllocator>(new FreeListAllocator(header, block_allocator, blocks));
+}
+
+std::shared_ptr<FreeListAllocator> FreeListAllocator::Create(void *metadata_buffer, std::size_t max_num_blocks, std::size_t alignment, std::size_t buffer_size)
+{
+	Header *header = static_cast<Header *>
(metadata_buffer); - auto &header = allocator->m_Header; + void *block_allocator_memory; + Block *blocks; + GetMemoryLayout(metadata_buffer, header->max_num_blocks, &block_allocator_memory, &blocks); - std::copy(VERSION, VERSION + sizeof(VERSION), header.version); - header.max_num_blocks = max_num_blocks; - header.alignment = alignment; - header.total_buffer_size = buffer_size; + // Fill in the header information. + std::copy(VERSION, VERSION + sizeof(VERSION), header->version); + header->max_num_blocks = max_num_blocks; + header->alignment = alignment; + header->total_buffer_size = buffer_size; - // Initialize the internal allocator. - allocator->m_BlockAllocator.Initialize(max_num_blocks); + // Create the block allocator. + auto block_allocator = PoolAllocator::Create(block_allocator_memory, max_num_blocks); // Initialize the free list. - allocator->m_Head = allocator->m_BlockAllocator.Allocate(); - - std::size_t block_list_offset = sizeof(Header) + PoolAllocator::CalculateMetadataBufferSize(max_num_blocks); - allocator->m_Blocks = reinterpret_cast(static_cast(metadata_buffer) + block_list_offset); + header->head = block_allocator->Allocate(); - allocator->m_Blocks[allocator->m_Head].descriptor = BlockDescriptor(0, buffer_size, true); - allocator->m_Blocks[allocator->m_Head].next = INVALID_HANDLE; + blocks[header->head].descriptor = BlockDescriptor(0, buffer_size, true); + blocks[header->head].next = INVALID_HANDLE; - return allocator; + return std::shared_ptr(new FreeListAllocator(header, block_allocator, blocks)); } typename FreeListAllocator::BlockHandle FreeListAllocator::Allocate(std::size_t size) @@ -182,7 +197,7 @@ typename FreeListAllocator::BlockHandle FreeListAllocator::Allocate(std::size_t // We now have a block that is large enough to allocate the requested size. // Add a new block for the remaining free space. - PoolAllocator::BlockHandle allocated_block_handle = m_BlockAllocator.Allocate(); + PoolAllocator::BlockHandle allocated_block_handle = m_BlockAllocator->Allocate(); DEBUG_PRINT("Allocated block handle is " << allocated_block_handle); Block &allocated_block = m_Blocks[allocated_block_handle]; @@ -238,7 +253,7 @@ void FreeListAllocator::Deallocate(BlockHandle index) if (!owns_index) RemoveBlock(index); - m_BlockAllocator.Deallocate(index); + m_BlockAllocator->Deallocate(index); index = prev; owns_index = false; @@ -252,7 +267,7 @@ void FreeListAllocator::Deallocate(BlockHandle index) // The next block is no longer valid. Deallocate it. RemoveBlock(index); - m_BlockAllocator.Deallocate(index); + m_BlockAllocator->Deallocate(index); index = next; owns_index = false; diff --git a/catkit_core/FreeListAllocator.h b/catkit_core/FreeListAllocator.h index cc8194f8..b32c261e 100644 --- a/catkit_core/FreeListAllocator.h +++ b/catkit_core/FreeListAllocator.h @@ -10,9 +10,6 @@ // A simple lock-free free list allocator. 
diff --git a/catkit_core/FreeListAllocator.h b/catkit_core/FreeListAllocator.h
index cc8194f8..b32c261e 100644
--- a/catkit_core/FreeListAllocator.h
+++ b/catkit_core/FreeListAllocator.h
@@ -10,9 +10,6 @@
 // A simple lock-free free list allocator.
 class FreeListAllocator
 {
-private:
-	FreeListAllocator(void *metadata_buffer);
-
 public:
 	using BlockHandle = PoolAllocator::BlockHandle;
 	using Offset = std::uint32_t;
@@ -85,17 +82,19 @@ class FreeListAllocator
 	static_assert(offsetof(Header, head) == 16);
 	static_assert(sizeof(Header) == 20);
 
+	FreeListAllocator(Header *header, std::shared_ptr<PoolAllocator> block_allocator, Block *blocks);
+
+	static void GetMemoryLayout(void *metadata_buffer, std::size_t max_num_blocks, void **block_allocator_memory, Block **blocks);
+
 	Header &m_Header;
 
 	std::uint32_t &m_MaxNumBlocks;
 	std::uint32_t &m_Alignment;
 	std::atomic<BlockHandle> &m_Head;
 
-	PoolAllocator m_BlockAllocator;
+	std::shared_ptr<PoolAllocator> m_BlockAllocator;
 
 	Block *m_Blocks;
 
-	void *m_MetadataBuffer;
-
 	BlockHandle FindFirstFreeBlock(Size size);
 
 	void InsertBlockSorted(BlockHandle index);
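For reference, the metadata-buffer layout that GetMemoryLayout and the static_asserts above pin down (reconstructed from this diff; offsets in bytes):

	// [0, 20)       Header: version[4], max_num_blocks, alignment,
	//               total_buffer_size, atomic head (offsetof(head) == 16).
	// [20, 20 + P)  PoolAllocator metadata, where
	//               P = PoolAllocator::CalculateMetadataBufferSize(max_num_blocks).
	// [20 + P, ...) Block[max_num_blocks]: the descriptor and next-handle
	//               entries that form the free list.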
diff --git a/catkit_core/PoolAllocator.cpp b/catkit_core/PoolAllocator.cpp
index a5896020..715751d4 100644
--- a/catkit_core/PoolAllocator.cpp
+++ b/catkit_core/PoolAllocator.cpp
@@ -4,34 +4,56 @@
 
 const std::uint8_t VERSION[4] = {0, 0, 0, 0};
 
-PoolAllocator::PoolAllocator(void *metadata_buffer)
-	: m_Header(*static_cast<Header *>(metadata_buffer)),
-	m_Capacity(m_Header.capacity),
-	m_Head(m_Header.head),
-	m_Next(reinterpret_cast<std::atomic<BlockHandle> *>(static_cast<char *>(metadata_buffer) + sizeof(Header)))
+PoolAllocator::PoolAllocator(Header *header, std::atomic<BlockHandle> *next)
+	: m_Header(*header),
+	m_Next(next),
+	m_Capacity(m_Header.capacity),
+	m_Head(m_Header.head)
 {
 }
 
-void PoolAllocator::Initialize(std::uint32_t capacity)
+void PoolAllocator::GetMemoryLayout(void *metadata_buffer, std::atomic<BlockHandle> **next)
 {
+	*next = reinterpret_cast<std::atomic<BlockHandle> *>(static_cast<char *>(metadata_buffer) + sizeof(Header));
+}
+
+std::shared_ptr<PoolAllocator> PoolAllocator::Create(void *metadata_buffer, std::uint32_t capacity)
+{
+	Header *header = static_cast<Header *>(metadata_buffer);
+
+	std::atomic<BlockHandle> *next;
+	GetMemoryLayout(metadata_buffer, &next);
+
 	// Set version and capacity.
-	std::copy(VERSION, VERSION + sizeof(VERSION), m_Header.version);
-	m_Capacity = capacity;
+	std::copy(VERSION, VERSION + sizeof(VERSION), header->version);
+	header->capacity = capacity;
 
 	// Initialize the linked list.
-	m_Head.store(0, std::memory_order_relaxed);
+	header->head.store(0, std::memory_order_relaxed);
 
-	for (std::size_t i = 0; i < m_Capacity; ++i)
+	for (std::size_t i = 0; i < capacity; ++i)
 	{
-		if (i == m_Capacity - 1)
+		if (i == capacity - 1)
 		{
-			m_Next[i] = INVALID_HANDLE;
+			next[i] = INVALID_HANDLE;
 		}
 		else
 		{
-			m_Next[i] = i + 1;
+			next[i] = i + 1;
 		}
 	}
+
+	return std::shared_ptr<PoolAllocator>(new PoolAllocator(header, next));
+}
+
+std::shared_ptr<PoolAllocator> PoolAllocator::Open(void *metadata_buffer)
+{
+	Header *header = static_cast<Header *>(metadata_buffer);
+
+	std::atomic<BlockHandle> *next;
+	GetMemoryLayout(metadata_buffer, &next);
+
+	return std::shared_ptr<PoolAllocator>(new PoolAllocator(header, next));
 }
 
 std::size_t PoolAllocator::CalculateMetadataBufferSize(std::uint32_t capacity)
diff --git a/catkit_core/PoolAllocator.h b/catkit_core/PoolAllocator.h
index 2558120a..33ffa267 100644
--- a/catkit_core/PoolAllocator.h
+++ b/catkit_core/PoolAllocator.h
@@ -5,6 +5,7 @@
 #include <atomic>
 #include <cstdint>
 #include <limits>
+#include <memory>
 
 // A simple lock-free pool allocator.
 class PoolAllocator
@@ -13,12 +14,11 @@ class PoolAllocator
 	using BlockHandle = std::uint32_t;
 	static const BlockHandle INVALID_HANDLE = std::numeric_limits<BlockHandle>::max();
 
-	PoolAllocator(void *metadata_buffer);
-
-	void Initialize(std::uint32_t capacity);
-
 	static std::size_t CalculateMetadataBufferSize(std::uint32_t capacity);
 
+	static std::shared_ptr<PoolAllocator> Create(void *metadata_buffer, std::uint32_t capacity);
+	static std::shared_ptr<PoolAllocator> Open(void *metadata_buffer);
+
 	BlockHandle Allocate();
 	void Deallocate(BlockHandle index);
 
@@ -36,6 +36,10 @@
 	static_assert(offsetof(PoolAllocator::Header, head) == 8);
 	static_assert(sizeof(PoolAllocator::Header) == 12);
 
+	PoolAllocator(Header *header, std::atomic<BlockHandle> *next);
+
+	static void GetMemoryLayout(void *metadata_buffer, std::atomic<BlockHandle> **next);
+
 	Header &m_Header;
 
 	std::uint32_t &m_Capacity;
 
From 7c5bd95530c2904e781118fbdb33ad2f9bee4a79 Mon Sep 17 00:00:00 2001
From: Emiel Por
Date: Fri, 20 Dec 2024 13:09:27 -0800
Subject: [PATCH 34/34] Use actual number of blocks, not read from non-initialized memory.
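In Create, GetMemoryLayout was called with header->max_num_blocks before any header field had been assigned, so on a freshly mapped buffer the block-array offset was derived from uninitialized memory. Condensed from the function, the problematic ordering was:

	Header *header = static_cast<Header *>(metadata_buffer);
	...
	GetMemoryLayout(metadata_buffer, header->max_num_blocks, ...);  // reads uninitialized memory
	...
	header->max_num_blocks = max_num_blocks;  // only assigned here

Passing the caller-supplied max_num_blocks instead makes the layout independent of the buffer's previous contents.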
---
 catkit_core/FreeListAllocator.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/catkit_core/FreeListAllocator.cpp b/catkit_core/FreeListAllocator.cpp
index 5d005068..cfc8ec10 100644
--- a/catkit_core/FreeListAllocator.cpp
+++ b/catkit_core/FreeListAllocator.cpp
@@ -102,7 +102,7 @@ std::shared_ptr<FreeListAllocator> FreeListAllocator::Create(void *metadata_buff
 
 	void *block_allocator_memory;
 	Block *blocks;
-	GetMemoryLayout(metadata_buffer, header->max_num_blocks, &block_allocator_memory, &blocks);
+	GetMemoryLayout(metadata_buffer, max_num_blocks, &block_allocator_memory, &blocks);
 
 	// Fill in the header information.
 	std::copy(VERSION, VERSION + sizeof(VERSION), header->version);
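A sketch of how this fix could be regression-tested: poison the metadata buffer before Create, so that any read of not-yet-initialized header fields would corrupt the layout. The parameters here (128 blocks, alignment 32, a 1 MiB managed buffer) are illustrative, not taken from the patch:

	#include <algorithm>
	#include <cstdint>
	#include <vector>
	#include "FreeListAllocator.h"

	std::vector<std::uint8_t> metadata(FreeListAllocator::ComputeMetadataBufferSize(128));
	std::fill(metadata.begin(), metadata.end(), 0xAB);  // poison: deliberately non-zero garbage

	auto allocator = FreeListAllocator::Create(metadata.data(), 128, 32, 1 << 20);
	auto block = allocator->Allocate(256);  // must succeed from the single initial free block
	allocator->Deallocate(block);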