From 67349f55c4ef31725467c603216912b79c4c3a6f Mon Sep 17 00:00:00 2001 From: phuslu Date: Tue, 2 Jan 2024 11:22:07 +0800 Subject: [PATCH] switch to maphash from wyhash --- cache.go | 42 ++++----------- maphash.go | 137 +++++++++++++++++++++++++++++++++++++++++++++++ shard.go | 4 +- wyhash.go | 152 ----------------------------------------------------- 4 files changed, 148 insertions(+), 187 deletions(-) create mode 100644 maphash.go delete mode 100644 wyhash.go diff --git a/cache.go b/cache.go index b293596..c181cbc 100644 --- a/cache.go +++ b/cache.go @@ -4,14 +4,13 @@ package lru import ( "runtime" "time" - "unsafe" ) // Cache implements LRU Cache with least recent used eviction policy. type Cache[K comparable, V any] struct { - shards []shard[K, V] - mask uint32 - keysize int + shards []shard[K, V] + mask uint32 + hash func(K) uint64 } // New creates lru cache with size capacity. @@ -31,65 +30,42 @@ func newWithShards[K comparable, V any](shardcount, shardsize int) *Cache[K, V] c := &Cache[K, V]{ shards: make([]shard[K, V], shardcount), mask: uint32(shardcount) - 1, + hash: maphash_NewHasher[K]().Hash, } for i := range c.shards { c.shards[i] = *newshard[K, V](shardsize) } - var k K - switch ((any)(k)).(type) { - case string: - c.keysize = 0 - default: - c.keysize = int(unsafe.Sizeof(k)) - } - return c } -func (c *Cache[K, V]) hash(key K) uint32 { - if c.keysize == 0 { - data := *(*string)(unsafe.Pointer(&key)) - if len(data) == 0 { - return 0 - } - - return uint32(wyhash_hash(data, 0) & 0xFFFFFFFF) - } - - return uint32(wyhash_hash(*(*string)(unsafe.Pointer(&struct { - data unsafe.Pointer - len int - }{unsafe.Pointer(&key), c.keysize})), 0) & 0xFFFFFFFF) -} - // Get returns value for key. func (c *Cache[K, V]) Get(key K) (value V, ok bool) { - hash := c.hash(key) + hash := uint32(c.hash(key) & 0xFFFFFFFF) return c.shards[hash&c.mask].Get(hash, key) } // Peek returns value for key, but does not modify its recency. func (c *Cache[K, V]) Peek(key K) (value V, ok bool) { - hash := c.hash(key) + hash := uint32(c.hash(key) & 0xFFFFFFFF) return c.shards[hash&c.mask].Peek(hash, key) } // Set inserts key value pair and returns previous value, if cache was full. func (c *Cache[K, V]) Set(key K, value V) (prev V, replaced bool) { - hash := c.hash(key) + hash := uint32(c.hash(key) & 0xFFFFFFFF) return c.shards[hash&c.mask].Set(hash, c.hash, key, value, 0) } // SetWithTTL inserts key value pair with ttl and returns previous value, if cache was full. func (c *Cache[K, V]) SetWithTTL(key K, value V, ttl time.Duration) (prev V, replaced bool) { - hash := c.hash(key) + hash := uint32(c.hash(key) & 0xFFFFFFFF) return c.shards[hash&c.mask].Set(hash, c.hash, key, value, ttl) } // Delete method deletes value associated with key and returns deleted value (or empty value if key was not in cache). func (c *Cache[K, V]) Delete(key K) (prev V) { - hash := c.hash(key) + hash := uint32(c.hash(key) & 0xFFFFFFFF) return c.shards[hash&c.mask].Delete(hash, key) } diff --git a/maphash.go b/maphash.go new file mode 100644 index 0000000..b17c923 --- /dev/null +++ b/maphash.go @@ -0,0 +1,137 @@ +// Copyright 2022 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by golang.org/x/tools/cmd/bundle. DO NOT EDIT. +//go:generate bundle -o maphash.go github.com/dolthub/maphash + +package lru + +import ( + "math/rand" + "unsafe" +) + +// Hasher hashes values of type K. +// Uses runtime AES-based hashing. +type maphash_Hasher[K comparable] struct { + hash maphash_hashfn + seed uintptr +} + +// NewHasher creates a new Hasher[K] with a random seed. +func maphash_NewHasher[K comparable]() maphash_Hasher[K] { + return maphash_Hasher[K]{ + hash: maphash_getRuntimeHasher[K](), + seed: maphash_newHashSeed(), + } +} + +// NewSeed returns a copy of |h| with a new hash seed. +func maphash_NewSeed[K comparable](h maphash_Hasher[K]) maphash_Hasher[K] { + return maphash_Hasher[K]{ + hash: h.hash, + seed: maphash_newHashSeed(), + } +} + +// Hash hashes |key|. +func (h maphash_Hasher[K]) Hash(key K) uint64 { + // promise to the compiler that pointer + // |p| does not escape the stack. + p := maphash_noescape(unsafe.Pointer(&key)) + return uint64(h.hash(p, h.seed)) +} + +type maphash_hashfn func(unsafe.Pointer, uintptr) uintptr + +func maphash_getRuntimeHasher[K comparable]() (h maphash_hashfn) { + a := any(make(map[K]struct{})) + i := (*maphash_mapiface)(unsafe.Pointer(&a)) + h = i.typ.hasher + return +} + +func maphash_newHashSeed() uintptr { + return uintptr(rand.Int()) +} + +// noescape hides a pointer from escape analysis. It is the identity function +// but escape analysis doesn't think the output depends on the input. +// noescape is inlined and currently compiles down to zero instructions. +// USE CAREFULLY! +// This was copied from the runtime (via pkg "strings"); see issues 23382 and 7921. +// +//go:nosplit +//go:nocheckptr +func maphash_noescape(p unsafe.Pointer) unsafe.Pointer { + x := uintptr(p) + return unsafe.Pointer(x ^ 0) +} + +type maphash_mapiface struct { + typ *maphash_maptype + val *maphash_hmap +} + +// go/src/runtime/type.go +type maphash_maptype struct { + typ maphash__type + key *maphash__type + elem *maphash__type + bucket *maphash__type + // function for hashing keys (ptr to key, seed) -> hash + hasher func(unsafe.Pointer, uintptr) uintptr + keysize uint8 + elemsize uint8 + bucketsize uint16 + flags uint32 +} + +// go/src/runtime/map.go +type maphash_hmap struct { + count int + flags uint8 + B uint8 + noverflow uint16 + // hash seed + hash0 uint32 + buckets unsafe.Pointer + oldbuckets unsafe.Pointer + nevacuate uintptr + // true type is *mapextra + // but we don't need this data + extra unsafe.Pointer +} + +// go/src/runtime/type.go +type maphash_tflag uint8 + +type maphash_nameOff int32 + +type maphash_typeOff int32 + +// go/src/runtime/type.go +type maphash__type struct { + size uintptr + ptrdata uintptr + hash uint32 + tflag maphash_tflag + align uint8 + fieldAlign uint8 + kind uint8 + equal func(unsafe.Pointer, unsafe.Pointer) bool + gcdata *byte + str maphash_nameOff + ptrToThis maphash_typeOff +} diff --git a/shard.go b/shard.go index 5fc89e4..64ea68a 100644 --- a/shard.go +++ b/shard.go @@ -52,7 +52,7 @@ func (s *shard[K, V]) Peek(hash uint32, key K) (value V, ok bool) { return } -func (s *shard[K, V]) Set(hash uint32, hashfun func(K) uint32, key K, value V, ttl time.Duration) (prev V, replaced bool) { +func (s *shard[K, V]) Set(hash uint32, hashfun func(K) uint64, key K, value V, ttl time.Duration) (prev V, replaced bool) { s.mu.Lock() defer s.mu.Unlock() @@ -72,7 +72,7 @@ func (s *shard[K, V]) Set(hash uint32, hashfun func(K) uint32, key K, value V, t index := s.list.Back() node := &s.list.nodes[index] evictedValue := node.value - s.table.Delete(hashfun(node.key), node.key) + s.table.Delete(uint32(hashfun(node.key)&0xFFFFFFFF), node.key) node.key = key node.value = value diff --git a/wyhash.go b/wyhash.go deleted file mode 100644 index 4b5a061..0000000 --- a/wyhash.go +++ /dev/null @@ -1,152 +0,0 @@ -// Code generated by golang.org/x/tools/cmd/bundle. DO NOT EDIT. -//go:generate bundle -o wyhash.go github.com/zeebo/wyhash - -package lru - -import ( - "encoding/binary" - "math/bits" - "unsafe" -) - -type ( - wyhash_ptr = unsafe.Pointer - wyhash_uptr = uintptr - - wyhash_u32 = uint32 - wyhash_u64 = uint64 -) - -const ( - wyhash__wyp0 = 0xa0761d6478bd642f - wyhash__wyp1 = 0xe7037ed1a0b428db - wyhash__wyp2 = 0x8ebc6af09c88c6e3 - wyhash__wyp3 = 0x589965cc75374cc3 - wyhash__wyp4 = 0x1d8e4e27c47d124f -) - -func wyhash_i(p wyhash_ptr, n wyhash_uptr) wyhash_ptr { return wyhash_ptr(wyhash_uptr(p) + n) } - -func wyhash__wymum(A, B wyhash_u64) wyhash_u64 { - hi, lo := bits.Mul64(A, B) - return hi ^ lo -} - -func wyhash__wyr8(p wyhash_ptr) wyhash_u64 { - return binary.LittleEndian.Uint64((*[8]byte)(p)[:]) -} - -func wyhash__wyr4(p wyhash_ptr) wyhash_u64 { - return wyhash_u64(binary.LittleEndian.Uint32((*[4]byte)(p)[:])) -} - -func wyhash__wyr3(p wyhash_ptr, k wyhash_uptr) wyhash_u64 { - b0 := wyhash_u64(*(*byte)(p)) - b1 := wyhash_u64(*(*byte)(wyhash_i(p, k>>1))) - b2 := wyhash_u64(*(*byte)(wyhash_i(p, k-1))) - return b0<<16 | b1<<8 | b2 -} - -func wyhash__wyr9(p wyhash_ptr) wyhash_u64 { - b := (*[8]byte)(p) - return wyhash_u64(wyhash_u32(b[0])|wyhash_u32(b[1])<<8|wyhash_u32(b[2])<<16|wyhash_u32(b[3])<<24)<<32 | - wyhash_u64(wyhash_u32(b[4])|wyhash_u32(b[5])<<8|wyhash_u32(b[6])<<16|wyhash_u32(b[7])<<24) -} - -func wyhash_hash(data string, seed wyhash_u64) wyhash_u64 { - p, len := *(*wyhash_ptr)(wyhash_ptr(&data)), wyhash_uptr(len(data)) - see1, off := seed, len - - switch { - case len <= 0x03: - return wyhash__wymum(wyhash__wymum(wyhash__wyr3(p, len)^seed^wyhash__wyp0, seed^wyhash__wyp1)^seed, wyhash_u64(len)^wyhash__wyp4) - - case len <= 0x08: - return wyhash__wymum(wyhash__wymum(wyhash__wyr4(wyhash_i(p, 0x00))^seed^wyhash__wyp0, wyhash__wyr4(wyhash_i(p, len-0x04))^seed^wyhash__wyp1)^seed, wyhash_u64(len)^wyhash__wyp4) - - case len <= 0x10: - return wyhash__wymum(wyhash__wymum(wyhash__wyr9(wyhash_i(p, 0x00))^seed^wyhash__wyp0, wyhash__wyr9(wyhash_i(p, len-0x08))^seed^wyhash__wyp1)^seed, wyhash_u64(len)^wyhash__wyp4) - - case len <= 0x18: - return wyhash__wymum(wyhash__wymum(wyhash__wyr9(wyhash_i(p, 0x00))^seed^wyhash__wyp0, wyhash__wyr9(wyhash_i(p, 0x08))^seed^wyhash__wyp1)^wyhash__wymum(wyhash__wyr9(wyhash_i(p, len-0x08))^seed^wyhash__wyp2, seed^wyhash__wyp3), wyhash_u64(len)^wyhash__wyp4) - - case len <= 0x20: - return wyhash__wymum(wyhash__wymum(wyhash__wyr9(wyhash_i(p, 0x00))^seed^wyhash__wyp0, wyhash__wyr9(wyhash_i(p, 0x08))^seed^wyhash__wyp1)^wyhash__wymum(wyhash__wyr9(wyhash_i(p, 0x10))^seed^wyhash__wyp2, wyhash__wyr9(wyhash_i(p, len-0x08))^seed^wyhash__wyp3), wyhash_u64(len)^wyhash__wyp4) - - case len <= 0x100: - seed = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0x00))^seed^wyhash__wyp0, wyhash__wyr8(wyhash_i(p, 0x08))^seed^wyhash__wyp1) - see1 = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0x10))^see1^wyhash__wyp2, wyhash__wyr8(wyhash_i(p, 0x18))^see1^wyhash__wyp3) - if len > 0x40 { - seed = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0x20))^seed^wyhash__wyp0, wyhash__wyr8(wyhash_i(p, 0x28))^seed^wyhash__wyp1) - see1 = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0x30))^see1^wyhash__wyp2, wyhash__wyr8(wyhash_i(p, 0x38))^see1^wyhash__wyp3) - if len > 0x60 { - seed = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0x40))^seed^wyhash__wyp0, wyhash__wyr8(wyhash_i(p, 0x48))^seed^wyhash__wyp1) - see1 = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0x50))^see1^wyhash__wyp2, wyhash__wyr8(wyhash_i(p, 0x58))^see1^wyhash__wyp3) - if len > 0x80 { - seed = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0x60))^seed^wyhash__wyp0, wyhash__wyr8(wyhash_i(p, 0x68))^seed^wyhash__wyp1) - see1 = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0x70))^see1^wyhash__wyp2, wyhash__wyr8(wyhash_i(p, 0x78))^see1^wyhash__wyp3) - if len > 0xa0 { - seed = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0x80))^seed^wyhash__wyp0, wyhash__wyr8(wyhash_i(p, 0x88))^seed^wyhash__wyp1) - see1 = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0x90))^see1^wyhash__wyp2, wyhash__wyr8(wyhash_i(p, 0x98))^see1^wyhash__wyp3) - if len > 0xc0 { - seed = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0xa0))^seed^wyhash__wyp0, wyhash__wyr8(wyhash_i(p, 0xa8))^seed^wyhash__wyp1) - see1 = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0xb0))^see1^wyhash__wyp2, wyhash__wyr8(wyhash_i(p, 0xb8))^see1^wyhash__wyp3) - if len > 0xe0 { - seed = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0xc0))^seed^wyhash__wyp0, wyhash__wyr8(wyhash_i(p, 0xc8))^seed^wyhash__wyp1) - see1 = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0xd0))^see1^wyhash__wyp2, wyhash__wyr8(wyhash_i(p, 0xd8))^see1^wyhash__wyp3) - } - } - } - } - } - } - - off = (off-1)%0x20 + 1 - p = wyhash_i(p, len-off) - - default: - for ; off > 0x100; off, p = off-0x100, wyhash_i(p, 0x100) { - seed = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0x00))^seed^wyhash__wyp0, wyhash__wyr8(wyhash_i(p, 0x08))^seed^wyhash__wyp1) ^ wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0x10))^seed^wyhash__wyp2, wyhash__wyr8(wyhash_i(p, 0x18))^seed^wyhash__wyp3) - see1 = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0x20))^see1^wyhash__wyp1, wyhash__wyr8(wyhash_i(p, 0x28))^see1^wyhash__wyp2) ^ wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0x30))^see1^wyhash__wyp3, wyhash__wyr8(wyhash_i(p, 0x38))^see1^wyhash__wyp0) - seed = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0x40))^seed^wyhash__wyp0, wyhash__wyr8(wyhash_i(p, 0x48))^seed^wyhash__wyp1) ^ wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0x50))^seed^wyhash__wyp2, wyhash__wyr8(wyhash_i(p, 0x58))^seed^wyhash__wyp3) - see1 = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0x60))^see1^wyhash__wyp1, wyhash__wyr8(wyhash_i(p, 0x68))^see1^wyhash__wyp2) ^ wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0x70))^see1^wyhash__wyp3, wyhash__wyr8(wyhash_i(p, 0x78))^see1^wyhash__wyp0) - seed = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0x80))^seed^wyhash__wyp0, wyhash__wyr8(wyhash_i(p, 0x88))^seed^wyhash__wyp1) ^ wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0x90))^seed^wyhash__wyp2, wyhash__wyr8(wyhash_i(p, 0x98))^seed^wyhash__wyp3) - see1 = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0xa0))^see1^wyhash__wyp1, wyhash__wyr8(wyhash_i(p, 0xa8))^see1^wyhash__wyp2) ^ wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0xb0))^see1^wyhash__wyp3, wyhash__wyr8(wyhash_i(p, 0xb8))^see1^wyhash__wyp0) - seed = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0xc0))^seed^wyhash__wyp0, wyhash__wyr8(wyhash_i(p, 0xc8))^seed^wyhash__wyp1) ^ wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0xd0))^seed^wyhash__wyp2, wyhash__wyr8(wyhash_i(p, 0xd8))^seed^wyhash__wyp3) - see1 = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0xe0))^see1^wyhash__wyp1, wyhash__wyr8(wyhash_i(p, 0xe8))^see1^wyhash__wyp2) ^ wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0xf0))^see1^wyhash__wyp3, wyhash__wyr8(wyhash_i(p, 0xf8))^see1^wyhash__wyp0) - } - for ; off > 0x20; off, p = off-0x20, wyhash_i(p, 0x20) { - seed = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0x00))^seed^wyhash__wyp0, wyhash__wyr8(wyhash_i(p, 0x08))^seed^wyhash__wyp1) - see1 = wyhash__wymum(wyhash__wyr8(wyhash_i(p, 0x10))^see1^wyhash__wyp2, wyhash__wyr8(wyhash_i(p, 0x18))^see1^wyhash__wyp3) - } - } - - switch { - case off > 0x18: - seed = wyhash__wymum(wyhash__wyr9(wyhash_i(p, 0x00))^seed^wyhash__wyp0, wyhash__wyr9(wyhash_i(p, 0x08))^seed^wyhash__wyp1) - see1 = wyhash__wymum(wyhash__wyr9(wyhash_i(p, 0x10))^see1^wyhash__wyp2, wyhash__wyr9(wyhash_i(p, off-0x08))^see1^wyhash__wyp3) - - case off > 0x10: - seed = wyhash__wymum(wyhash__wyr9(wyhash_i(p, 0x00))^seed^wyhash__wyp0, wyhash__wyr9(wyhash_i(p, 0x08))^seed^wyhash__wyp1) - see1 = wyhash__wymum(wyhash__wyr9(wyhash_i(p, off-0x08))^see1^wyhash__wyp2, see1^wyhash__wyp3) - - case off > 0x08: - seed = wyhash__wymum(wyhash__wyr9(wyhash_i(p, 0x00))^seed^wyhash__wyp0, wyhash__wyr9(wyhash_i(p, off-0x08))^seed^wyhash__wyp1) - - case off > 0x03: - seed = wyhash__wymum(wyhash__wyr4(wyhash_i(p, 0x00))^seed^wyhash__wyp0, wyhash__wyr4(wyhash_i(p, off-0x04))^seed^wyhash__wyp1) - - default: - seed = wyhash__wymum(wyhash__wyr3(p, off)^seed^wyhash__wyp0, seed^wyhash__wyp1) - } - - return wyhash__wymum(seed^see1, wyhash_u64(len)^wyhash__wyp4) -} - -// HashString returns a 64bit digest of the data with different ones for every seed. -func wyhash_HashString(data string, seed uint64) uint64 { - if len(data) == 0 { - return seed - } - return wyhash_hash(*(*string)(wyhash_ptr(&data)), seed) -}