From 02c9e57b88b15b5c7bc9e422c434db7e91d62c38 Mon Sep 17 00:00:00 2001 From: yura Date: Tue, 20 Aug 2019 16:39:33 +0300 Subject: [PATCH 1/2] move test to separate package and add randomness to buf length --- murmur_test.go | 66 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 19 deletions(-) diff --git a/murmur_test.go b/murmur_test.go index 945e4e2..c219c88 100644 --- a/murmur_test.go +++ b/murmur_test.go @@ -1,9 +1,11 @@ -package murmur3 +package murmur3_test import ( "fmt" "strconv" "testing" + + "github.com/spaolacci/murmur3" ) var data = []struct { @@ -35,7 +37,7 @@ var data = []struct { func TestRefStrings(t *testing.T) { for _, elem := range data { - h32 := New32WithSeed(elem.seed) + h32 := murmur3.New32WithSeed(elem.seed) h32.Write([]byte(elem.s)) if v := h32.Sum32(); v != elem.h32 { t.Errorf("[Hash32] key: '%s', seed: '%d': 0x%x (want 0x%x)", elem.s, elem.seed, v, elem.h32) @@ -48,11 +50,11 @@ func TestRefStrings(t *testing.T) { t.Errorf("[Hash32] key: '%s', seed: '%d': %s (want %s)", elem.s, elem.seed, p, target) } - if v := Sum32WithSeed([]byte(elem.s), elem.seed); v != elem.h32 { + if v := murmur3.Sum32WithSeed([]byte(elem.s), elem.seed); v != elem.h32 { t.Errorf("[Hash32] key '%s', seed: '%d': 0x%x (want 0x%x)", elem.s, elem.seed, v, elem.h32) } - h64 := New64WithSeed(elem.seed) + h64 := murmur3.New64WithSeed(elem.seed) h64.Write([]byte(elem.s)) if v := h64.Sum64(); v != elem.h64_1 { t.Errorf("'[Hash64] key: '%s', seed: '%d': 0x%x (want 0x%x)", elem.s, elem.seed, v, elem.h64_1) @@ -65,11 +67,11 @@ func TestRefStrings(t *testing.T) { t.Errorf("[Hash64] key: '%s', seed: '%d': %s (want %s)", elem.s, elem.seed, p, target) } - if v := Sum64WithSeed([]byte(elem.s), elem.seed); v != elem.h64_1 { + if v := murmur3.Sum64WithSeed([]byte(elem.s), elem.seed); v != elem.h64_1 { t.Errorf("[Hash64] key: '%s', seed: '%d': 0x%x (want 0x%x)", elem.s, elem.seed, v, elem.h64_1) } - h128 := New128WithSeed(elem.seed) + h128 := murmur3.New128WithSeed(elem.seed) h128.Write([]byte(elem.s)) if v1, v2 := h128.Sum128(); v1 != elem.h64_1 || v2 != elem.h64_2 { @@ -83,7 +85,7 @@ func TestRefStrings(t *testing.T) { t.Errorf("[Hash128] key: '%s', seed: '%d': %s (want %s)", elem.s, elem.seed, p, target) } - if v1, v2 := Sum128WithSeed([]byte(elem.s), elem.seed); v1 != elem.h64_1 || v2 != elem.h64_2 { + if v1, v2 := murmur3.Sum128WithSeed([]byte(elem.s), elem.seed); v1 != elem.h64_1 || v2 != elem.h64_2 { t.Errorf("[Hash128] key: '%s', seed: '%d': 0x%x-0x%x (want 0x%x-0x%x)", elem.s, elem.seed, v1, v2, elem.h64_1, elem.h64_2) } } @@ -91,8 +93,8 @@ func TestRefStrings(t *testing.T) { func TestIncremental(t *testing.T) { for _, elem := range data { - h32 := New32WithSeed(elem.seed) - h128 := New128WithSeed(elem.seed) + h32 := murmur3.New32WithSeed(elem.seed) + h128 := murmur3.New128WithSeed(elem.seed) var i, j int for k := len(elem.s); i < k; i = j { j = 2*i + 3 @@ -114,17 +116,25 @@ func TestIncremental(t *testing.T) { } } +var sum uint64 + func Benchmark32(b *testing.B) { buf := make([]byte, 8192) + var r uint64 for length := 1; length <= cap(buf); length *= 2 { + d := length / 4 b.Run(strconv.Itoa(length), func(b *testing.B) { - buf = buf[:length] - b.SetBytes(int64(length)) b.ReportAllocs() b.ResetTimer() + total := 0 for i := 0; i < b.N; i++ { - Sum32(buf) + l := length - rnd(&r, d) + bufn := buf[:l] + total += l + h1 := murmur3.Sum32(bufn) + sum += uint64(h1) } + b.SetBytes(int64(total / b.N)) }) } } @@ -144,7 +154,7 @@ func BenchmarkPartial32(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - hasher := New32() + hasher := murmur3.New32() hasher.Write(buf[0:start]) for j := start; j+k <= length; j += k { @@ -160,30 +170,48 @@ func BenchmarkPartial32(b *testing.B) { func Benchmark64(b *testing.B) { buf := make([]byte, 8192) + var r uint64 for length := 1; length <= cap(buf); length *= 2 { + d := length / 4 b.Run(strconv.Itoa(length), func(b *testing.B) { - buf = buf[:length] - b.SetBytes(int64(length)) b.ReportAllocs() b.ResetTimer() + total := 0 for i := 0; i < b.N; i++ { - Sum64(buf) + l := length - rnd(&r, d) + bufn := buf[:l] + total += l + sum += murmur3.Sum64(bufn) } + b.SetBytes(int64(total / b.N)) }) } } func Benchmark128(b *testing.B) { buf := make([]byte, 8192) + var r uint64 for length := 1; length <= cap(buf); length *= 2 { + d := length / 4 b.Run(strconv.Itoa(length), func(b *testing.B) { - buf = buf[:length] - b.SetBytes(int64(length)) b.ReportAllocs() b.ResetTimer() + total := 0 for i := 0; i < b.N; i++ { - Sum128(buf) + l := length - rnd(&r, d) + bufn := buf[:l] + total += l + h1, _ := murmur3.Sum128(bufn) + sum += h1 } + b.SetBytes(int64(total / b.N)) }) } } + +func rnd(n *uint64, max int) int { + x := *n*5 + 0x23456789 + *n = x + k := (x >> 32) * uint64(max) + return int(k >> 32) +} From 6e5e90107b99cb7c49570d9f328e305602beeff0 Mon Sep 17 00:00:00 2001 From: yura Date: Tue, 20 Aug 2019 16:40:04 +0300 Subject: [PATCH 2/2] use inlined implementation of Sum128WithSeed --- murmur128.go | 106 ++++++++++++++++++++++++++++++++++++++++++++++++--- murmur64.go | 6 +-- 2 files changed, 101 insertions(+), 11 deletions(-) diff --git a/murmur128.go b/murmur128.go index a4fd7e7..55134d6 100644 --- a/murmur128.go +++ b/murmur128.go @@ -189,10 +189,104 @@ func Sum128(data []byte) (h1 uint64, h2 uint64) { return Sum128WithSeed(data, 0) // hasher := New128WithSeed(seed) // hasher.Write(data) // return hasher.Sum128() -func Sum128WithSeed(data []byte, seed uint32) (h1 uint64, h2 uint64) { - d := digest128{h1: uint64(seed), h2: uint64(seed)} - d.seed = seed - d.tail = d.bmix(data) - d.clen = len(data) - return d.Sum128() +func Sum128WithSeed(p []byte, seed uint32) (h1 uint64, h2 uint64) { + h1, h2 = uint64(seed), uint64(seed) + + nblocks := len(p) / 16 + for i := 0; i < nblocks; i++ { + t := (*[2]uint64)(unsafe.Pointer(&p[i*16])) + k1, k2 := t[0], t[1] + + k1 *= c1_128 + k1 = bits.RotateLeft64(k1, 31) + k1 *= c2_128 + h1 ^= k1 + + h1 = bits.RotateLeft64(h1, 27) + h1 += h2 + h1 = h1*5 + 0x52dce729 + + k2 *= c2_128 + k2 = bits.RotateLeft64(k2, 33) + k2 *= c1_128 + h2 ^= k2 + + h2 = bits.RotateLeft64(h2, 31) + h2 += h1 + h2 = h2*5 + 0x38495ab5 + } + + tail := p[nblocks*16:] + var k1, k2 uint64 + switch len(tail) & 15 { + case 15: + k2 ^= uint64(tail[14]) << 48 + fallthrough + case 14: + k2 ^= uint64(tail[13]) << 40 + fallthrough + case 13: + k2 ^= uint64(tail[12]) << 32 + fallthrough + case 12: + k2 ^= uint64(tail[11]) << 24 + fallthrough + case 11: + k2 ^= uint64(tail[10]) << 16 + fallthrough + case 10: + k2 ^= uint64(tail[9]) << 8 + fallthrough + case 9: + k2 ^= uint64(tail[8]) << 0 + + k2 *= c2_128 + k2 = bits.RotateLeft64(k2, 33) + k2 *= c1_128 + h2 ^= k2 + + fallthrough + + case 8: + k1 ^= uint64(tail[7]) << 56 + fallthrough + case 7: + k1 ^= uint64(tail[6]) << 48 + fallthrough + case 6: + k1 ^= uint64(tail[5]) << 40 + fallthrough + case 5: + k1 ^= uint64(tail[4]) << 32 + fallthrough + case 4: + k1 ^= uint64(tail[3]) << 24 + fallthrough + case 3: + k1 ^= uint64(tail[2]) << 16 + fallthrough + case 2: + k1 ^= uint64(tail[1]) << 8 + fallthrough + case 1: + k1 ^= uint64(tail[0]) << 0 + k1 *= c1_128 + k1 = bits.RotateLeft64(k1, 31) + k1 *= c2_128 + h1 ^= k1 + } + + h1 ^= uint64(len(p)) + h2 ^= uint64(len(p)) + + h1 += h2 + h2 += h1 + + h1 = fmix64(h1) + h2 = fmix64(h2) + + h1 += h2 + h2 += h1 + + return } diff --git a/murmur64.go b/murmur64.go index 04fd82a..7a61b09 100644 --- a/murmur64.go +++ b/murmur64.go @@ -48,10 +48,6 @@ func Sum64(data []byte) uint64 { return Sum64WithSeed(data, 0) } // hasher.Write(data) // return hasher.Sum64() func Sum64WithSeed(data []byte, seed uint32) uint64 { - d := digest128{h1: uint64(seed), h2: uint64(seed)} - d.seed = seed - d.tail = d.bmix(data) - d.clen = len(data) - h1, _ := d.Sum128() + h1, _ := Sum128WithSeed(data, seed) return h1 }