Skip to content

Commit

Permalink
Add initial support for custom seeds
Browse files Browse the repository at this point in the history
This adds support for custom seeds when using a Digest (but not to the
one-shot functions Sum64 and Sum64String).

The seed is not stored in the digest itself -- every Reset uses a zero
seed and (Un)MarshalBinary is unchanged. This is simpler for backward
compatibility but may be something to reconsider if we rework the API
for a v3.
  • Loading branch information
cespare committed Apr 4, 2024
1 parent 21fc82b commit 998dce2
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 29 deletions.
29 changes: 22 additions & 7 deletions xxhash.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,13 @@ const (
// Store the primes in an array as well.
//
// The consts are used when possible in Go code to avoid MOVs but we need a
// contiguous array for the assembly code.
var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5}

// Digest implements hash.Hash64.
//
// Note that a zero-valued Digest is not ready to receive writes.
// Call Reset or create a Digest using New before calling other methods.
type Digest struct {
v1 uint64
v2 uint64
Expand All @@ -33,19 +36,31 @@ type Digest struct {
n int // how much of mem is used
}

// New creates a new Digest that computes the 64-bit xxHash algorithm
// with a zero seed. It is shorthand for NewWithSeed(0).
func New() *Digest {
return NewWithSeed(0)
}

// NewWithSeed creates a new Digest with the given seed.
//
// The Digest is initialized by a single call to ResetWithSeed, which
// overwrites every field a preceding Reset would have set, so no separate
// Reset call is needed.
func NewWithSeed(seed uint64) *Digest {
	var d Digest
	d.ResetWithSeed(seed)
	return &d
}

// Reset clears the Digest's state so that it can be reused.
// It uses a seed value of zero.
//
// All initialization is delegated to ResetWithSeed so that the seeding
// logic lives in exactly one place.
func (d *Digest) Reset() {
	d.ResetWithSeed(0)
}

// ResetWithSeed discards whatever the Digest has accumulated so that it can
// be reused, and initializes its four accumulators from the given seed.
func (d *Digest) ResetWithSeed(seed uint64) {
	// Zero the running total and the count of bytes currently held in mem.
	d.total, d.n = 0, 0

	// Derive the accumulators from the seed. The arithmetic is on uint64
	// values and wraps around on overflow.
	d.v4 = seed - prime1
	d.v3 = seed
	d.v2 = seed + prime2
	d.v1 = d.v2 + prime1
}
Expand Down
2 changes: 1 addition & 1 deletion xxhash_asm.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

package xxhash

// Sum64 computes the 64-bit xxHash digest of b with a zero seed.
//
//go:noescape
func Sum64(b []byte) uint64
Expand Down
2 changes: 1 addition & 1 deletion xxhash_other.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

package xxhash

// Sum64 computes the 64-bit xxHash digest of b.
// Sum64 computes the 64-bit xxHash digest of b with a zero seed.
func Sum64(b []byte) uint64 {
// A simpler version would be
// d := New()
Expand Down
2 changes: 1 addition & 1 deletion xxhash_safe.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

package xxhash

// Sum64String computes the 64-bit xxHash digest of s with a zero seed.
// The string is converted to a byte slice and hashed with Sum64.
func Sum64String(s string) uint64 {
	b := []byte(s)
	return Sum64(b)
}
Expand Down
68 changes: 50 additions & 18 deletions xxhash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,45 +4,60 @@ import (
"bytes"
"encoding/binary"
"fmt"
"math"
"strings"
"testing"
)

func TestAll(t *testing.T) {
// Exactly 63 characters, which exercises all code paths.
const s63 = "Call me Ishmael. Some years ago--never mind how long precisely-"
for _, tt := range []struct {
name string
input string
seed uint64
want uint64
}{
{"empty", "", 0xef46db3751d8e999},
{"a", "a", 0xd24ec4f1a98c6e5b},
{"as", "as", 0x1c330fb2d66be179},
{"asd", "asd", 0x631c37ce72a97393},
{"asdf", "asdf", 0x415872f599cea71e},
{
"len=63",
// Exactly 63 characters, which exercises all code paths.
"Call me Ishmael. Some years ago--never mind how long precisely-",
0x02a2e85470d6fd96,
},
{"", 0, 0xef46db3751d8e999},
{"a", 0, 0xd24ec4f1a98c6e5b},
{"as", 0, 0x1c330fb2d66be179},
{"asd", 0, 0x631c37ce72a97393},
{"asdf", 0, 0x415872f599cea71e},
{s63, 0, 0x02a2e85470d6fd96},

{"", 123, 0xe0db84de91f3e198},
{"asdf", math.MaxUint64, 0x9a2fd8473be539b6},
{s63, 54321, 0x1736d186daf5d1cd},
} {
lastChunkSize := len(tt.input)
if lastChunkSize == 0 {
lastChunkSize = 1
}
var name string
if tt.input == "" {
name = "input=empty"
} else if len(tt.input) > 10 {
name = fmt.Sprintf("input=len-%d", len(tt.input))
} else {
name = fmt.Sprintf("input=%q", tt.input)
}
if tt.seed != 0 {
name += fmt.Sprintf(",seed=%d", tt.seed)
}
for chunkSize := 1; chunkSize <= lastChunkSize; chunkSize++ {
name := fmt.Sprintf("%s,chunkSize=%d", tt.name, chunkSize)
name := fmt.Sprintf("%s,chunkSize=%d", name, chunkSize)
t.Run(name, func(t *testing.T) {
testDigest(t, tt.input, chunkSize, tt.want)
testDigest(t, tt.input, tt.seed, chunkSize, tt.want)
})
}
t.Run(tt.name, func(t *testing.T) { testSum(t, tt.input, tt.want) })
if tt.seed == 0 {
t.Run(name, func(t *testing.T) { testSum(t, tt.input, tt.want) })
}
}
}

func testDigest(t *testing.T, input string, chunkSize int, want uint64) {
d := New()
ds := New() // uses WriteString
func testDigest(t *testing.T, input string, seed uint64, chunkSize int, want uint64) {
d := NewWithSeed(seed)
ds := NewWithSeed(seed) // uses WriteString
for i := 0; i < len(input); i += chunkSize {
chunk := input[i:]
if len(chunk) > chunkSize {
Expand Down Expand Up @@ -96,6 +111,23 @@ func TestReset(t *testing.T) {
}
}

// TestResetWithSeed verifies that a Digest re-seeded via ResetWithSeed
// reports the same Sum64 for an input written in one piece as a Digest
// freshly created with the same seed reports for that input written in
// several pieces.
func TestResetWithSeed(t *testing.T) {
	const seed = 123
	parts := []string{"The quic", "k br", "o", "wn fox jumps", " ov", "er the lazy ", "dog."}

	// First pass: feed the input piece by piece.
	d := NewWithSeed(seed)
	for i := range parts {
		d.Write([]byte(parts[i]))
	}
	h0 := d.Sum64()

	// Second pass: reuse the same Digest and feed the input in a single write.
	whole := strings.Join(parts, "")
	d.ResetWithSeed(seed)
	d.Write([]byte(whole))
	h1 := d.Sum64()

	if h0 != h1 {
		t.Errorf("0x%x != 0x%x", h0, h1)
	}
}

func TestBinaryMarshaling(t *testing.T) {
d := New()
d.WriteString("abc")
Expand Down
2 changes: 1 addition & 1 deletion xxhash_unsafe.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import (
//
// See https://github.com/golang/go/issues/42739 for discussion.

// Sum64String computes the 64-bit xxHash digest of s.
// Sum64String computes the 64-bit xxHash digest of s with a zero seed.
// It may be faster than Sum64([]byte(s)) by avoiding a copy.
func Sum64String(s string) uint64 {
b := *(*[]byte)(unsafe.Pointer(&sliceHeader{s, len(s)}))
Expand Down

0 comments on commit 998dce2

Please sign in to comment.