From 7e637f0b49ec6aef6268b23e5e0fc99faadaed0c Mon Sep 17 00:00:00 2001 From: Rob Skillington Date: Tue, 17 Apr 2018 17:16:39 -0400 Subject: [PATCH] Update m3x and use code generated maps instead of ident.Hash based maps (#528) --- .excludecoverage | 1 + .excludefmt | 1 + .excludelint | 1 + .excludemetalint | 1 + Makefile | 5 +- client/host_queue.go | 210 ++++++++++---- client/received_blocks_map_gen.go | 251 +++++++++++++++++ client/received_blocks_new_map.go | 58 ++++ client/session.go | 51 ++-- client/session_fetch_bulk_blocks_test.go | 54 ++-- client/session_fetch_high_concurrency_test.go | 2 +- client/session_fetch_test.go | 2 +- client/session_write_tagged_test.go | 4 +- digest/digest.go | 4 +- encoding/m3tsz/encoder.go | 4 +- encoding/m3tsz/encoder_test.go | 18 +- encoding/ostream.go | 2 +- encoding/ostream_test.go | 4 +- generated-source-files.mk | 125 +++++++++ glide.lock | 13 +- glide.yaml | 5 +- integration/client.go | 15 +- integration/data.go | 2 +- integration/disk_flush_helpers.go | 2 +- integration/generate/generate.go | 4 +- integration/mixed_mode_read_write_test.go | 9 +- integration/truncate_namespace_test.go | 2 +- .../server/tchannelthrift/convert/convert.go | 2 +- network/server/tchannelthrift/node/service.go | 4 +- .../tchannelthrift/node/service_test.go | 8 +- persist/fs/clone/cloner_test.go | 2 +- persist/fs/commitlog/read_write_prop_test.go | 12 +- persist/fs/commitlog/writer.go | 4 +- persist/fs/index_lookup.go | 4 +- persist/fs/index_lookup_prop_test.go | 2 +- persist/fs/read.go | 2 +- persist/fs/read_write_test.go | 6 +- persist/fs/retriever.go | 4 +- persist/fs/retriever_concurrent_test.go | 30 +- persist/fs/seek.go | 4 +- persist/fs/seek_test.go | 12 +- persist/fs/write.go | 8 +- serialize/decoder.go | 6 +- serialize/decoder_lifecycle_prop_test.go | 2 +- serialize/decoder_test.go | 16 +- serialize/encode_decode_prop_test.go | 4 +- serialize/encoder.go | 2 +- serialize/encoder_test.go | 4 +- sharding/shardset.go | 2 +- storage/block/retriever_manager.go | 12 +- storage/block/retriever_map_gen.go | 259 ++++++++++++++++++ storage/block/retriever_new_map_gen.go | 76 +++++ storage/block/wired_list_test.go | 2 +- storage/bootstrap/bootstrapper/base_test.go | 7 +- .../bootstrapper/commitlog/source.go | 2 +- .../commitlog/source_prop_test.go | 2 +- .../bootstrapper/commitlog/source_test.go | 14 +- storage/bootstrap/bootstrapper/fs/source.go | 9 +- .../bootstrap/bootstrapper/fs/source_test.go | 14 +- .../bootstrap/bootstrapper/peers/source.go | 12 +- .../bootstrapper/peers/source_test.go | 6 +- storage/bootstrap/result/map_gen.go | 259 ++++++++++++++++++ storage/bootstrap/result/new_map_gen.go | 76 +++++ storage/bootstrap/result/result.go | 48 ++-- storage/bootstrap/result/result_test.go | 28 +- storage/bootstrap/result/types.go | 4 +- storage/bootstrap_test.go | 57 ++++ storage/database.go | 35 +-- storage/database_test.go | 8 +- storage/index.go | 2 +- storage/namespace.go | 8 +- storage/namespace/map.go | 16 +- storage/namespace/metadata.go | 2 +- storage/namespace/metadata_map_gen.go | 259 ++++++++++++++++++ storage/namespace/metadata_new_map_gen.go | 76 +++++ storage/namespace_map_gen.go | 259 ++++++++++++++++++ storage/namespace_new_map_gen.go | 76 +++++ storage/namespace_test.go | 4 +- storage/repair.go | 15 +- storage/repair/map_gen.go | 259 ++++++++++++++++++ storage/repair/metadata.go | 25 +- storage/repair/metadata_test.go | 11 +- storage/repair/new_map_gen.go | 76 +++++ storage/repair/types.go | 2 +- storage/repair_test.go | 6 +- storage/shard.go | 40 ++- 
storage/shard_map_gen.go | 259 ++++++++++++++++++ storage/shard_new_map_gen.go | 76 +++++ storage/shard_test.go | 34 +-- storage/storage_mock.go | 2 +- storage/types.go | 4 +- tools/read_data_files/main/main.go | 2 +- tools/read_ids/main/main.go | 2 +- tools/verify_commitlogs/main/main.go | 2 +- ts/segment.go | 8 +- x/xio/reader.go | 4 +- x/xio/reader_test.go | 4 +- x/xpool/pool_test.go | 4 +- 98 files changed, 3078 insertions(+), 379 deletions(-) create mode 100644 client/received_blocks_map_gen.go create mode 100644 client/received_blocks_new_map.go create mode 100644 generated-source-files.mk create mode 100644 storage/block/retriever_map_gen.go create mode 100644 storage/block/retriever_new_map_gen.go create mode 100644 storage/bootstrap/result/map_gen.go create mode 100644 storage/bootstrap/result/new_map_gen.go create mode 100644 storage/namespace/metadata_map_gen.go create mode 100644 storage/namespace/metadata_new_map_gen.go create mode 100644 storage/namespace_map_gen.go create mode 100644 storage/namespace_new_map_gen.go create mode 100644 storage/repair/map_gen.go create mode 100644 storage/repair/new_map_gen.go create mode 100644 storage/shard_map_gen.go create mode 100644 storage/shard_new_map_gen.go diff --git a/.excludecoverage b/.excludecoverage index a1a9cb976b..af0248229a 100644 --- a/.excludecoverage +++ b/.excludecoverage @@ -1,4 +1,5 @@ _mock.go +_gen.go _matcher.go generated/ tools/ diff --git a/.excludefmt b/.excludefmt index 93bd389811..b85315f3a4 100644 --- a/.excludefmt +++ b/.excludefmt @@ -3,3 +3,4 @@ (gen-go/) (.pb.go) (_mock.go) +(_gen.go) diff --git a/.excludelint b/.excludelint index 5093d7ed90..de64fac537 100644 --- a/.excludelint +++ b/.excludelint @@ -1,3 +1,4 @@ (vendor/) (generated/) (_mock.go) +(_gen.go) diff --git a/.excludemetalint b/.excludemetalint index b987ab8a7d..c4646c2c81 100644 --- a/.excludemetalint +++ b/.excludemetalint @@ -1,4 +1,5 @@ vendor/ generated/ _mock.go +_gen.go _todo.go diff --git a/Makefile b/Makefile index 6e26e554c5..ba34eb2fc1 100644 --- a/Makefile +++ b/Makefile @@ -8,6 +8,7 @@ gopath_prefix := $(GOPATH)/src license_dir := .ci/uber-licence license_node_modules := $(license_dir)/node_modules m3db_package := github.com/m3db/m3db +m3db_package_path := $(gopath_prefix)/$(m3db_package) metalint_check := .ci/metalint.sh metalint_config := .metalinter.json metalint_exclude := .excludemetalint @@ -23,6 +24,8 @@ thrift_rules_dir := generated/thrift vendor_prefix := vendor cache_policy ?= recently_read +include $(SELF_DIR)/generated-source-files.mk + BUILD := $(abspath ./bin) GO_BUILD_LDFLAGS := $(shell $(abspath ./.ci/go-build-ldflags.sh) $(m3db_package)) LINUX_AMD64_ENV := GOOS=linux GOARCH=amd64 CGO_ENABLED=0 @@ -138,7 +141,7 @@ proto-gen: install-proto-bin install-license-bin .PHONY: all-gen # NB(prateek): order matters here, mock-gen needs to be last because we sometimes # generate mocks for thrift/proto generated code. 
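# genny-all is defined in generated-source-files.mk (included above) and drives regeneration of the specialized maps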
-all-gen: thrift-gen proto-gen mock-gen +all-gen: thrift-gen proto-gen mock-gen map-all-gen genny-all .PHONY: metalint metalint: install-metalinter install-linter-badtime diff --git a/client/host_queue.go b/client/host_queue.go index 1b48a7b8a7..146be90d24 100644 --- a/client/host_queue.go +++ b/client/host_queue.go @@ -183,64 +183,56 @@ func (q *queue) rotateOpsWithLock() []op { func (q *queue) drain() { var ( - currWriteOpsByNamespace = make(map[ident.Hash][]op) - currBatchElementsByNamespace = make(map[ident.Hash][]*rpc.WriteBatchRawRequestElement) - currTaggedWriteOpsByNamespace = make(map[ident.Hash][]op) - currTaggedBatchElementsByNamespace = make(map[ident.Hash][]*rpc.WriteTaggedBatchRawRequestElement) - writeBatchSize = q.opts.WriteBatchSize() + currWriteOpsByNamespace namespaceWriteBatchOpsSlice + currTaggedWriteOpsByNamespace namespaceWriteTaggedBatchOpsSlice + writeBatchSize = q.opts.WriteBatchSize() ) for ops := range q.drainIn { - var ( - currWriteOps []op - currBatchElements []*rpc.WriteBatchRawRequestElement - currTaggedWriteOps []op - currTaggedBatchElements []*rpc.WriteTaggedBatchRawRequestElement - opsLen = len(ops) - ) + opsLen := len(ops) for i := 0; i < opsLen; i++ { switch v := ops[i].(type) { case *writeOperation: namespace := v.namespace - namespaceKey := namespace.Hash() - currWriteOps = currWriteOpsByNamespace[namespaceKey] - currBatchElements = currBatchElementsByNamespace[namespaceKey] - if currWriteOps == nil { - currWriteOps = q.opsArrayPool.Get() - currBatchElements = q.writeBatchRawRequestElementArrayPool.Get() + idx := currWriteOpsByNamespace.indexOf(namespace) + if idx == -1 { + value := namespaceWriteBatchOps{ + namespace: namespace, + opsArrayPool: q.opsArrayPool, + writeBatchRawRequestElementArrayPool: q.writeBatchRawRequestElementArrayPool, + } + idx = len(currWriteOpsByNamespace) + currWriteOpsByNamespace = append(currWriteOpsByNamespace, value) } - currWriteOps = append(currWriteOps, ops[i]) - currBatchElements = append(currBatchElements, &v.request) - currWriteOpsByNamespace[namespaceKey] = currWriteOps - currBatchElementsByNamespace[namespaceKey] = currBatchElements + currWriteOpsByNamespace.appendAt(idx, ops[i], &v.request) - if len(currWriteOps) == writeBatchSize { + if currWriteOpsByNamespace.lenAt(idx) == writeBatchSize { // Reached write batch limit, write async and reset - q.asyncWrite(namespace, currWriteOps, currBatchElements) - currWriteOpsByNamespace[namespaceKey] = nil - currBatchElementsByNamespace[namespaceKey] = nil + q.asyncWrite(namespace, currWriteOpsByNamespace[idx].ops, + currWriteOpsByNamespace[idx].elems) + currWriteOpsByNamespace.resetAt(idx) } case *writeTaggedOperation: namespace := v.namespace - namespaceKey := namespace.Hash() - currTaggedWriteOps = currTaggedWriteOpsByNamespace[namespaceKey] - currTaggedBatchElements = currTaggedBatchElementsByNamespace[namespaceKey] - if currTaggedWriteOps == nil { - currTaggedWriteOps = q.opsArrayPool.Get() - currTaggedBatchElements = q.writeTaggedBatchRawRequestElementArrayPool.Get() + idx := currTaggedWriteOpsByNamespace.indexOf(namespace) + if idx == -1 { + value := namespaceWriteTaggedBatchOps{ + namespace: namespace, + opsArrayPool: q.opsArrayPool, + writeTaggedBatchRawRequestElementArrayPool: q.writeTaggedBatchRawRequestElementArrayPool, + } + idx = len(currTaggedWriteOpsByNamespace) + currTaggedWriteOpsByNamespace = append(currTaggedWriteOpsByNamespace, value) } - currTaggedWriteOps = append(currTaggedWriteOps, ops[i]) - currTaggedBatchElements = append(currTaggedBatchElements, 
&v.request) - currTaggedWriteOpsByNamespace[namespaceKey] = currTaggedWriteOps - currTaggedBatchElementsByNamespace[namespaceKey] = currTaggedBatchElements + currTaggedWriteOpsByNamespace.appendAt(idx, ops[i], &v.request) - if len(currTaggedWriteOps) == writeBatchSize { + if currTaggedWriteOpsByNamespace.lenAt(idx) == writeBatchSize { // Reached write batch limit, write async and reset - q.asyncTaggedWrite(namespace, currTaggedWriteOps, currTaggedBatchElements) - currTaggedWriteOpsByNamespace[namespaceKey] = nil - currTaggedBatchElementsByNamespace[namespaceKey] = nil + q.asyncTaggedWrite(namespace, currTaggedWriteOpsByNamespace[idx].ops, + currTaggedWriteOpsByNamespace[idx].elems) + currTaggedWriteOpsByNamespace.resetAt(idx) } case *fetchBatchOp: q.asyncFetch(v) @@ -253,26 +245,28 @@ func (q *queue) drain() { } // If any outstanding write ops, async write - for _, writeOperations := range currWriteOpsByNamespace { - if len(writeOperations) > 0 { - namespace := writeOperations[0].(*writeOperation).namespace - namespaceKey := namespace.Hash() - q.asyncWrite(namespace, writeOperations, currBatchElementsByNamespace[namespaceKey]) - currWriteOpsByNamespace[namespaceKey] = nil - currBatchElementsByNamespace[namespaceKey] = nil + for i, writeOps := range currWriteOpsByNamespace { + if len(writeOps.ops) > 0 { + q.asyncWrite(writeOps.namespace, writeOps.ops, + writeOps.elems) } + // Zero the element + currWriteOpsByNamespace[i] = namespaceWriteBatchOps{} } + // Reset the slice + currWriteOpsByNamespace = currWriteOpsByNamespace[:0] // If any outstanding tagged write ops, async write - for _, writeOps := range currTaggedWriteOpsByNamespace { - if len(writeOps) > 0 { - namespace := writeOps[0].(*writeTaggedOperation).namespace - namespaceKey := namespace.Hash() - q.asyncTaggedWrite(namespace, writeOps, currTaggedBatchElementsByNamespace[namespaceKey]) - currTaggedWriteOpsByNamespace[namespaceKey] = nil - currTaggedBatchElementsByNamespace[namespaceKey] = nil + for i, writeOps := range currTaggedWriteOpsByNamespace { + if len(writeOps.ops) > 0 { + q.asyncTaggedWrite(writeOps.namespace, writeOps.ops, + writeOps.elems) } + // Zero the element + currTaggedWriteOpsByNamespace[i] = namespaceWriteTaggedBatchOps{} } + // Reset the slice + currTaggedWriteOpsByNamespace = currTaggedWriteOpsByNamespace[:0] if ops != nil { q.opsArrayPool.Put(ops) @@ -293,7 +287,7 @@ func (q *queue) asyncTaggedWrite( // TODO(r): Use a worker pool to avoid creating new go routines for async writes go func() { req := q.writeTaggedBatchRawRequestPool.Get() - req.NameSpace = namespace.Data().Get() + req.NameSpace = namespace.Data().Bytes() req.Elements = elems // NB(r): Defer is slow in the hot path unfortunately @@ -358,7 +352,7 @@ func (q *queue) asyncWrite( // TODO(r): Use a worker pool to avoid creating new go routines for async writes go func() { req := q.writeBatchRawRequestPool.Get() - req.NameSpace = namespace.Data().Get() + req.NameSpace = namespace.Data().Bytes() req.Elements = elems // NB(r): Defer is slow in the hot path unfortunately @@ -584,3 +578,107 @@ func errQueueUnknownOperation(hostID string) error { func errQueueFetchNoResponse(hostID string) error { return fmt.Errorf("host operation queue did not receive response for given fetch for host: %s", hostID) } + +// ops container types + +type namespaceWriteBatchOps struct { + namespace ident.ID + opsArrayPool *opArrayPool + writeBatchRawRequestElementArrayPool writeBatchRawRequestElementArrayPool + ops []op + elems []*rpc.WriteBatchRawRequestElement +} + +type 
namespaceWriteBatchOpsSlice []namespaceWriteBatchOps + +func (s namespaceWriteBatchOpsSlice) indexOf( + namespace ident.ID, +) int { + idx := -1 + for i := range s { + if s[i].namespace.Equal(namespace) { + return i + } + } + return idx +} + +func (s namespaceWriteBatchOpsSlice) appendAt( + index int, + op op, + elem *rpc.WriteBatchRawRequestElement, +) { + if s[index].ops == nil { + s[index].ops = s[index].opsArrayPool.Get() + } + if s[index].elems == nil { + s[index].elems = s[index].writeBatchRawRequestElementArrayPool.Get() + } + s[index].ops = append(s[index].ops, op) + s[index].elems = append(s[index].elems, elem) +} + +func (s namespaceWriteBatchOpsSlice) lenAt( + index int, +) int { + return len(s[index].ops) +} + +func (s namespaceWriteBatchOpsSlice) resetAt( + index int, +) { + s[index].ops = nil + s[index].elems = nil +} + +// TODO: use genny to make namespaceWriteBatchOps and namespaceWriteTaggedBatchOps +// share code (https://github.com/m3db/m3db/issues/531) +type namespaceWriteTaggedBatchOps struct { + namespace ident.ID + opsArrayPool *opArrayPool + writeTaggedBatchRawRequestElementArrayPool writeTaggedBatchRawRequestElementArrayPool + ops []op + elems []*rpc.WriteTaggedBatchRawRequestElement +} + +type namespaceWriteTaggedBatchOpsSlice []namespaceWriteTaggedBatchOps + +func (s namespaceWriteTaggedBatchOpsSlice) indexOf( + namespace ident.ID, +) int { + idx := -1 + for i := range s { + if s[i].namespace.Equal(namespace) { + return i + } + } + return idx +} + +func (s namespaceWriteTaggedBatchOpsSlice) appendAt( + index int, + op op, + elem *rpc.WriteTaggedBatchRawRequestElement, +) { + if s[index].ops == nil { + s[index].ops = s[index].opsArrayPool.Get() + } + if s[index].elems == nil { + s[index].elems = s[index].writeTaggedBatchRawRequestElementArrayPool.Get() + } + s[index].ops = append(s[index].ops, op) + s[index].elems = append(s[index].elems, elem) +} + +func (s namespaceWriteTaggedBatchOpsSlice) lenAt( + index int, +) int { + return len(s[index].ops) +} + +func (s namespaceWriteTaggedBatchOpsSlice) resetAt( + index int, +) { + s[index].ops = nil + s[index].elems = nil +} diff --git a/client/received_blocks_map_gen.go b/client/received_blocks_map_gen.go new file mode 100644 index 0000000000..bfec10accf --- /dev/null +++ b/client/received_blocks_map_gen.go @@ -0,0 +1,251 @@ +// This file was automatically generated by genny. +// Any changes will be lost if this file is regenerated. +// see https://github.com/mauricelam/genny + +package client + +// Copyright (c) 2018 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// receivedBlocksMapHash is the hash for a given map entry, this is public to support +// iterating over the map using a native Go for loop. +type receivedBlocksMapHash uint64 + +// receivedBlocksMapHashFn is the hash function to execute when hashing a key. +type receivedBlocksMapHashFn func(idAndBlockStart) receivedBlocksMapHash + +// receivedBlocksMapEqualsFn is the equals key function to execute when detecting equality of a key. +type receivedBlocksMapEqualsFn func(idAndBlockStart, idAndBlockStart) bool + +// receivedBlocksMapCopyFn is the copy key function to execute when copying the key. +type receivedBlocksMapCopyFn func(idAndBlockStart) idAndBlockStart + +// receivedBlocksMapFinalizeFn is the finalize key function to execute when finished with a key. +type receivedBlocksMapFinalizeFn func(idAndBlockStart) + +// receivedBlocksMap uses the genny package to provide a generic hash map that can be specialized +// by running the following command from this root of the repository: +// ``` +// make hashmap-gen pkg=outpkg key_type=Type value_type=Type out_dir=/tmp +// ``` +// Or if you would like to use bytes or ident.ID as keys you can use the +// partially specialized maps to generate your own maps as well: +// ``` +// make byteshashmap-gen pkg=outpkg value_type=Type out_dir=/tmp +// make idhashmap-gen pkg=outpkg value_type=Type out_dir=/tmp +// ``` +// This will output to stdout the generated source file to use for your map. +// It uses linear probing by incrementing the number of the hash created when +// hashing the identifier if there is a collision. +// receivedBlocksMap is a value type and not an interface to allow for less painful +// upgrades when adding/removing methods, it is not likely to need mocking so +// an interface would not be super useful either. +type receivedBlocksMap struct { + _receivedBlocksMapOptions + + // lookup uses hash of the identifier for the key and the MapEntry value + // wraps the value type and the key (used to ensure lookup is correct + // when dealing with collisions), we use uint64 for the hash partially + // because lookups of maps with uint64 keys has a fast path for Go. + lookup map[receivedBlocksMapHash]receivedBlocksMapEntry +} + +// _receivedBlocksMapOptions is a set of options used when creating an identifier map, it is kept +// private so that implementers of the generated map can specify their own options +// that partially fulfill these options. +type _receivedBlocksMapOptions struct { + // hash is the hash function to execute when hashing a key. + hash receivedBlocksMapHashFn + // equals is the equals key function to execute when detecting equality. + equals receivedBlocksMapEqualsFn + // copy is the copy key function to execute when copying the key. + copy receivedBlocksMapCopyFn + // finalize is the finalize key function to execute when finished with a + // key, this is optional to specify. + finalize receivedBlocksMapFinalizeFn + // initialSize is the initial size for the map, use zero to use Go's std map + // initial size and consequently is optional to specify. + initialSize int +} + +// receivedBlocksMapEntry is an entry in the map, this is public to support iterating +// over the map using a native Go for loop. 
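// Each entry stores a copy of its key next to the value: lookups that collide under the hash linear probe forward (hash+1, hash+2, ...) and compare the stored key to find the entry they are actually after.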
+type receivedBlocksMapEntry struct { + // key is used to check equality on lookups to resolve collisions + key _receivedBlocksMapKey + // value type stored + value receivedBlocks +} + +type _receivedBlocksMapKey struct { + key idAndBlockStart + finalize bool +} + +// Key returns the map entry key. +func (e receivedBlocksMapEntry) Key() idAndBlockStart { + return e.key.key +} + +// Value returns the map entry value. +func (e receivedBlocksMapEntry) Value() receivedBlocks { + return e.value +} + +// _receivedBlocksMapAlloc is a non-exported function so that when generating the source code +// for the map you can supply a public constructor that sets the correct +// hash, equals, copy, finalize options without users of the map needing to +// implement them themselves. +func _receivedBlocksMapAlloc(opts _receivedBlocksMapOptions) *receivedBlocksMap { + m := &receivedBlocksMap{_receivedBlocksMapOptions: opts} + m.Reallocate() + return m +} + +func (m *receivedBlocksMap) newMapKey(k idAndBlockStart, opts _receivedBlocksMapKeyOptions) _receivedBlocksMapKey { + key := _receivedBlocksMapKey{key: k, finalize: opts.finalizeKey} + if !opts.copyKey { + return key + } + + key.key = m.copy(k) + return key +} + +func (m *receivedBlocksMap) removeMapKey(hash receivedBlocksMapHash, key _receivedBlocksMapKey) { + delete(m.lookup, hash) + if key.finalize { + m.finalize(key.key) + } +} + +// Get returns a value in the map for an identifier if found. +func (m *receivedBlocksMap) Get(k idAndBlockStart) (receivedBlocks, bool) { + hash := m.hash(k) + for entry, ok := m.lookup[hash]; ok; entry, ok = m.lookup[hash] { + if m.equals(entry.key.key, k) { + return entry.value, true + } + // Linear probe to "next" to this entry (really a rehash) + hash++ + } + var empty receivedBlocks + return empty, false +} + +// Set will set the value for an identifier. +func (m *receivedBlocksMap) Set(k idAndBlockStart, v receivedBlocks) { + m.set(k, v, _receivedBlocksMapKeyOptions{ + copyKey: true, + finalizeKey: m.finalize != nil, + }) +} + +// receivedBlocksMapSetUnsafeOptions is a set of options to use when setting a value with +// the SetUnsafe method. +type receivedBlocksMapSetUnsafeOptions struct { + NoCopyKey bool + NoFinalizeKey bool +} + +// SetUnsafe will set the value for an identifier with unsafe options for how +// the map treats the key. +func (m *receivedBlocksMap) SetUnsafe(k idAndBlockStart, v receivedBlocks, opts receivedBlocksMapSetUnsafeOptions) { + m.set(k, v, _receivedBlocksMapKeyOptions{ + copyKey: !opts.NoCopyKey, + finalizeKey: !opts.NoFinalizeKey, + }) +} + +type _receivedBlocksMapKeyOptions struct { + copyKey bool + finalizeKey bool +} + +func (m *receivedBlocksMap) set(k idAndBlockStart, v receivedBlocks, opts _receivedBlocksMapKeyOptions) { + hash := m.hash(k) + for entry, ok := m.lookup[hash]; ok; entry, ok = m.lookup[hash] { + if m.equals(entry.key.key, k) { + m.lookup[hash] = receivedBlocksMapEntry{ + key: entry.key, + value: v, + } + return + } + // Linear probe to "next" to this entry (really a rehash) + hash++ + } + + m.lookup[hash] = receivedBlocksMapEntry{ + key: m.newMapKey(k, opts), + value: v, + } +} + +// Iter provides the underlying map to allow for using a native Go for loop +// to iterate the map, however callers should only ever read and not write +// the map. +func (m *receivedBlocksMap) Iter() map[receivedBlocksMapHash]receivedBlocksMapEntry { + return m.lookup +} + +// Len returns the number of map entries in the map. 
+func (m *receivedBlocksMap) Len() int { + return len(m.lookup) +} + +// Contains returns true if value exists for key, false otherwise, it is +// shorthand for a call to Get that doesn't return the value. +func (m *receivedBlocksMap) Contains(k idAndBlockStart) bool { + _, ok := m.Get(k) + return ok +} + +// Delete will remove a value set in the map for the specified key. +func (m *receivedBlocksMap) Delete(k idAndBlockStart) { + hash := m.hash(k) + for entry, ok := m.lookup[hash]; ok; entry, ok = m.lookup[hash] { + if m.equals(entry.key.key, k) { + m.removeMapKey(hash, entry.key) + return + } + // Linear probe to "next" to this entry (really a rehash) + hash++ + } +} + +// Reset will reset the map by simply deleting all keys to avoid +// allocating a new map. +func (m *receivedBlocksMap) Reset() { + for hash, entry := range m.lookup { + m.removeMapKey(hash, entry.key) + } +} + +// Reallocate will avoid deleting all keys and reallocate a new +// map, this is useful if you believe you have a large map and +// will not need to grow back to a similar size. +func (m *receivedBlocksMap) Reallocate() { + if m.initialSize > 0 { + m.lookup = make(map[receivedBlocksMapHash]receivedBlocksMapEntry, m.initialSize) + } else { + m.lookup = make(map[receivedBlocksMapHash]receivedBlocksMapEntry) + } +} diff --git a/client/received_blocks_new_map.go b/client/received_blocks_new_map.go new file mode 100644 index 0000000000..4f0ad83318 --- /dev/null +++ b/client/received_blocks_new_map.go @@ -0,0 +1,58 @@ +// Copyright (c) 2018 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
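// Usage sketch (illustrative only; bytesPool, key and start are hypothetical names): // m := newReceivedBlocksMap(bytesPool) // key := idAndBlockStart{id: ident.StringID("foo"), blockStart: start.UnixNano()} // m.Set(key, receivedBlocks{}) // blocks, ok := m.Get(key) // Set copies the key through the pooled copy fn below, and the finalize fn returns the pooled bytes when an entry is deleted or the map is reset.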
+ +package client + +import ( + "github.com/m3db/m3x/ident" + "github.com/m3db/m3x/pool" + + "github.com/cespare/xxhash" +) + +func newReceivedBlocksMap(pool pool.BytesPool) *receivedBlocksMap { + return _receivedBlocksMapAlloc(_receivedBlocksMapOptions{ + hash: func(k idAndBlockStart) receivedBlocksMapHash { + // NB(r): Similar to the standard composite key hashes for Java objects + hash := uint64(7) + hash = 31*hash + xxhash.Sum64(k.id.Bytes()) + hash = 31*hash + uint64(k.blockStart) + return receivedBlocksMapHash(hash) + }, + equals: func(x, y idAndBlockStart) bool { + return x.id.Equal(y.id) && x.blockStart == y.blockStart + }, + copy: func(k idAndBlockStart) idAndBlockStart { + bytes := k.id.Bytes() + keyLen := len(bytes) + pooled := pool.Get(keyLen)[:keyLen] + copy(pooled, bytes) + return idAndBlockStart{ + id: ident.BytesID(pooled), + blockStart: k.blockStart, + } + }, + finalize: func(k idAndBlockStart) { + if slice, ok := k.id.(ident.BytesID); ok { + pool.Put(slice) + } + }, + }) +} diff --git a/client/session.go b/client/session.go index 44129e1318..5f423f3b49 100644 --- a/client/session.go +++ b/client/session.go @@ -943,7 +943,7 @@ func (s *session) writeAttemptWithRLock( wop := s.writeOperationPool.Get() wop.namespace = nsID wop.shardID = s.state.topoMap.ShardSet().Lookup(tsID) - wop.request.ID = tsID.Data().Get() + wop.request.ID = tsID.Data().Bytes() wop.request.Datapoint.Value = value wop.request.Datapoint.Timestamp = timestamp wop.request.Datapoint.TimestampTimeType = timeType @@ -953,12 +953,12 @@ func (s *session) writeAttemptWithRLock( wop := s.writeTaggedOperationPool.Get() wop.namespace = nsID wop.shardID = s.state.topoMap.ShardSet().Lookup(tsID) - wop.request.ID = tsID.Data().Get() + wop.request.ID = tsID.Data().Bytes() encodedTagBytes, ok := tagEncoder.Data() if !ok { return nil, 0, 0, errUnableToEncodeTags } - wop.request.EncodedTags = encodedTagBytes.Get() + wop.request.EncodedTags = encodedTagBytes.Bytes() wop.request.Datapoint.Value = value wop.request.Datapoint.Timestamp = timestamp wop.request.Datapoint.TimestampTimeType = timeType @@ -1285,7 +1285,7 @@ func (s *session) fetchIDsAttempt( } // Append IDWithNamespace to this request - f.append(namespace.Data().Get(), tsID.Data().Get(), completionFn) + f.append(namespace.Data().Bytes(), tsID.Data().Bytes(), completionFn) }); err != nil { routeErr = err break @@ -1456,7 +1456,7 @@ func (s *session) Truncate(namespace ident.ID) (int64, error) { ) t := &truncateOp{} - t.request.NameSpace = namespace.Data().Get() + t.request.NameSpace = namespace.Data().Bytes() t.completionFn = func(result interface{}, err error) { if err != nil { resultErrLock.Lock() @@ -1895,7 +1895,7 @@ func (s *session) streamBlocksMetadataFromPeer( attemptFn := func(client rpc.TChanNode) error { tctx, _ := thrift.NewContext(s.streamBlocksMetadataBatchTimeout) req := rpc.NewFetchBlocksMetadataRawRequest() - req.NameSpace = namespace.Data().Get() + req.NameSpace = namespace.Data().Bytes() req.Shard = int32(shard) req.RangeStart = start.UnixNano() req.RangeEnd = end.UnixNano() @@ -2062,7 +2062,7 @@ func (s *session) streamBlocksMetadataFromPeerV2( attemptFn := func(client rpc.TChanNode) error { tctx, _ := thrift.NewContext(s.streamBlocksMetadataBatchTimeout) req := rpc.NewFetchBlocksMetadataRawV2Request() - req.NameSpace = namespace.Data().Get() + req.NameSpace = namespace.Data().Bytes() req.Shard = int32(shard) req.RangeStart = start.UnixNano() req.RangeEnd = end.UnixNano() @@ -2188,19 +2188,21 @@ func (s *session) streamBlocksFromPeers( var ( 
enqueueCh = newEnqueueChannel(progress) peerBlocksBatchSize = s.streamBlocksBatchSize + numPeers = len(peers.peers) + uncheckedBytesPool = opts.DatabaseBlockOptions().BytesPool().BytesPool() ) // Consume the incoming metadata and enqueue to the ready channel // Spin up background goroutine to consume go func() { - streamMetadataFn(len(peers.peers), metadataCh, enqueueCh) + streamMetadataFn(numPeers, metadataCh, enqueueCh, uncheckedBytesPool) // Begin assessing the queue and how much is processed, once queue // is entirely processed then we can close the enqueue channel enqueueCh.closeOnAllProcessed() }() // Fetch blocks from peers as results become ready - peerQueues := make(peerBlocksQueues, 0, len(peers.peers)) + peerQueues := make(peerBlocksQueues, 0, numPeers) for _, peer := range peers.peers { peer := peer size := peerBlocksBatchSize @@ -2256,12 +2258,14 @@ type streamBlocksMetadataFn func( peersLen int, ch <-chan receivedBlockMetadata, enqueueCh enqueueChannel, + pool pool.BytesPool, ) func (s *session) passThroughBlocksMetadata( peersLen int, ch <-chan receivedBlockMetadata, enqueueCh enqueueChannel, + _ pool.BytesPool, ) { // Receive off of metadata channel for { @@ -2278,8 +2282,10 @@ func (s *session) streamAndGroupCollectedBlocksMetadata( peersLen int, metadataCh <-chan receivedBlockMetadata, enqueueCh enqueueChannel, + pool pool.BytesPool, ) { - metadata := make(map[hashAndBlockStart]receivedBlocks) + metadata := newReceivedBlocksMap(pool) + defer metadata.Reset() // Delete all the keys and return slices to pools for { m, ok := <-metadataCh @@ -2287,11 +2293,11 @@ func (s *session) streamAndGroupCollectedBlocksMetadata( break } - key := hashAndBlockStart{ - hash: m.id.Hash(), + key := idAndBlockStart{ + id: m.id, blockStart: m.block.start.UnixNano(), } - received, ok := metadata[key] + received, ok := metadata.Get(key) if !ok { received = receivedBlocks{ results: make([]receivedBlockMetadata, 0, peersLen), @@ -2334,12 +2340,13 @@ func (s *session) streamAndGroupCollectedBlocksMetadata( } // Ensure tracking enqueued by setting modified result back to map - metadata[key] = received + metadata.Set(key, received) } // Enqueue all unenqueued received metadata. Note that these entries will have // metadata from only a subset of their peers. 
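// Iter() exposes the generated map's underlying lookup table for a native range loop; each entry wraps both key and value, hence the entry.Value() accessor below.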
- for _, received := range metadata { + for _, entry := range metadata.Iter() { + received := entry.Value() if received.enqueued { continue } @@ -2646,7 +2653,7 @@ func (s *session) streamBlocksBatchFromPeer( retention = ropts.RetentionPeriod() earliestBlockStart = nowFn().Add(-retention).Truncate(blockSize) ) - req.NameSpace = namespaceMetadata.ID().Data().Get() + req.NameSpace = namespaceMetadata.ID().Data().Bytes() req.Shard = int32(shard) req.Elements = make([]*rpc.FetchBlocksRawRequestElement, 0, len(batch)) for i := range batch { @@ -2655,7 +2662,7 @@ func (s *session) streamBlocksBatchFromPeer( continue // Fell out of retention while we were streaming blocks } req.Elements = append(req.Elements, &rpc.FetchBlocksRawRequestElement{ - ID: batch[i].id.Data().Get(), + ID: batch[i].id.Data().Bytes(), Starts: []int64{blockStart.UnixNano()}, }) reqBlocksLen++ @@ -2709,7 +2716,7 @@ func (s *session) streamBlocksBatchFromPeer( } id := batch[i].id - if !bytes.Equal(id.Data().Get(), result.Elements[i].ID) { + if !bytes.Equal(id.Data().Bytes(), result.Elements[i].ID) { blocksErr := fmt.Errorf( "stream blocks mismatched ID: expectedID=%s, actualID=%s, indexID=%d, peer=%s", batch[i].id.String(), id.String(), i, peer.Host().String(), @@ -3581,14 +3588,16 @@ func (it *metadataIter) Err() error { return it.err } -type hashAndBlockStart struct { - hash ident.Hash +type idAndBlockStart struct { + id ident.ID blockStart int64 } // IsValidFetchBlocksMetadataEndpoint returns a bool indicating whether the // specified endpointVersion is valid -func IsValidFetchBlocksMetadataEndpoint(endpointVersion FetchBlocksMetadataEndpointVersion) bool { +func IsValidFetchBlocksMetadataEndpoint( + endpointVersion FetchBlocksMetadataEndpointVersion, +) bool { for _, version := range validFetchBlocksMetadataEndpoints { if version == endpointVersion { return true diff --git a/client/session_fetch_bulk_blocks_test.go b/client/session_fetch_bulk_blocks_test.go index d3c9cbeacd..cc78c03ecd 100644 --- a/client/session_fetch_bulk_blocks_test.go +++ b/client/session_fetch_bulk_blocks_test.go @@ -691,7 +691,7 @@ func assertFetchBlocksFromPeersResult( seg, err := stream.Segment() require.NoError(t, err) - actualData := append(seg.Head.Get(), seg.Tail.Get()...) + actualData := append(seg.Head.Bytes(), seg.Tail.Bytes()...) 
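// NB: Bytes() is the renamed checked.Bytes accessor in the updated m3x (previously Get()); it returns the underlying byte slice of the segment.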
// compare actual v expected data if len(expectedData) != len(actualData) { @@ -1476,9 +1476,13 @@ func TestStreamBlocksBatchFromPeerVerifiesBlockErr(t *testing.T) { assertEnqueueChannel(t, batch[2:], enqueueCh) // Assert length of blocks result - assert.Equal(t, 2, len(r.result.AllSeries())) - assert.Equal(t, 1, r.result.AllSeries()[fooID.Hash()].Blocks.Len()) - assert.Equal(t, 1, r.result.AllSeries()[barID.Hash()].Blocks.Len()) + assert.Equal(t, 2, r.result.AllSeries().Len()) + fooBlocks, ok := r.result.AllSeries().Get(fooID) + require.True(t, ok) + assert.Equal(t, 1, fooBlocks.Blocks.Len()) + barBlocks, ok := r.result.AllSeries().Get(barID) + require.True(t, ok) + assert.Equal(t, 1, barBlocks.Blocks.Len()) assert.NoError(t, session.Close()) } @@ -1625,12 +1629,18 @@ func TestStreamBlocksBatchFromPeerVerifiesBlockChecksum(t *testing.T) { assertEnqueueChannel(t, batch[1:2], enqueueCh) // Assert length of blocks result - assert.Equal(t, 2, len(r.result.AllSeries())) - assert.Equal(t, 1, r.result.AllSeries()[fooID.Hash()].Blocks.Len()) - _, ok := r.result.AllSeries()[fooID.Hash()].Blocks.BlockAt(start) + assert.Equal(t, 2, r.result.AllSeries().Len()) + + fooBlocks, ok := r.result.AllSeries().Get(fooID) + require.True(t, ok) + assert.Equal(t, 1, fooBlocks.Blocks.Len()) + _, ok = fooBlocks.Blocks.BlockAt(start) assert.True(t, ok) - assert.Equal(t, 1, r.result.AllSeries()[barID.Hash()].Blocks.Len()) - _, ok = r.result.AllSeries()[barID.Hash()].Blocks.BlockAt(start.Add(blockSize)) + + barBlocks, ok := r.result.AllSeries().Get(barID) + require.True(t, ok) + assert.Equal(t, 1, barBlocks.Blocks.Len()) + _, ok = barBlocks.Blocks.BlockAt(start.Add(blockSize)) assert.True(t, ok) assert.NoError(t, session.Close()) @@ -1654,9 +1664,9 @@ func TestBlocksResultAddBlockFromPeerReadMerged(t *testing.T) { r.addBlockFromPeer(fooID, testHost, bl) series := r.result.AllSeries() - assert.Equal(t, 1, len(series)) + assert.Equal(t, 1, series.Len()) - sl, ok := series[fooID.Hash()] + sl, ok := series.Get(fooID) assert.True(t, ok) blocks := sl.Blocks assert.Equal(t, 1, blocks.Len()) @@ -1674,8 +1684,8 @@ func TestBlocksResultAddBlockFromPeerReadMerged(t *testing.T) { require.NoError(t, err) // Assert block has data - assert.Equal(t, []byte{1, 2}, seg.Head.Get()) - assert.Equal(t, []byte{3}, seg.Tail.Get()) + assert.Equal(t, []byte{1, 2}, seg.Head.Bytes()) + assert.Equal(t, []byte{3}, seg.Tail.Bytes()) } func TestBlocksResultAddBlockFromPeerReadUnmerged(t *testing.T) { @@ -1727,7 +1737,7 @@ func TestBlocksResultAddBlockFromPeerReadUnmerged(t *testing.T) { all = append(all, val) } result := encoder.Discard() - seg := &rpc.Segment{Head: result.Head.Get(), Tail: result.Tail.Get()} + seg := &rpc.Segment{Head: result.Head.Bytes(), Tail: result.Tail.Bytes()} bl.Segments.Unmerged = append(bl.Segments.Unmerged, seg) } @@ -1735,9 +1745,9 @@ func TestBlocksResultAddBlockFromPeerReadUnmerged(t *testing.T) { r.addBlockFromPeer(fooID, testHost, bl) series := r.result.AllSeries() - assert.Equal(t, 1, len(series)) + assert.Equal(t, 1, series.Len()) - sl, ok := series[fooID.Hash()] + sl, ok := series.Get(fooID) assert.True(t, ok) blocks := sl.Blocks assert.Equal(t, 1, blocks.Len()) @@ -2070,7 +2080,7 @@ func expectFetchMetadataAndReturn( ) for j := beginIdx; j < len(result) && j < beginIdx+batchSize; j++ { elem := &rpc.BlocksMetadata{} - elem.ID = result[j].id.Data().Get() + elem.ID = result[j].id.Data().Bytes() for k := 0; k < len(result[j].blocks); k++ { bl := &rpc.BlockMetadata{} bl.Start = result[j].blocks[k].start.UnixNano() @@ 
-2122,7 +2132,7 @@ func expectFetchMetadataAndReturnV2( beginIdx = i * batchSize ) for j := beginIdx; j < len(result) && j < beginIdx+batchSize; j++ { - id := result[j].id.Data().Get() + id := result[j].id.Data().Bytes() for k := 0; k < len(result[j].blocks); k++ { bl := &rpc.BlockMetadataV2{} bl.ID = id @@ -2274,7 +2284,7 @@ func expectFetchBlocksAndReturn( ret := &rpc.FetchBlocksRawResult_{} for _, res := range result[i] { blocks := &rpc.Blocks{} - blocks.ID = res.id.Data().Get() + blocks.ID = res.id.Data().Bytes() for j := range res.blocks { bl := &rpc.Block{} bl.Start = res.blocks[j].start.UnixNano() @@ -2399,11 +2409,11 @@ func assertFetchBootstrapBlocksResult( defer ctx.Close() series := actual.AllSeries() - assert.Equal(t, len(expected), len(series)) + assert.Equal(t, len(expected), series.Len()) for i := range expected { id := expected[i].id - entry, ok := series[id.Hash()] + entry, ok := series.Get(id) if !ok { assert.Fail(t, fmt.Sprintf("blocks for series '%s' not present", id.String())) continue @@ -2431,7 +2441,7 @@ func assertFetchBootstrapBlocksResult( require.NoError(t, err) seg, err := stream.Segment() require.NoError(t, err) - actualData := append(seg.Head.Get(), seg.Tail.Get()...) + actualData := append(seg.Head.Bytes(), seg.Tail.Bytes()...) assert.Equal(t, expectedData, actualData) } else if block.segments.unmerged != nil { assert.Fail(t, "unmerged comparison not supported") diff --git a/client/session_fetch_high_concurrency_test.go b/client/session_fetch_high_concurrency_test.go index 49bb1384bc..3997814c2a 100644 --- a/client/session_fetch_high_concurrency_test.go +++ b/client/session_fetch_high_concurrency_test.go @@ -90,7 +90,7 @@ func TestSessionFetchIDsHighConcurrency(t *testing.T) { } seg := encoder.Discard() respSegments := []*rpc.Segments{&rpc.Segments{ - Merged: &rpc.Segment{Head: seg.Head.Get(), Tail: seg.Tail.Get()}, + Merged: &rpc.Segment{Head: seg.Head.Bytes(), Tail: seg.Tail.Bytes()}, }} respElements := make([]*rpc.FetchRawResult_, maxIDs) for i := range respElements { diff --git a/client/session_fetch_test.go b/client/session_fetch_test.go index c8cece4901..b9ccfad57c 100644 --- a/client/session_fetch_test.go +++ b/client/session_fetch_test.go @@ -489,7 +489,7 @@ func fulfillTszFetchBatchOps( } seg := encoder.Discard() op.completionFns[i]([]*rpc.Segments{&rpc.Segments{ - Merged: &rpc.Segment{Head: seg.Head.Get(), Tail: seg.Tail.Get()}, + Merged: &rpc.Segment{Head: seg.Head.Bytes(), Tail: seg.Tail.Bytes()}, }}, nil) calledCompletionFn = true break diff --git a/client/session_write_tagged_test.go b/client/session_write_tagged_test.go index f98b77a7b3..58ecd23840 100644 --- a/client/session_write_tagged_test.go +++ b/client/session_write_tagged_test.go @@ -76,7 +76,7 @@ func TestSessionWriteTagged(t *testing.T) { assert.True(t, ok) assert.Equal(t, w.ns.String(), write.namespace.String()) assert.Equal(t, w.id.String(), string(write.request.ID)) - assert.Equal(t, testEncodeTags(w.tags).Get(), write.request.EncodedTags) + assert.Equal(t, testEncodeTags(w.tags).Bytes(), write.request.EncodedTags) assert.Equal(t, w.value, write.request.Datapoint.Value) assert.Equal(t, w.t.Unix(), write.request.Datapoint.Timestamp) assert.Equal(t, rpc.TimeType_UNIX_SECONDS, write.request.Datapoint.TimestampTimeType) @@ -267,7 +267,7 @@ func TestSessionWriteTaggedRetry(t *testing.T) { write, ok := op.(*writeTaggedOperation) assert.True(t, ok) assert.Equal(t, w.id.String(), string(write.request.ID)) - assert.Equal(t, string(testEncodeTags(w.tags).Get()), 
string(write.request.EncodedTags)) + assert.Equal(t, string(testEncodeTags(w.tags).Bytes()), string(write.request.EncodedTags)) assert.Equal(t, w.value, write.request.Datapoint.Value) assert.Equal(t, w.t.Unix(), write.request.Datapoint.Timestamp) assert.Equal(t, rpc.TimeType_UNIX_SECONDS, write.request.Datapoint.TimestampTimeType) diff --git a/digest/digest.go b/digest/digest.go index 88a1ab17f1..caeb7d3594 100644 --- a/digest/digest.go +++ b/digest/digest.go @@ -38,10 +38,10 @@ func NewDigest() stackadler32.Digest { func SegmentChecksum(segment ts.Segment) uint32 { d := stackadler32.NewDigest() if segment.Head != nil { - d = d.Update(segment.Head.Get()) + d = d.Update(segment.Head.Bytes()) } if segment.Tail != nil { - d = d.Update(segment.Tail.Get()) + d = d.Update(segment.Tail.Bytes()) } return d.Sum32() } diff --git a/encoding/m3tsz/encoder.go b/encoding/m3tsz/encoder.go index 5cb49dddbb..623fa45b3b 100644 --- a/encoding/m3tsz/encoder.go +++ b/encoding/m3tsz/encoder.go @@ -580,7 +580,7 @@ func (enc *encoder) segment(resType resultType) ts.Segment { // of the encoder data. var head checked.Bytes buffer, pos := enc.os.Rawbytes() - lastByte := buffer.Get()[length-1] + lastByte := buffer.Bytes()[length-1] if resType == byRefResultType { // Take ref from the ostream head = enc.os.Discard() @@ -598,7 +598,7 @@ func (enc *encoder) segment(resType resultType) ts.Segment { defer head.DecRef() // Copy up to last byte - head.AppendAll(buffer.Get()[:length-1]) + head.AppendAll(buffer.Bytes()[:length-1]) } // Take a shared ref to a known good tail diff --git a/encoding/m3tsz/encoder_test.go b/encoding/m3tsz/encoder_test.go index 9a1a705d49..92bb69b2b6 100644 --- a/encoding/m3tsz/encoder_test.go +++ b/encoding/m3tsz/encoder_test.go @@ -66,7 +66,7 @@ func TestWriteDeltaOfDeltaTimeUnitUnchanged(t *testing.T) { encoder.Reset(testStartTime, 0) encoder.writeDeltaOfDeltaTimeUnitUnchanged(0, input.delta, input.timeUnit) b, p := encoder.os.Rawbytes() - require.Equal(t, input.expectedBytes, b.Get()) + require.Equal(t, input.expectedBytes, b.Bytes()) require.Equal(t, input.expectedPos, p) } } @@ -86,7 +86,7 @@ func TestWriteDeltaOfDeltaTimeUnitChanged(t *testing.T) { encoder.Reset(testStartTime, 0) encoder.writeDeltaOfDeltaTimeUnitChanged(0, input.delta) b, p := encoder.os.Rawbytes() - require.Equal(t, input.expectedBytes, b.Get()) + require.Equal(t, input.expectedBytes, b.Bytes()) require.Equal(t, input.expectedPos, p) } } @@ -107,7 +107,7 @@ func TestWriteValue(t *testing.T) { encoder.Reset(testStartTime, 0) encoder.writeXOR(input.previousXOR, input.currentXOR) b, p := encoder.os.Rawbytes() - require.Equal(t, input.expectedBytes, b.Get()) + require.Equal(t, input.expectedBytes, b.Bytes()) require.Equal(t, input.expectedPos, p) } } @@ -140,7 +140,7 @@ func TestWriteAnnotation(t *testing.T) { encoder.Reset(testStartTime, 0) encoder.writeAnnotation(input.annotation) b, p := encoder.os.Rawbytes() - require.Equal(t, input.expectedBytes, b.Get()) + require.Equal(t, input.expectedBytes, b.Bytes()) require.Equal(t, input.expectedPos, p) } } @@ -188,7 +188,7 @@ func TestWriteTimeUnit(t *testing.T) { encoder.tu = xtime.None assert.Equal(t, input.expectedResult, encoder.writeTimeUnit(input.timeUnit)) b, p := encoder.os.Rawbytes() - assert.Equal(t, input.expectedBytes, b.Get()) + assert.Equal(t, input.expectedBytes, b.Bytes()) assert.Equal(t, input.expectedPos, p) } } @@ -225,7 +225,7 @@ func TestEncodeNoAnnotation(t *testing.T) { } b, p := encoder.os.Rawbytes() - require.Equal(t, expectedBuffer, b.Get()) + 
require.Equal(t, expectedBuffer, b.Bytes()) require.Equal(t, 6, p) } @@ -258,7 +258,7 @@ func TestEncodeWithAnnotation(t *testing.T) { } b, p := encoder.os.Rawbytes() - require.Equal(t, expectedBuffer, b.Get()) + require.Equal(t, expectedBuffer, b.Bytes()) require.Equal(t, 4, p) expectedBytes := []byte{ @@ -369,7 +369,7 @@ func TestEncoderResets(t *testing.T) { require.Equal(t, 0, enc.os.Len()) require.Equal(t, nil, enc.Stream()) b, _ := enc.os.Rawbytes() - require.Equal(t, []byte{}, b.Get()) + require.Equal(t, []byte{}, b.Bytes()) enc.Encode(ts.Datapoint{now, 13}, xtime.Second, nil) require.True(t, enc.os.Len() > 0) @@ -378,5 +378,5 @@ require.Equal(t, 0, enc.os.Len()) require.Equal(t, nil, enc.Stream()) b, _ = enc.os.Rawbytes() - require.Equal(t, []byte{}, b.Get()) + require.Equal(t, []byte{}, b.Bytes()) } diff --git a/encoding/ostream.go b/encoding/ostream.go index 1dd3939f28..7183ac078b 100644 --- a/encoding/ostream.go +++ b/encoding/ostream.go @@ -88,7 +88,7 @@ func (os *ostream) grow(v byte, np int) { } func (os *ostream) fillUnused(v byte) { - os.rawBuffer.Get()[os.lastIndex()] |= v >> uint(os.pos) + os.rawBuffer.Bytes()[os.lastIndex()] |= v >> uint(os.pos) } // WriteBit writes the last bit of v. diff --git a/encoding/ostream_test.go b/encoding/ostream_test.go index 65dab5c07e..6e274cb544 100644 --- a/encoding/ostream_test.go +++ b/encoding/ostream_test.go @@ -49,7 +49,7 @@ func TestWriteBits(t *testing.T) { require.True(t, os.Empty()) for _, input := range inputs { os.WriteBits(input.value, input.numBits) - require.Equal(t, input.expectedBytes, os.rawBuffer.Get()) + require.Equal(t, input.expectedBytes, os.rawBuffer.Bytes()) require.Equal(t, input.expectedPos, os.pos) } require.False(t, os.Empty()) @@ -60,7 +60,7 @@ func TestWriteBytes(t *testing.T) { os := o.(*ostream) rawBytes := []byte{0x1, 0x2} os.WriteBytes(rawBytes) - require.Equal(t, rawBytes, os.rawBuffer.Get()) + require.Equal(t, rawBytes, os.rawBuffer.Bytes()) require.Equal(t, 8, os.pos) } diff --git a/generated-source-files.mk b/generated-source-files.mk new file mode 100644 index 0000000000..73de2a94b9 --- /dev/null +++ b/generated-source-files.mk @@ -0,0 +1,125 @@ +m3x_package := github.com/m3db/m3x +m3x_package_path := $(gopath_prefix)/$(m3x_package) +m3x_package_min_ver := 29bc232d9ad2e6c4a1804ccce095bb730fb1c6bc + +.PHONY: install-m3x-repo +install-m3x-repo: install-glide install-generics-bin + # Check if repository exists, if not get it + test -d $(m3x_package_path) || go get -u $(m3x_package) + test -d $(m3x_package_path)/vendor || (cd $(m3x_package_path) && glide install) + test "$(shell cd $(m3x_package_path) && git diff --shortstat 2>/dev/null)" = "" || ( \ + echo "WARNING: m3x repository is dirty, generated files might not be as expected" \ + ) + # If it does exist but is below the min version, warn that generated files may differ + (cd $(m3x_package_path) && git cat-file -t $(m3x_package_min_ver) > /dev/null) || ( \ + echo "WARNING: m3x repository is below commit $(m3x_package_min_ver), generated files might not be as expected" \ + ) + +# Generation rule for all generated types +.PHONY: genny-all +genny-all: genny-map-all + +# Tests that all currently generated types match their contents if they were regenerated +.PHONY: test-genny-all +test-genny-all: test-genny-map-all + +# Map generation rule for all generated maps +.PHONY: genny-map-all +genny-map-all: genny-map-client-received-blocks genny-map-storage-block-retriever genny-map-storage-bootstrap-result genny-map-storage
genny-map-storage-namespace-metadata genny-map-storage-repair + +# Tests that all currently generated maps match their contents if they were regenerated +.PHONY: test-genny-map-all +test-genny-map-all: genny-map-all + @test "$(shell git diff --shortstat 2>/dev/null)" = "" || (git diff --no-color && echo "Check git status, there are dirty files" && exit 1) + @test "$(shell git status --porcelain 2>/dev/null | grep "^??")" = "" || (git status --porcelain && echo "Check git status, there are untracked files" && exit 1) + +# Map generation rule for client/receivedBlocksMap +.PHONY: genny-map-client-received-blocks +genny-map-client-received-blocks: install-m3x-repo + cd $(m3x_package_path) && make hashmap-gen \ + pkg=client \ + key_type=idAndBlockStart \ + value_type=receivedBlocks \ + out_dir=$(m3db_package_path)/client \ + rename_type_prefix=receivedBlocks + # Rename generated map file + mv -f $(m3db_package_path)/client/map_gen.go $(m3db_package_path)/client/received_blocks_map_gen.go + +# Map generation rule for storage/block/retrieverMap +.PHONY: genny-map-storage-block-retriever +genny-map-storage-block-retriever: install-m3x-repo + cd $(m3x_package_path) && make idhashmap-gen \ + pkg=block \ + value_type=DatabaseBlockRetriever \ + out_dir=$(m3db_package_path)/storage/block \ + rename_type_prefix=retriever \ + rename_constructor=newRetrieverMap \ + rename_constructor_options=retrieverMapOptions + # Rename both generated map and constructor files + mv -f $(m3db_package_path)/storage/block/map_gen.go $(m3db_package_path)/storage/block/retriever_map_gen.go + mv -f $(m3db_package_path)/storage/block/new_map_gen.go $(m3db_package_path)/storage/block/retriever_new_map_gen.go + +# Map generation rule for storage/bootstrap/result/Map +.PHONY: genny-map-storage-bootstrap-result +genny-map-storage-bootstrap-result: install-m3x-repo + cd $(m3x_package_path) && make idhashmap-gen \ + pkg=result \ + value_type=DatabaseSeriesBlocks \ + out_dir=$(m3db_package_path)/storage/bootstrap/result + +# Map generation rule for storage package maps (to avoid double build over each other +# when generating map source files in parallel, run these sequentially) +.PHONY: genny-map-storage +genny-map-storage: + make genny-map-storage-database-namespaces + make genny-map-storage-shard + +# Map generation rule for storage/databaseNamespacesMap +.PHONY: genny-map-storage-database-namespaces +genny-map-storage-database-namespaces: install-m3x-repo + cd $(m3x_package_path) && make idhashmap-gen \ + pkg=storage \ + value_type=databaseNamespace \ + out_dir=$(m3db_package_path)/storage \ + rename_type_prefix=databaseNamespaces \ + rename_constructor=newDatabaseNamespacesMap \ + rename_constructor_options=databaseNamespacesMapOptions + # Rename both generated map and constructor files + mv -f $(m3db_package_path)/storage/map_gen.go $(m3db_package_path)/storage/namespace_map_gen.go + mv -f $(m3db_package_path)/storage/new_map_gen.go $(m3db_package_path)/storage/namespace_new_map_gen.go + +# Map generation rule for storage/shardMap +.PHONY: genny-map-storage-shard +genny-map-storage-shard: install-m3x-repo + cd $(m3x_package_path) && make idhashmap-gen \ + pkg=storage \ + value_type=shardListElement \ + out_dir=$(m3db_package_path)/storage \ + rename_type_prefix=shard \ + rename_constructor=newShardMap \ + rename_constructor_options=shardMapOptions + # Rename both generated map and constructor files + mv -f $(m3db_package_path)/storage/map_gen.go $(m3db_package_path)/storage/shard_map_gen.go + mv -f
$(m3db_package_path)/storage/new_map_gen.go $(m3db_package_path)/storage/shard_new_map_gen.go + +# Map generation rule for storage/namespace/metadataMap +.PHONY: genny-map-storage-namespace-metadata +genny-map-storage-namespace-metadata: install-m3x-repo + cd $(m3x_package_path) && make idhashmap-gen \ + pkg=namespace \ + value_type=Metadata \ + out_dir=$(m3db_package_path)/storage/namespace \ + rename_type_prefix=metadata \ + rename_constructor=newMetadataMap \ + rename_constructor_options=metadataMapOptions + # Rename both generated map and constructor files + mv -f $(m3db_package_path)/storage/namespace/map_gen.go $(m3db_package_path)/storage/namespace/metadata_map_gen.go + mv -f $(m3db_package_path)/storage/namespace/new_map_gen.go $(m3db_package_path)/storage/namespace/metadata_new_map_gen.go + +# Map generation rule for storage/repair/Map +.PHONY: genny-map-storage-repair +genny-map-storage-repair: install-m3x-repo + cd $(m3x_package_path) && make idhashmap-gen \ + pkg=repair \ + value_type=ReplicaSeriesBlocksMetadata \ + out_dir=$(m3db_package_path)/storage/repair diff --git a/glide.lock b/glide.lock index 2fba673bdc..73998ee371 100644 --- a/glide.lock +++ b/glide.lock @@ -1,5 +1,5 @@ -hash: 819ca4474f6e7d12dc3e5d093a8100f6e0f07792f16c20541b1da86bf4a22691 -updated: 2018-04-16T15:40:38.436333596-04:00 +hash: b02669f674361cbfe8df466611da4a6567ecbf6c671269a66d1252030804bf85 +updated: 2018-04-17T16:22:25.025883-04:00 imports: - name: github.com/apache/thrift version: 53dd39833a08ce33582e5ff31fa18bb4735d6731 @@ -200,6 +200,7 @@ imports: version: df3eee08112241de1bd2cbff83e61b39977cfc72 subpackages: - doc + - idx - index - index/segment - index/segment/mem @@ -207,8 +208,12 @@ imports: - index/segment/mem/postingsgen - postings - postings/roaring + - search + - search/executor + - search/query + - search/searcher - name: github.com/m3db/m3x - version: 8bfb57c2e1a57770262378f26833110fbd029d94 + version: 29bc232d9ad2e6c4a1804ccce095bb730fb1c6bc vcs: git subpackages: - checked @@ -254,7 +259,7 @@ imports: subpackages: - difflib - name: github.com/prometheus/client_golang - version: c5b7fccd204277076155f10851dad72b76a49317 + version: 967789050ba94deca04a5e84cce8ad472ce313c1 subpackages: - prometheus - name: github.com/prometheus/client_model diff --git a/glide.yaml b/glide.yaml index b75da225c6..c0039afb4d 100644 --- a/glide.yaml +++ b/glide.yaml @@ -1,7 +1,7 @@ package: github.com/m3db/m3db import: - package: github.com/m3db/m3x - version: 8bfb57c2e1a57770262378f26833110fbd029d94 + version: 29bc232d9ad2e6c4a1804ccce095bb730fb1c6bc vcs: git subpackages: - checked @@ -40,6 +40,9 @@ import: - package: github.com/m3db/stackadler32 version: bfebcd73ef6ffe0ee30489227f0330c39064b674 +- package: github.com/cespare/xxhash + version: 48099fad606eafc26e3a569fad19ff510fff4df6 + - package: github.com/apache/thrift version: ~0.9.3 subpackages: diff --git a/integration/client.go b/integration/client.go index beb39f5963..10b8b91214 100644 --- a/integration/client.go +++ b/integration/client.go @@ -59,7 +59,7 @@ func tchannelClientWriteBatch(client rpc.TChanNode, timeout time.Duration, names for _, series := range seriesList { for _, dp := range series.Data { elem := &rpc.WriteBatchRawRequestElement{ - ID: series.ID.Data().Get(), + ID: series.ID.Data().Bytes(), Datapoint: &rpc.Datapoint{ Timestamp: xtime.ToNormalizedTime(dp.Timestamp, time.Second), Value: dp.Value, @@ -71,7 +71,10 @@ func tchannelClientWriteBatch(client rpc.TChanNode, timeout time.Duration, names } ctx, _ := thrift.NewContext(timeout) - 
batchReq := &rpc.WriteBatchRawRequest{ + NameSpace: namespace.Data().Bytes(), + Elements: elems, + } return client.WriteBatchRaw(ctx, batchReq) } @@ -201,7 +204,7 @@ func m3dbClientFetchBlocksMetadata( metadatasByShard := make(map[uint32][]block.ReplicaMetadata, 10) // iterate over all shards - seen := make(map[ident.Hash]map[xtime.UnixNano]struct{}) + seen := make(map[string]map[xtime.UnixNano]struct{}) for _, shardID := range shards { // clear seen for key := range seen { @@ -217,11 +220,11 @@ for iter.Next() { host, blockMetadata := iter.Current() - idHash := blockMetadata.ID.Hash() - seenBlocks, ok := seen[idHash] + idString := blockMetadata.ID.String() + seenBlocks, ok := seen[idString] if !ok { seenBlocks = make(map[xtime.UnixNano]struct{}) - seen[idHash] = seenBlocks + seen[idString] = seenBlocks } if _, ok := seenBlocks[xtime.ToUnixNano(blockMetadata.Start)]; ok { continue // Already seen diff --git a/integration/data.go b/integration/data.go index 7273bed07b..e88af7958f 100644 --- a/integration/data.go +++ b/integration/data.go @@ -163,7 +163,7 @@ func compareSeriesList( require.Equal(t, len(expected), len(actual)) for i := range expected { - require.Equal(t, expected[i].ID.Data().Get(), actual[i].ID.Data().Get()) + require.Equal(t, expected[i].ID.Data().Bytes(), actual[i].ID.Data().Bytes()) require.Equal(t, expected[i].Data, actual[i].Data) } } diff --git a/integration/disk_flush_helpers.go b/integration/disk_flush_helpers.go index ce86d0d04f..333cd50be3 100644 --- a/integration/disk_flush_helpers.go +++ b/integration/disk_flush_helpers.go @@ -147,7 +147,7 @@ func verifyForTime( var datapoints []ts.Datapoint it := iteratorPool.Get() - it.Reset(bytes.NewBuffer(data.Get())) + it.Reset(bytes.NewBuffer(data.Bytes())) for it.Next() { dp, _, _ := it.Current() datapoints = append(datapoints, dp) diff --git a/integration/generate/generate.go b/integration/generate/generate.go index fb75d53938..5e9c2fc265 100644 --- a/integration/generate/generate.go +++ b/integration/generate/generate.go @@ -16,7 +16,7 @@ import ( func (l SeriesBlock) Len() int { return len(l) } func (l SeriesBlock) Swap(i, j int) { l[i], l[j] = l[j], l[i] } func (l SeriesBlock) Less(i, j int) bool { - return bytes.Compare(l[i].ID.Data().Get(), l[j].ID.Data().Get()) < 0 + return bytes.Compare(l[i].ID.Data().Bytes(), l[j].ID.Data().Bytes()) < 0 } // Block generates a SeriesBlock based on provided config @@ -98,5 +98,5 @@ func (l SeriesDataPointsByTime) Less(i, j int) bool { if !l[i].Timestamp.Equal(l[j].Timestamp) { return l[i].Timestamp.Before(l[j].Timestamp) } - return bytes.Compare(l[i].ID.Data().Get(), l[j].ID.Data().Get()) < 0 + return bytes.Compare(l[i].ID.Data().Bytes(), l[j].ID.Data().Bytes()) < 0 } diff --git a/integration/mixed_mode_read_write_test.go b/integration/mixed_mode_read_write_test.go index 6ec09d5fbc..4c88d0c6c5 100644 --- a/integration/mixed_mode_read_write_test.go +++ b/integration/mixed_mode_read_write_test.go @@ -249,15 +249,16 @@ type seriesDatapoint struct { } func (d dataPointsInTimeOrder) toSeriesMap(blockSize time.Duration) generate.SeriesBlocksByStart { - blockStartToSeriesMap := make(map[xtime.UnixNano]map[ident.Hash]generate.Series) + blockStartToSeriesMap := make(map[xtime.UnixNano]map[string]generate.Series) for _, point := range d { t := point.time trunc := t.Truncate(blockSize) seriesBlock, ok := blockStartToSeriesMap[xtime.ToUnixNano(trunc)] if !ok { -
seriesBlock = make(map[ident.Hash]generate.Series) + seriesBlock = make(map[string]generate.Series) } - dp, ok := seriesBlock[point.series.Hash()] + idString := point.series.String() + dp, ok := seriesBlock[idString] if !ok { dp = generate.Series{ID: point.series} } @@ -265,7 +266,7 @@ func (d dataPointsInTimeOrder) toSeriesMap(blockSize time.Duration) generate.Ser Timestamp: t, Value: point.value, }) - seriesBlock[point.series.Hash()] = dp + seriesBlock[idString] = dp blockStartToSeriesMap[xtime.ToUnixNano(trunc)] = seriesBlock } diff --git a/integration/truncate_namespace_test.go b/integration/truncate_namespace_test.go index 63da7c3337..a2bb8e1938 100644 --- a/integration/truncate_namespace_test.go +++ b/integration/truncate_namespace_test.go @@ -102,7 +102,7 @@ func TestTruncateNamespace(t *testing.T) { log.Debugf("truncate namespace %s", testNamespaces[0]) truncateReq := rpc.NewTruncateRequest() - truncateReq.NameSpace = testNamespaces[0].Data().Get() + truncateReq.NameSpace = testNamespaces[0].Data().Bytes() truncated, err := testSetup.truncate(truncateReq) require.NoError(t, err) require.Equal(t, int64(1), truncated) diff --git a/network/server/tchannelthrift/convert/convert.go b/network/server/tchannelthrift/convert/convert.go index e7d6096626..57410d276c 100644 --- a/network/server/tchannelthrift/convert/convert.go +++ b/network/server/tchannelthrift/convert/convert.go @@ -159,7 +159,7 @@ func ToSegments(readers []xio.SegmentReader) (ToSegmentsResult, error) { func bytesRef(data checked.Bytes) []byte { if data != nil { - return data.Get() + return data.Bytes() } return nil } diff --git a/network/server/tchannelthrift/node/service.go b/network/server/tchannelthrift/node/service.go index 955832054d..24fdbed841 100644 --- a/network/server/tchannelthrift/node/service.go +++ b/network/server/tchannelthrift/node/service.go @@ -454,7 +454,7 @@ func (s *service) FetchBlocksMetadataRaw(tctx thrift.Context, req *rpc.FetchBloc for _, fetchedMetadata := range fetchedResults { blocksMetadata := s.blocksMetadataPool.Get() - blocksMetadata.ID = fetchedMetadata.ID.Data().Get() + blocksMetadata.ID = fetchedMetadata.ID.Data().Bytes() fetchedMetadataBlocks := fetchedMetadata.Blocks.Results() blocksMetadata.Blocks = s.blockMetadataSlicePool.Get() @@ -575,7 +575,7 @@ func (s *service) getBlocksMetadataV2FromResult( for _, fetchedMetadata := range results.Results() { fetchedMetadataBlocks := fetchedMetadata.Blocks.Results() - id := fetchedMetadata.ID.Data().Get() + id := fetchedMetadata.ID.Data().Bytes() for _, fetchedMetadataBlock := range fetchedMetadataBlocks { blockMetadata := s.blockMetadataV2Pool.Get() diff --git a/network/server/tchannelthrift/node/service_test.go b/network/server/tchannelthrift/node/service_test.go index 04cdde8bbd..062e94abc4 100644 --- a/network/server/tchannelthrift/node/service_test.go +++ b/network/server/tchannelthrift/node/service_test.go @@ -222,10 +222,10 @@ func TestServiceFetchBatchRaw(t *testing.T) { require.NoError(t, err) if expectSegment.Head != nil { - expectHead = expectSegment.Head.Get() + expectHead = expectSegment.Head.Bytes() } if expectSegment.Tail != nil { - expectTail = expectSegment.Tail.Get() + expectTail = expectSegment.Tail.Bytes() } assert.Equal(t, expectHead, seg.Merged.Head) @@ -331,10 +331,10 @@ func TestServiceFetchBlocksRaw(t *testing.T) { require.NoError(t, err) if expectSegment.Head != nil { - expectHead = expectSegment.Head.Get() + expectHead = expectSegment.Head.Bytes() } if expectSegment.Tail != nil { - expectTail = 
expectSegment.Tail.Get() + expectTail = expectSegment.Tail.Bytes() } assert.Equal(t, expectHead, seg.Merged.Head) diff --git a/persist/fs/clone/cloner_test.go b/persist/fs/clone/cloner_test.go index d73b4e775e..c4621ad82f 100644 --- a/persist/fs/clone/cloner_test.go +++ b/persist/fs/clone/cloner_test.go @@ -95,7 +95,7 @@ func TestCloner(t *testing.T) { b1.IncRef() b2.IncRef() require.Equal(t, t1.String(), t2.String()) - require.Equal(t, b1.Get(), b2.Get()) + require.Equal(t, b1.Bytes(), b2.Bytes()) require.Equal(t, c1, c2) b1.DecRef() b2.DecRef() diff --git a/persist/fs/commitlog/read_write_prop_test.go b/persist/fs/commitlog/read_write_prop_test.go index 2efd99d3d8..825408ff41 100644 --- a/persist/fs/commitlog/read_write_prop_test.go +++ b/persist/fs/commitlog/read_write_prop_test.go @@ -302,7 +302,7 @@ func newInitState(dir string, t *testing.T) *clState { } func (s *clState) writesArePresent(writes ...generatedWrite) error { - writesOnDisk := make(map[ident.Hash]map[xtime.UnixNano]generatedWrite) + writesOnDisk := make(map[string]map[xtime.UnixNano]generatedWrite) iterOpts := IteratorOpts{ CommitLogOptions: s.opts, FileFilterPredicate: ReadAllPredicate(), @@ -316,11 +316,11 @@ func (s *clState) writesArePresent(writes ...generatedWrite) error { defer iter.Close() for iter.Next() { series, datapoint, unit, annotation := iter.Current() - idHash := series.ID.Hash() - seriesMap, ok := writesOnDisk[idHash] + idString := series.ID.String() + seriesMap, ok := writesOnDisk[idString] if !ok { seriesMap = make(map[xtime.UnixNano]generatedWrite) - writesOnDisk[idHash] = seriesMap + writesOnDisk[idString] = seriesMap } seriesMap[xtime.ToUnixNano(datapoint.Timestamp)] = generatedWrite{ series: series, @@ -335,8 +335,8 @@ func (s *clState) writesArePresent(writes ...generatedWrite) error { missingErr := fmt.Errorf("writesOnDisk: %+v, writes: %+v", writesOnDisk, writes) for _, w := range writes { - idHash := w.series.ID.Hash() - seriesMap, ok := writesOnDisk[idHash] + idString := w.series.ID.String() + seriesMap, ok := writesOnDisk[idString] if !ok { return missingErr } diff --git a/persist/fs/commitlog/writer.go b/persist/fs/commitlog/writer.go index 5231c8c3d5..cf471ee77e 100644 --- a/persist/fs/commitlog/writer.go +++ b/persist/fs/commitlog/writer.go @@ -171,8 +171,8 @@ func (w *writer) Write( // If "idx" likely hasn't been written to commit log // yet we need to include series metadata var metadata schema.LogMetadata - metadata.ID = series.ID.Data().Get() - metadata.Namespace = series.Namespace.Data().Get() + metadata.ID = series.ID.Data().Bytes() + metadata.Namespace = series.Namespace.Data().Bytes() metadata.Shard = series.Shard w.metadataEncoder.Reset() if err := w.metadataEncoder.EncodeLogMetadata(metadata); err != nil { diff --git a/persist/fs/index_lookup.go b/persist/fs/index_lookup.go index 0e530f862f..0681aa436d 100644 --- a/persist/fs/index_lookup.go +++ b/persist/fs/index_lookup.go @@ -26,9 +26,9 @@ import ( "fmt" "github.com/m3db/m3db/digest" + xmsgpack "github.com/m3db/m3db/persist/fs/msgpack" "github.com/m3db/m3db/x/mmap" "github.com/m3db/m3x/ident" - xmsgpack "github.com/m3db/m3db/persist/fs/msgpack" "gopkg.in/vmihailenco/msgpack.v2" ) @@ -85,7 +85,7 @@ func (il *nearestIndexOffsetLookup) concurrentClone() (*nearestIndexOffsetLookup // In other words, the returned offset can always be used as a starting point to // begin scanning the index file for the desired series. 
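A minimal, self-contained sketch of the nearest-predecessor binary search this lookup relies on (hypothetical names and error value; not the seeker's actual types):

```
package main

import (
	"bytes"
	"errors"
	"fmt"
)

// errNoPrecedingEntry is a hypothetical sentinel for "no summary sorts at or
// before this ID", in which case a forward scan has nowhere safe to start.
var errNoPrecedingEntry = errors.New("no entry at or before target")

// nearestBefore returns the position of the greatest sorted entry that is
// less than or equal to target, so the result is always a valid starting
// point for a forward scan of the index file.
func nearestBefore(sorted [][]byte, target []byte) (int, error) {
	best := -1
	min, max := 0, len(sorted)-1
	for min <= max {
		mid := (min + max) / 2
		if bytes.Compare(sorted[mid], target) <= 0 {
			best = mid // candidate found; keep searching for a later one
			min = mid + 1
		} else {
			max = mid - 1
		}
	}
	if best == -1 {
		return 0, errNoPrecedingEntry
	}
	return best, nil
}

func main() {
	ids := [][]byte{[]byte("bar"), []byte("baz"), []byte("foo")}
	idx, err := nearestBefore(ids, []byte("cat"))
	fmt.Println(idx, err) // 1 <nil>: "baz" is the nearest ID at or before "cat"
}
```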
func (il *nearestIndexOffsetLookup) getNearestIndexFileOffset(id ident.ID) (int64, error) { - idBytes := id.Data().Get() + idBytes := id.Data().Bytes() min := 0 max := len(il.summaryIDsOffsets) - 1 diff --git a/persist/fs/index_lookup_prop_test.go b/persist/fs/index_lookup_prop_test.go index d28f150509..9e368d060b 100644 --- a/persist/fs/index_lookup_prop_test.go +++ b/persist/fs/index_lookup_prop_test.go @@ -49,7 +49,7 @@ func TestIndexLookupWriteRead(t *testing.T) { writes := []generatedWrite{} unique := map[string]struct{}{} for _, write := range input.realWrites { - s := string(write.id.Data().Get()) + s := string(write.id.Data().Bytes()) if _, ok := unique[s]; ok { continue } diff --git a/persist/fs/read.go b/persist/fs/read.go index ad30eb8adf..28c9427aa7 100644 --- a/persist/fs/read.go +++ b/persist/fs/read.go @@ -345,7 +345,7 @@ func (r *reader) Read() (ident.ID, checked.Bytes, uint32, error) { defer data.DecRef() } - n, err := r.dataReader.Read(data.Get()) + n, err := r.dataReader.Read(data.Bytes()) if err != nil { return none, nil, 0, err } diff --git a/persist/fs/read_write_test.go b/persist/fs/read_write_test.go index fd51f03aaf..9da07e14dd 100644 --- a/persist/fs/read_write_test.go +++ b/persist/fs/read_write_test.go @@ -127,14 +127,14 @@ func readTestData(t *testing.T, r FileSetReader, shard uint32, timestamp time.Ti data.IncRef() assert.Equal(t, entries[i].id, id.String()) - assert.True(t, bytes.Equal(entries[i].data, data.Get())) + assert.True(t, bytes.Equal(entries[i].data, data.Bytes())) assert.Equal(t, digest.Checksum(entries[i].data), checksum) assert.Equal(t, i+1, r.EntriesRead()) // Verify that the bloomFilter was bootstrapped properly by making sure it // at least contains every ID - assert.True(t, bloomFilter.Test(id.Data().Get())) + assert.True(t, bloomFilter.Test(id.Data().Bytes())) id.Finalize() data.DecRef() @@ -151,7 +151,7 @@ func readTestData(t *testing.T, r FileSetReader, shard uint32, timestamp time.Ti // Verify that the bloomFilter was bootstrapped properly by making sure it // at least contains every ID - assert.True(t, bloomFilter.Test(id.Data().Get())) + assert.True(t, bloomFilter.Test(id.Data().Bytes())) id.Finalize() } diff --git a/persist/fs/retriever.go b/persist/fs/retriever.go index a3b25d6d44..5cee19c9d4 100644 --- a/persist/fs/retriever.go +++ b/persist/fs/retriever.go @@ -314,7 +314,7 @@ func (r *blockRetriever) fetchBatch( if data != nil { dataCopy := r.bytesPool.Get(data.Len()) onRetrieveSeg = ts.NewSegment(dataCopy, nil, ts.FinalizeHead) - dataCopy.AppendAll(data.Get()) + dataCopy.AppendAll(data.Bytes()) } } @@ -380,7 +380,7 @@ func (r *blockRetriever) Stream( // If the ID is not in the seeker's bloom filter, then it's definitely not on // disk and we can return immediately - if !bloomFilter.Test(id.Data().Get()) { + if !bloomFilter.Test(id.Data().Bytes()) { // No need to call req.onRetrieve.OnRetrieveBlock if there is no data req.onRetrieved(ts.Segment{}) return req, nil diff --git a/persist/fs/retriever_concurrent_test.go b/persist/fs/retriever_concurrent_test.go index 037e7451e6..c501ad31f3 100644 --- a/persist/fs/retriever_concurrent_test.go +++ b/persist/fs/retriever_concurrent_test.go @@ -98,7 +98,7 @@ func newOpenTestWriter( type streamResult struct { ctx context.Context shard uint32 - id ident.ID + id string blockStart time.Time stream xio.SegmentReader } @@ -152,8 +152,9 @@ func testBlockRetrieverHighConcurrentSeeks(t *testing.T, shouldCacheShardIndices shards = []uint32{0, 1, 2} idsPerShard = 16 shardIDs = 
make(map[uint32][]ident.ID) + shardIDStrings = make(map[uint32][]string) dataBytesPerID = 32 - shardData = make(map[uint32]map[ident.Hash]map[xtime.UnixNano]checked.Bytes) + shardData = make(map[uint32]map[string]map[xtime.UnixNano]checked.Bytes) blockStarts []time.Time ) for st := min; !st.After(max); st = st.Add(ropts.BlockSize()) { @@ -161,14 +162,17 @@ func testBlockRetrieverHighConcurrentSeeks(t *testing.T, shouldCacheShardIndices } for _, shard := range shards { shardIDs[shard] = make([]ident.ID, 0, idsPerShard) - shardData[shard] = make(map[ident.Hash]map[xtime.UnixNano]checked.Bytes, idsPerShard) + shardData[shard] = make(map[string]map[xtime.UnixNano]checked.Bytes, idsPerShard) for _, blockStart := range blockStarts { w, closer := newOpenTestWriter(t, fsOpts, shard, blockStart) for i := 0; i < idsPerShard; i++ { - id := ident.StringID(fmt.Sprintf("foo.%d", i)) + idString := fmt.Sprintf("foo.%d", i) + shardIDStrings[shard] = append(shardIDStrings[shard], idString) + + id := ident.StringID(idString) shardIDs[shard] = append(shardIDs[shard], id) - if _, ok := shardData[shard][id.Hash()]; !ok { - shardData[shard][id.Hash()] = make(map[xtime.UnixNano]checked.Bytes, len(blockStarts)) + if _, ok := shardData[shard][idString]; !ok { + shardData[shard][idString] = make(map[xtime.UnixNano]checked.Bytes, len(blockStarts)) } data := checked.NewBytes(nil, nil) @@ -176,9 +180,9 @@ func testBlockRetrieverHighConcurrentSeeks(t *testing.T, shouldCacheShardIndices for j := 0; j < dataBytesPerID; j++ { data.Append(byte(rand.Int63n(256))) } - shardData[shard][id.Hash()][xtime.ToUnixNano(blockStart)] = data + shardData[shard][idString][xtime.ToUnixNano(blockStart)] = data - err := w.Write(id, data, digest.Checksum(data.Get())) + err := w.Write(id, data, digest.Checksum(data.Bytes())) require.NoError(t, err) } closer() @@ -214,6 +218,7 @@ func testBlockRetrieverHighConcurrentSeeks(t *testing.T, shouldCacheShardIndices shard := uint32((j + shardOffset) % len(shards)) idIdx := uint32((j + idOffset) % len(shardIDs[shard])) id := shardIDs[shard][idIdx] + idString := shardIDStrings[shard][idIdx] for k := 0; k < len(blockStarts); k++ { ctx := context.NewContext() @@ -222,22 +227,23 @@ func testBlockRetrieverHighConcurrentSeeks(t *testing.T, shouldCacheShardIndices results = append(results, streamResult{ ctx: ctx, shard: shard, - id: id, + id: idString, blockStart: blockStarts[k], stream: stream, }) } + for _, r := range results { seg, err := r.stream.Segment() if err != nil { fmt.Printf("\nstream seg err: %v\n", err) - fmt.Printf("id: %s\n", r.id.String()) + fmt.Printf("id: %s\n", r.id) fmt.Printf("shard: %d\n", r.shard) fmt.Printf("start: %v\n", r.blockStart.String()) } require.NoError(t, err) - compare.Head = shardData[r.shard][r.id.Hash()][xtime.ToUnixNano(r.blockStart)] + compare.Head = shardData[r.shard][r.id][xtime.ToUnixNano(r.blockStart)] assert.True(t, seg.Equal(&compare)) r.ctx.Close() @@ -284,7 +290,7 @@ func TestBlockRetrieverIDDoesNotExist(t *testing.T) { data := checked.NewBytes([]byte("Hello world!"), nil) data.IncRef() defer data.DecRef() - err = w.Write(ident.StringID("exists"), data, digest.Checksum(data.Get())) + err = w.Write(ident.StringID("exists"), data, digest.Checksum(data.Bytes())) assert.NoError(t, err) closer() diff --git a/persist/fs/seek.go b/persist/fs/seek.go index 54e0a53e7f..15b033167e 100644 --- a/persist/fs/seek.go +++ b/persist/fs/seek.go @@ -390,7 +390,7 @@ func (s *seeker) SeekByIndexEntry(entry IndexEntry) (checked.Bytes, error) { } // Copy the actual data into the 
underlying buffer - underlyingBuf := buffer.Get() + underlyingBuf := buffer.Bytes() copy(underlyingBuf, data[:entry.Size]) // NB(r): _must_ check the checksum against known checksum as the data @@ -413,7 +413,7 @@ func (s *seeker) SeekIndexEntry(id ident.ID) (IndexEntry, error) { stream := msgpack.NewDecoderStream(s.indexMmap[offset:]) s.decoder.Reset(stream) - idBytes := id.Data().Get() + idBytes := id.Data().Bytes() // Prevent panic's when we're scanning to the end of the buffer for stream.Remaining() != 0 { entry, err := s.decoder.DecodeIndexEntry() diff --git a/persist/fs/seek_test.go b/persist/fs/seek_test.go index b02aae098b..950a82b83c 100644 --- a/persist/fs/seek_test.go +++ b/persist/fs/seek_test.go @@ -201,14 +201,14 @@ func TestSeek(t *testing.T) { data.IncRef() defer data.DecRef() - assert.Equal(t, []byte{1, 2, 3}, data.Get()) + assert.Equal(t, []byte{1, 2, 3}, data.Bytes()) data, err = s.SeekByID(ident.StringID("foo1")) require.NoError(t, err) data.IncRef() defer data.DecRef() - assert.Equal(t, []byte{1, 2, 1}, data.Get()) + assert.Equal(t, []byte{1, 2, 1}, data.Bytes()) _, err = s.SeekByID(ident.StringID("foo")) assert.Error(t, err) @@ -219,7 +219,7 @@ func TestSeek(t *testing.T) { data.IncRef() defer data.DecRef() - assert.Equal(t, []byte{1, 2, 2}, data.Get()) + assert.Equal(t, []byte{1, 2, 2}, data.Bytes()) assert.NoError(t, s.Close()) } @@ -329,7 +329,7 @@ func TestReuseSeeker(t *testing.T) { data.IncRef() defer data.DecRef() - assert.Equal(t, []byte{1, 2, 1}, data.Get()) + assert.Equal(t, []byte{1, 2, 1}, data.Bytes()) err = s.Open(testNs1ID, 0, testWriterStart) assert.NoError(t, err) @@ -339,7 +339,7 @@ func TestReuseSeeker(t *testing.T) { data.IncRef() defer data.DecRef() - assert.Equal(t, []byte{1, 2, 3}, data.Get()) + assert.Equal(t, []byte{1, 2, 3}, data.Bytes()) } func TestCloneSeeker(t *testing.T) { @@ -396,5 +396,5 @@ func TestCloneSeeker(t *testing.T) { data.IncRef() defer data.DecRef() - assert.Equal(t, []byte{1, 2, 1}, data.Get()) + assert.Equal(t, []byte{1, 2, 1}, data.Bytes()) } diff --git a/persist/fs/write.go b/persist/fs/write.go index fa4be491f3..75c5e07c03 100644 --- a/persist/fs/write.go +++ b/persist/fs/write.go @@ -87,7 +87,7 @@ func (e indexEntries) Len() int { } func (e indexEntries) Less(i, j int) bool { - return bytes.Compare(e[i].id.Data().Get(), e[j].id.Data().Get()) < 0 + return bytes.Compare(e[i].id.Data().Bytes(), e[j].id.Data().Bytes()) < 0 } func (e indexEntries) Swap(i, j int) { @@ -273,7 +273,7 @@ func (w *writer) writeAll( if d == nil { continue } - if err := w.writeData(d.Get()); err != nil { + if err := w.writeData(d.Bytes()); err != nil { return err } } @@ -401,7 +401,7 @@ func (w *writer) writeIndexFileContents( prevID []byte ) for i := range w.indexEntries { - id := w.indexEntries[i].id.Data().Get() + id := w.indexEntries[i].id.Data().Bytes() // Need to check if i > 0 or we can never write an empty string ID if i > 0 && bytes.Equal(id, prevID) { // Should never happen, Write() should only be called once per ID @@ -456,7 +456,7 @@ func (w *writer) writeSummariesFileContents( summary := schema.IndexSummary{ Index: w.indexEntries[i].index, - ID: w.indexEntries[i].id.Data().Get(), + ID: w.indexEntries[i].id.Data().Bytes(), IndexEntryOffset: w.indexEntries[i].indexFileOffset, } diff --git a/serialize/decoder.go b/serialize/decoder.go index 3eb7f94432..8ac0107f6a 100644 --- a/serialize/decoder.go +++ b/serialize/decoder.go @@ -56,7 +56,7 @@ func newTagDecoder(opts TagDecoderOptions, pool TagDecoderPool) TagDecoder { func (d *decoder) 
Reset(b checked.Bytes) { d.checkedData = b d.checkedData.IncRef() - d.data = d.checkedData.Get() + d.data = d.checkedData.Bytes() header, err := d.decodeUInt16() if err != nil { @@ -199,9 +199,9 @@ func (d *decoder) Finalize() { } func (d *decoder) cloneCurrent() ident.Tag { - name := d.opts.CheckedBytesWrapperPool().Get(d.current.Name.Data().Get()) + name := d.opts.CheckedBytesWrapperPool().Get(d.current.Name.Data().Bytes()) d.checkedData.IncRef() - value := d.opts.CheckedBytesWrapperPool().Get(d.current.Value.Data().Get()) + value := d.opts.CheckedBytesWrapperPool().Get(d.current.Value.Data().Bytes()) d.checkedData.IncRef() return ident.BinaryTag(name, value) } diff --git a/serialize/decoder_lifecycle_prop_test.go b/serialize/decoder_lifecycle_prop_test.go index 496420160a..c7aaebcb87 100644 --- a/serialize/decoder_lifecycle_prop_test.go +++ b/serialize/decoder_lifecycle_prop_test.go @@ -431,7 +431,7 @@ func newDecoderState() gopter.Gen { if !ok { return nil } - data := checked.NewBytes(b.Get(), nil) + data := checked.NewBytes(b.Bytes(), nil) return &multiDecoderState{ tags: tags, initBytes: data, diff --git a/serialize/decoder_test.go b/serialize/decoder_test.go index 5edaff0063..d5a08e90cd 100644 --- a/serialize/decoder_test.go +++ b/serialize/decoder_test.go @@ -137,7 +137,7 @@ func TestDecodeOwnershipFinalize(t *testing.T) { d.Finalize() require.Equal(t, 0, wrappedBytes.NumRef()) wrappedBytes.IncRef() - require.Nil(t, wrappedBytes.Get()) + require.Nil(t, wrappedBytes.Bytes()) } func TestDecodeMissingValue(t *testing.T) { @@ -173,9 +173,9 @@ func TestDecodeDuplicateLifecycle(t *testing.T) { require.Equal(t, oldLen, copy.Remaining()) for copy.Next() { - tag := copy.Current() // keep looping - tag.Name.Data().Get() // ensure we can get values too - tag.Value.Data().Get() // and don't panic + tag := copy.Current() // keep looping + tag.Name.Data().Bytes() // ensure we can get values too + tag.Value.Data().Bytes() // and don't panic } require.NoError(t, copy.Err()) copy.Close() @@ -194,9 +194,9 @@ func TestDecodeDuplicateIteration(t *testing.T) { require.Equal(t, oldLen, copy.Remaining()) for copy.Next() { - tag := copy.Current() // keep looping - tag.Name.Data().Get() // ensure we can get values too - tag.Value.Data().Get() // and don't panic + tag := copy.Current() // keep looping + tag.Name.Data().Bytes() // ensure we can get values too + tag.Value.Data().Bytes() // and don't panic } require.NoError(t, copy.Err()) copy.Close() @@ -214,7 +214,7 @@ func TestDecodeDuplicateLifecycleMocks(t *testing.T) { rawData := testTagDecoderBytesRaw() mockBytes := checked.NewMockBytes(ctrl) - mockBytes.EXPECT().Get().Return(rawData).AnyTimes() + mockBytes.EXPECT().Bytes().Return(rawData).AnyTimes() mockBytes.EXPECT().IncRef() d := newTestTagDecoder() diff --git a/serialize/encode_decode_prop_test.go b/serialize/encode_decode_prop_test.go index 1db72f4a78..73886d6ed6 100644 --- a/serialize/encode_decode_prop_test.go +++ b/serialize/encode_decode_prop_test.go @@ -121,11 +121,11 @@ func tagItersAreEqual(ti1, ti2 ident.TagIterator) (bool, error) { t1, t2 := ti1.Current(), ti2.Current() if !t1.Name.Equal(t2.Name) { return false, fmt.Errorf("tag names are un-equal: %v %v", - t1.Name.Data().Get(), t2.Name.Data().Get()) + t1.Name.Data().Bytes(), t2.Name.Data().Bytes()) } if !t2.Value.Equal(t2.Value) { return false, fmt.Errorf("tag values are un-equal: %v %v", - t1.Value.Data().Get(), t2.Value.Data().Get()) + t1.Value.Data().Bytes(), t2.Value.Data().Bytes()) } return tagItersAreEqual(ti1, ti2) diff --git 
a/serialize/encoder.go b/serialize/encoder.go index 8366df8ed1..b39a1547bf 100644 --- a/serialize/encoder.go +++ b/serialize/encoder.go @@ -163,7 +163,7 @@ func (e *encoder) encodeTag(t ident.Tag) error { } func (e *encoder) encodeID(i ident.ID) error { - d := i.Data().Get() + d := i.Data().Bytes() max := int(e.opts.TagSerializationLimits().MaxTagLiteralLength()) if len(d) >= max { return errTagLiteralTooLong diff --git a/serialize/encoder_test.go b/serialize/encoder_test.go index 3f8e4907f9..d4dfe418d1 100644 --- a/serialize/encoder_test.go +++ b/serialize/encoder_test.go @@ -46,7 +46,7 @@ func TestEmptyEncode(t *testing.T) { bc, ok := e.Data() require.True(t, ok) require.NotNil(t, bc) - b := bc.Get() + b := bc.Bytes() require.Len(t, b, 4) require.Equal(t, headerMagicBytes, b[:2]) require.Equal(t, []byte{0x0, 0x0}, b[2:4]) @@ -90,7 +90,7 @@ func TestSimpleEncode(t *testing.T) { bc, ok := e.Data() require.True(t, ok) require.NotNil(t, bc) - b := bc.Get() + b := bc.Bytes() numExpectedBytes := 2 /* header */ + 2 /* num tags */ + 2 /* abc length */ + len("abc") + 2 /* defg length */ + len("defg") + diff --git a/sharding/shardset.go b/sharding/shardset.go index 7c8edbc178..8bb044c4ca 100644 --- a/sharding/shardset.go +++ b/sharding/shardset.go @@ -161,6 +161,6 @@ func NewHashGenWithSeed(seed uint32) HashGen { // NewHashFn generates a HashFN based on murmur32 with a given seed func NewHashFn(length int, seed uint32) HashFn { return func(id ident.ID) uint32 { - return murmur3.Sum32WithSeed(id.Data().Get(), seed) % uint32(length) + return murmur3.Sum32WithSeed(id.Data().Bytes(), seed) % uint32(length) } } diff --git a/storage/block/retriever_manager.go b/storage/block/retriever_manager.go index d6758b70d0..146494debe 100644 --- a/storage/block/retriever_manager.go +++ b/storage/block/retriever_manager.go @@ -43,24 +43,22 @@ func NewDatabaseBlockRetrieverManager( ) DatabaseBlockRetrieverManager { return &blockRetrieverManager{ newRetrieverFn: newDatabaseBlockRetrieverFn, - retrievers: make(map[ident.Hash]DatabaseBlockRetriever), + retrievers: newRetrieverMap(retrieverMapOptions{}), } } type blockRetrieverManager struct { sync.RWMutex newRetrieverFn NewDatabaseBlockRetrieverFn - retrievers map[ident.Hash]DatabaseBlockRetriever + retrievers *retrieverMap } func (m *blockRetrieverManager) Retriever( md namespace.Metadata, ) (DatabaseBlockRetriever, error) { - idHash := md.ID().Hash() m.RLock() - retriever, ok := m.retrievers[idHash] + retriever, ok := m.retrievers.Get(md.ID()) m.RUnlock() - if ok { return retriever, nil } @@ -68,7 +66,7 @@ func (m *blockRetrieverManager) Retriever( m.Lock() defer m.Unlock() - retriever, ok = m.retrievers[idHash] + retriever, ok = m.retrievers.Get(md.ID()) if ok { return retriever, nil } @@ -79,7 +77,7 @@ func (m *blockRetrieverManager) Retriever( return nil, err } - m.retrievers[idHash] = retriever + m.retrievers.Set(md.ID(), retriever) return retriever, nil } diff --git a/storage/block/retriever_map_gen.go b/storage/block/retriever_map_gen.go new file mode 100644 index 0000000000..83fe4b55d7 --- /dev/null +++ b/storage/block/retriever_map_gen.go @@ -0,0 +1,259 @@ +// This file was automatically generated by genny. +// Any changes will be lost if this file is regenerated. +// see https://github.com/mauricelam/genny + +package block + +import ( + "github.com/m3db/m3x/ident" +) + +// This file was automatically generated by genny. +// Any changes will be lost if this file is regenerated. 
+// see https://github.com/mauricelam/genny + +// Copyright (c) 2018 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// retrieverMapHash is the hash for a given map entry, this is public to support +// iterating over the map using a native Go for loop. +type retrieverMapHash uint64 + +// retrieverMapHashFn is the hash function to execute when hashing a key. +type retrieverMapHashFn func(ident.ID) retrieverMapHash + +// retrieverMapEqualsFn is the equals key function to execute when detecting equality of a key. +type retrieverMapEqualsFn func(ident.ID, ident.ID) bool + +// retrieverMapCopyFn is the copy key function to execute when copying the key. +type retrieverMapCopyFn func(ident.ID) ident.ID + +// retrieverMapFinalizeFn is the finalize key function to execute when finished with a key. +type retrieverMapFinalizeFn func(ident.ID) + +// retrieverMap uses the genny package to provide a generic hash map that can be specialized +// by running the following command from this root of the repository: +// ``` +// make hashmap-gen pkg=outpkg key_type=Type value_type=Type out_dir=/tmp +// ``` +// Or if you would like to use bytes or ident.ID as keys you can use the +// partially specialized maps to generate your own maps as well: +// ``` +// make byteshashmap-gen pkg=outpkg value_type=Type out_dir=/tmp +// make idhashmap-gen pkg=outpkg value_type=Type out_dir=/tmp +// ``` +// This will output to stdout the generated source file to use for your map. +// It uses linear probing by incrementing the number of the hash created when +// hashing the identifier if there is a collision. +// retrieverMap is a value type and not an interface to allow for less painful +// upgrades when adding/removing methods, it is not likely to need mocking so +// an interface would not be super useful either. +type retrieverMap struct { + _retrieverMapOptions + + // lookup uses hash of the identifier for the key and the MapEntry value + // wraps the value type and the key (used to ensure lookup is correct + // when dealing with collisions), we use uint64 for the hash partially + // because lookups of maps with uint64 keys has a fast path for Go. + lookup map[retrieverMapHash]retrieverMapEntry +} + +// _retrieverMapOptions is a set of options used when creating an identifier map, it is kept +// private so that implementers of the generated map can specify their own options +// that partially fulfill these options. 
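The rehash-by-increment probing described above is easy to see in isolation. A minimal sketch of the same scheme over a plain uint64-keyed Go map (the `slot` type and `probeSet` helper are illustrative only; xxhash is the hash this change adds to glide):

```
package main

import (
	"fmt"

	"github.com/cespare/xxhash"
)

// slot pairs the original key with its value so a probed lookup can confirm
// it found the right key rather than merely the right hash.
type slot struct {
	key   string
	value int
}

// probeSet stores into a uint64-keyed Go map, bumping the hash by one on a
// collision with a different key.
func probeSet(m map[uint64]slot, key string, value int) {
	hash := xxhash.Sum64String(key)
	for s, ok := m[hash]; ok; s, ok = m[hash] {
		if s.key == key {
			break // same key: overwrite in place
		}
		hash++ // a different key landed here first: probe the next slot
	}
	m[hash] = slot{key: key, value: value}
}

func main() {
	m := make(map[uint64]slot)
	probeSet(m, "foo", 1)
	probeSet(m, "bar", 2)
	probeSet(m, "foo", 3) // overwrites, does not add a slot
	fmt.Println(len(m))   // 2
}
```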
+type _retrieverMapOptions struct { + // hash is the hash function to execute when hashing a key. + hash retrieverMapHashFn + // equals is the equals key function to execute when detecting equality. + equals retrieverMapEqualsFn + // copy is the copy key function to execute when copying the key. + copy retrieverMapCopyFn + // finalize is the finalize key function to execute when finished with a + // key, this is optional to specify. + finalize retrieverMapFinalizeFn + // initialSize is the initial size for the map, use zero to use Go's std map + // initial size and consequently is optional to specify. + initialSize int +} + +// retrieverMapEntry is an entry in the map, this is public to support iterating +// over the map using a native Go for loop. +type retrieverMapEntry struct { + // key is used to check equality on lookups to resolve collisions + key _retrieverMapKey + // value type stored + value DatabaseBlockRetriever +} + +type _retrieverMapKey struct { + key ident.ID + finalize bool +} + +// Key returns the map entry key. +func (e retrieverMapEntry) Key() ident.ID { + return e.key.key +} + +// Value returns the map entry value. +func (e retrieverMapEntry) Value() DatabaseBlockRetriever { + return e.value +} + +// _retrieverMapAlloc is a non-exported function so that when generating the source code +// for the map you can supply a public constructor that sets the correct +// hash, equals, copy, finalize options without users of the map needing to +// implement them themselves. +func _retrieverMapAlloc(opts _retrieverMapOptions) *retrieverMap { + m := &retrieverMap{_retrieverMapOptions: opts} + m.Reallocate() + return m +} + +func (m *retrieverMap) newMapKey(k ident.ID, opts _retrieverMapKeyOptions) _retrieverMapKey { + key := _retrieverMapKey{key: k, finalize: opts.finalizeKey} + if !opts.copyKey { + return key + } + + key.key = m.copy(k) + return key +} + +func (m *retrieverMap) removeMapKey(hash retrieverMapHash, key _retrieverMapKey) { + delete(m.lookup, hash) + if key.finalize { + m.finalize(key.key) + } +} + +// Get returns a value in the map for an identifier if found. +func (m *retrieverMap) Get(k ident.ID) (DatabaseBlockRetriever, bool) { + hash := m.hash(k) + for entry, ok := m.lookup[hash]; ok; entry, ok = m.lookup[hash] { + if m.equals(entry.key.key, k) { + return entry.value, true + } + // Linear probe to "next" to this entry (really a rehash) + hash++ + } + var empty DatabaseBlockRetriever + return empty, false +} + +// Set will set the value for an identifier. +func (m *retrieverMap) Set(k ident.ID, v DatabaseBlockRetriever) { + m.set(k, v, _retrieverMapKeyOptions{ + copyKey: true, + finalizeKey: m.finalize != nil, + }) +} + +// retrieverMapSetUnsafeOptions is a set of options to use when setting a value with +// the SetUnsafe method. +type retrieverMapSetUnsafeOptions struct { + NoCopyKey bool + NoFinalizeKey bool +} + +// SetUnsafe will set the value for an identifier with unsafe options for how +// the map treats the key. 
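A hypothetical helper contrasting the two insertion paths this method pair offers (the `registerRetrievers` name and its arguments are illustrative only; the map API is as generated above):

```
package block

import "github.com/m3db/m3x/ident"

// registerRetrievers shows safe-by-default Set versus opt-out SetUnsafe:
// Set copies (and later finalizes) the key, while SetUnsafe can skip both
// when the key is immutable and outlives the entry.
func registerRetrievers(
	m *retrieverMap,
	id ident.ID, r DatabaseBlockRetriever,
	staticID ident.ID, staticR DatabaseBlockRetriever,
) {
	// Safe default: the map takes its own copy of id, so the caller
	// keeps ownership of its bytes.
	m.Set(id, r)

	// Opt out of copying/finalizing, e.g. for an ident.BytesID whose
	// backing bytes the map is allowed to retain for its lifetime.
	m.SetUnsafe(staticID, staticR, retrieverMapSetUnsafeOptions{
		NoCopyKey:     true,
		NoFinalizeKey: true,
	})
}
```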
+func (m *retrieverMap) SetUnsafe(k ident.ID, v DatabaseBlockRetriever, opts retrieverMapSetUnsafeOptions) { + m.set(k, v, _retrieverMapKeyOptions{ + copyKey: !opts.NoCopyKey, + finalizeKey: !opts.NoFinalizeKey, + }) +} + +type _retrieverMapKeyOptions struct { + copyKey bool + finalizeKey bool +} + +func (m *retrieverMap) set(k ident.ID, v DatabaseBlockRetriever, opts _retrieverMapKeyOptions) { + hash := m.hash(k) + for entry, ok := m.lookup[hash]; ok; entry, ok = m.lookup[hash] { + if m.equals(entry.key.key, k) { + m.lookup[hash] = retrieverMapEntry{ + key: entry.key, + value: v, + } + return + } + // Linear probe to "next" to this entry (really a rehash) + hash++ + } + + m.lookup[hash] = retrieverMapEntry{ + key: m.newMapKey(k, opts), + value: v, + } +} + +// Iter provides the underlying map to allow for using a native Go for loop +// to iterate the map, however callers should only ever read and not write +// the map. +func (m *retrieverMap) Iter() map[retrieverMapHash]retrieverMapEntry { + return m.lookup +} + +// Len returns the number of map entries in the map. +func (m *retrieverMap) Len() int { + return len(m.lookup) +} + +// Contains returns true if value exists for key, false otherwise, it is +// shorthand for a call to Get that doesn't return the value. +func (m *retrieverMap) Contains(k ident.ID) bool { + _, ok := m.Get(k) + return ok +} + +// Delete will remove a value set in the map for the specified key. +func (m *retrieverMap) Delete(k ident.ID) { + hash := m.hash(k) + for entry, ok := m.lookup[hash]; ok; entry, ok = m.lookup[hash] { + if m.equals(entry.key.key, k) { + m.removeMapKey(hash, entry.key) + return + } + // Linear probe to "next" to this entry (really a rehash) + hash++ + } +} + +// Reset will reset the map by simply deleting all keys to avoid +// allocating a new map. +func (m *retrieverMap) Reset() { + for hash, entry := range m.lookup { + m.removeMapKey(hash, entry.key) + } +} + +// Reallocate will avoid deleting all keys and reallocate a new +// map, this is useful if you believe you have a large map and +// will not need to grow back to a similar size. +func (m *retrieverMap) Reallocate() { + if m.initialSize > 0 { + m.lookup = make(map[retrieverMapHash]retrieverMapEntry, m.initialSize) + } else { + m.lookup = make(map[retrieverMapHash]retrieverMapEntry) + } +} diff --git a/storage/block/retriever_new_map_gen.go b/storage/block/retriever_new_map_gen.go new file mode 100644 index 0000000000..522191eb5d --- /dev/null +++ b/storage/block/retriever_new_map_gen.go @@ -0,0 +1,76 @@ +// This file was automatically generated by genny. +// Any changes will be lost if this file is regenerated. +// see https://github.com/mauricelam/genny + +package block + +import ( + "github.com/m3db/m3x/ident" + + "github.com/m3db/m3x/pool" + + "github.com/cespare/xxhash" +) + +// Copyright (c) 2018 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// retrieverMapOptions provides options used when created the map. +type retrieverMapOptions struct { + InitialSize int + KeyCopyPool pool.BytesPool +} + +// newRetrieverMap returns a new byte keyed map. +func newRetrieverMap(opts retrieverMapOptions) *retrieverMap { + var ( + copyFn retrieverMapCopyFn + finalizeFn retrieverMapFinalizeFn + ) + if pool := opts.KeyCopyPool; pool == nil { + copyFn = func(k ident.ID) ident.ID { + return ident.BytesID(append([]byte(nil), k.Bytes()...)) + } + } else { + copyFn = func(k ident.ID) ident.ID { + bytes := k.Bytes() + keyLen := len(bytes) + pooled := pool.Get(keyLen)[:keyLen] + copy(pooled, bytes) + return ident.BytesID(pooled) + } + finalizeFn = func(k ident.ID) { + if slice, ok := k.(ident.BytesID); ok { + pool.Put(slice) + } + } + } + return _retrieverMapAlloc(_retrieverMapOptions{ + hash: func(id ident.ID) retrieverMapHash { + return retrieverMapHash(xxhash.Sum64(id.Bytes())) + }, + equals: func(x, y ident.ID) bool { + return x.Equal(y) + }, + copy: copyFn, + finalize: finalizeFn, + initialSize: opts.InitialSize, + }) +} diff --git a/storage/block/wired_list_test.go b/storage/block/wired_list_test.go index 8d5fc1a9e4..38dfbef58f 100644 --- a/storage/block/wired_list_test.go +++ b/storage/block/wired_list_test.go @@ -165,7 +165,7 @@ func wiredListTestWiredBlocksString(l *WiredList) string { // nolint: unused b := bytes.NewBuffer(nil) for bl := l.root.next(); bl != &l.root; bl = bl.next() { dbBlock := bl.(*dbBlock) - b.WriteString(fmt.Sprintf("%s\n", string(dbBlock.segment.Head.Get()))) + b.WriteString(fmt.Sprintf("%s\n", string(dbBlock.segment.Head.Bytes()))) } return b.String() } diff --git a/storage/bootstrap/bootstrapper/base_test.go b/storage/bootstrap/bootstrapper/base_test.go index c99344de6b..676037a5fb 100644 --- a/storage/bootstrap/bootstrapper/base_test.go +++ b/storage/bootstrap/bootstrapper/base_test.go @@ -132,9 +132,10 @@ func validateResult(t *testing.T, expected, actual result.BootstrapResult) { require.True(t, ok) es := result.AllSeries() as := actualShardResults[shard].AllSeries() - require.Equal(t, len(es), len(as)) - for id, expectedSeries := range es { - actualSeries, exists := as[id] + require.Equal(t, es.Len(), as.Len()) + for _, entry := range es.Iter() { + id, expectedSeries := entry.Key(), entry.Value() + actualSeries, exists := as.Get(id) require.True(t, exists) validateSeries(t, expectedSeries.Blocks, actualSeries.Blocks) } diff --git a/storage/bootstrap/bootstrapper/commitlog/source.go b/storage/bootstrap/bootstrapper/commitlog/source.go index 3fe338336a..0261ee4095 100644 --- a/storage/bootstrap/bootstrapper/commitlog/source.go +++ b/storage/bootstrap/bootstrapper/commitlog/source.go @@ -301,7 +301,7 @@ func (s *commitLogSource) mergeShards( var shardResult result.ShardResult shardResult, shardEmptyErrs[shard], shardErrs[shard] = s.mergeShard( unmergedShard, blocksPool, multiReaderIteratorPool, encoderPool, blopts) - if shardResult != nil && len(shardResult.AllSeries()) > 0 { + if shardResult != nil && shardResult.NumSeries() > 0 { 
// Prevent race conditions while updating bootstrapResult from multiple go-routines bootstrapResultLock.Lock() // Shard is a slice index so conversion to uint32 is safe diff --git a/storage/bootstrap/bootstrapper/commitlog/source_prop_test.go b/storage/bootstrap/bootstrapper/commitlog/source_prop_test.go index b1d1021d63..a3f9eb1f0f 100644 --- a/storage/bootstrap/bootstrapper/commitlog/source_prop_test.go +++ b/storage/bootstrap/bootstrapper/commitlog/source_prop_test.go @@ -247,5 +247,5 @@ func seriesUniqueIndex(series string) uint64 { // hashIDToShard generates a HashFn based on murmur32 func hashIDToShard(id ident.ID) uint32 { - return murmur3.Sum32(id.Data().Get()) % uint32(maxShards) + return murmur3.Sum32(id.Data().Bytes()) % uint32(maxShards) } diff --git a/storage/bootstrap/bootstrapper/commitlog/source_test.go b/storage/bootstrap/bootstrapper/commitlog/source_test.go index 09a56dc597..b5e9d9897a 100644 --- a/storage/bootstrap/bootstrapper/commitlog/source_test.go +++ b/storage/bootstrap/bootstrapper/commitlog/source_test.go @@ -462,7 +462,8 @@ func createExpectedShardResult( expected[v.s.Shard] = shardResult } - blocks := shardResult.AllSeries()[v.s.ID.Hash()].Blocks + series, _ := shardResult.AllSeries().Get(v.s.ID) + blocks := series.Blocks blockStart := v.t.Truncate(blockSize) r, ok := allResults[v.s.ID.String()] @@ -510,17 +511,18 @@ func createExpectedShardResult( func verifyShardResultsAreEqual(opts Options, shard uint32, actualResult, expectedResult result.ShardResult) error { expectedSeries := expectedResult.AllSeries() actualSeries := actualResult.AllSeries() - if len(expectedSeries) != len(actualSeries) { + if expectedSeries.Len() != actualSeries.Len() { return fmt.Errorf( "different number of series for shard: %v . expected: %d , actual: %d", shard, - len(expectedSeries), - len(actualSeries), + expectedSeries.Len(), + actualSeries.Len(), ) } - for expectedID, expectedBlocks := range expectedSeries { - actualBlocks, ok := actualSeries[expectedID] + for _, entry := range expectedSeries.Iter() { + expectedID, expectedBlocks := entry.Key(), entry.Value() + actualBlocks, ok := actualSeries.Get(expectedID) if !ok { return fmt.Errorf("series: %v present in expected but not actual", expectedID) } diff --git a/storage/bootstrap/bootstrapper/fs/source.go b/storage/bootstrap/bootstrapper/fs/source.go index c085247550..b858af0514 100644 --- a/storage/bootstrap/bootstrapper/fs/source.go +++ b/storage/bootstrap/bootstrapper/fs/source.go @@ -223,7 +223,7 @@ func (s *fileSystemSource) bootstrapFromReaders( shardResults := bootstrapResult.ShardResults() for shard, results := range shardResults { - if len(results.AllSeries()) == 0 { + if results.NumSeries() == 0 { delete(shardResults, shard) } } @@ -258,7 +258,8 @@ func (s *fileSystemSource) handleErrorsAndUnfulfilled( resultLock.Lock() shardResult, ok := bootstrapResult.ShardResults()[shard] if ok { - for _, series := range shardResult.AllSeries() { + for _, entry := range shardResult.AllSeries().Iter() { + series := entry.Value() for _, t := range timesWithErrors { shardResult.RemoveBlockAt(series.ID, t) } @@ -374,10 +375,8 @@ func (s *fileSystemSource) loadShardReadersDataIntoShardResult( break } - idHash := id.Hash() - resultLock.RLock() - entry, exists := shardResult.AllSeries()[idHash] + entry, exists := shardResult.AllSeries().Get(id) resultLock.RUnlock() if exists { diff --git a/storage/bootstrap/bootstrapper/fs/source_test.go b/storage/bootstrap/bootstrapper/fs/source_test.go index 8e13355294..f3af7e5ada 100644 --- 
a/storage/bootstrap/bootstrapper/fs/source_test.go +++ b/storage/bootstrap/bootstrapper/fs/source_test.go @@ -147,7 +147,7 @@ func writeTSDBFiles(t *testing.T, dir string, namespace ident.ID, shard uint32, bytes := checked.NewBytes(data, nil) bytes.IncRef() - require.NoError(t, w.Write(ident.StringID(id), bytes, digest.Checksum(bytes.Get()))) + require.NoError(t, w.Write(ident.StringID(id), bytes, digest.Checksum(bytes.Bytes()))) require.NoError(t, w.Close()) } @@ -348,22 +348,24 @@ func validateReadResults( require.NotNil(t, res.ShardResults()) require.NotNil(t, res.ShardResults()[testShard]) allSeries := res.ShardResults()[testShard].AllSeries() - require.Equal(t, 2, len(allSeries)) + require.Equal(t, 2, allSeries.Len()) require.NotNil(t, res.Unfulfilled()) require.NotNil(t, res.Unfulfilled()[testShard]) validateTimeRanges(t, res.Unfulfilled()[testShard], expected) - require.Equal(t, 2, len(allSeries)) + require.Equal(t, 2, allSeries.Len()) - ids := []ident.Hash{ - ident.StringID("foo").Hash(), ident.StringID("bar").Hash()} + ids := []ident.ID{ + ident.StringID("foo"), ident.StringID("bar")} data := [][]byte{ {1, 2, 3}, {4, 5, 6}, } times := []time.Time{testStart, testStart.Add(10 * time.Hour)} for i, id := range ids { - allBlocks := allSeries[id].Blocks.AllBlocks() + series, ok := allSeries.Get(id) + require.True(t, ok) + allBlocks := series.Blocks.AllBlocks() require.Equal(t, 1, len(allBlocks)) block := allBlocks[xtime.ToUnixNano(times[i])] ctx := context.NewContext() diff --git a/storage/bootstrap/bootstrapper/peers/source.go b/storage/bootstrap/bootstrapper/peers/source.go index 148d3160c3..862ebbaf48 100644 --- a/storage/bootstrap/bootstrapper/peers/source.go +++ b/storage/bootstrap/bootstrapper/peers/source.go @@ -294,7 +294,8 @@ func (s *peersSource) logFetchBootstrapBlocksFromPeersOutcome( ) { if err == nil { shardBlockSeriesCounter := map[xtime.UnixNano]int64{} - for _, series := range shardResult.AllSeries() { + for _, entry := range shardResult.AllSeries().Iter() { + series := entry.Value() for blockStart := range series.Blocks.AllBlocks() { shardBlockSeriesCounter[blockStart]++ } @@ -364,7 +365,8 @@ func (s *peersSource) incrementalFlush( } var blockErr error - for _, s := range shardResult.AllSeries() { + for _, entry := range shardResult.AllSeries().Iter() { + s := entry.Value() bl, ok := s.Blocks.BlockAt(start) if !ok { continue @@ -434,7 +436,8 @@ func (s *peersSource) incrementalFlush( // TODO: We need this right now because nodes with older versions of M3DB will return an extra // block when requesting bootstrapped blocks. Once all the clusters have been upgraded we can // remove this code. - for _, s := range shardResult.AllSeries() { + for _, entry := range shardResult.AllSeries().Iter() { + s := entry.Value() bl, ok := s.Blocks.BlockAt(tr.End) if !ok { continue @@ -448,7 +451,8 @@ func (s *peersSource) incrementalFlush( // they will all get loaded into the shard object, and then immediately evicted on the next // tick which causes unnecessary memory pressure. numSeriesTriedToRemoveWithRemainingBlocks := 0 - for _, series := range shardResult.AllSeries() { + for _, entry := range shardResult.AllSeries().Iter() { + series := entry.Value() numBlocksRemaining := len(series.Blocks.AllBlocks()) // Should never happen since we removed all the block in the previous loop and fetching // bootstrap blocks should always be exclusive on the end side. 
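The caller-side migration this patch repeats across all the bootstrap sources is mechanical: ranging over the old `map[ident.Hash]` becomes ranging over `Iter()` entries, with `Key()`/`Value()` recovering the ID and series. A minimal sketch (the `printSeriesCounts` helper is illustrative only):

```
package result

import "fmt"

// printSeriesCounts iterates a shard result the new way: Iter exposes the
// underlying hash-keyed map, and each entry carries the original key so
// lookups stay correct even across hash collisions.
func printSeriesCounts(sr ShardResult) {
	for _, entry := range sr.AllSeries().Iter() {
		id, series := entry.Key(), entry.Value()
		fmt.Printf("%s: %d blocks\n", id.String(), series.Blocks.Len())
	}
}
```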
diff --git a/storage/bootstrap/bootstrapper/peers/source_test.go b/storage/bootstrap/bootstrapper/peers/source_test.go index 775e26a7dc..d6deff591e 100644 --- a/storage/bootstrap/bootstrapper/peers/source_test.go +++ b/storage/bootstrap/bootstrapper/peers/source_test.go @@ -250,7 +250,7 @@ func TestPeersSourceIncrementalRun(t *testing.T) { Persist: func(id ident.ID, segment ts.Segment, checksum uint32) error { persists["foo"]++ assert.Equal(t, "foo", id.String()) - assert.Equal(t, []byte{1, 2, 3}, segment.Head.Get()) + assert.Equal(t, []byte{1, 2, 3}, segment.Head.Bytes()) assert.Equal(t, fooBlock.Checksum(), checksum) return nil }, @@ -270,7 +270,7 @@ func TestPeersSourceIncrementalRun(t *testing.T) { Persist: func(id ident.ID, segment ts.Segment, checksum uint32) error { persists["bar"]++ assert.Equal(t, "bar", id.String()) - assert.Equal(t, []byte{4, 5, 6}, segment.Head.Get()) + assert.Equal(t, []byte{4, 5, 6}, segment.Head.Bytes()) assert.Equal(t, barBlock.Checksum(), checksum) return nil }, @@ -290,7 +290,7 @@ func TestPeersSourceIncrementalRun(t *testing.T) { Persist: func(id ident.ID, segment ts.Segment, checksum uint32) error { persists["baz"]++ assert.Equal(t, "baz", id.String()) - assert.Equal(t, []byte{7, 8, 9}, segment.Head.Get()) + assert.Equal(t, []byte{7, 8, 9}, segment.Head.Bytes()) assert.Equal(t, bazBlock.Checksum(), checksum) return nil }, diff --git a/storage/bootstrap/result/map_gen.go b/storage/bootstrap/result/map_gen.go new file mode 100644 index 0000000000..069e6875c1 --- /dev/null +++ b/storage/bootstrap/result/map_gen.go @@ -0,0 +1,259 @@ +// This file was automatically generated by genny. +// Any changes will be lost if this file is regenerated. +// see https://github.com/mauricelam/genny + +package result + +import ( + "github.com/m3db/m3x/ident" +) + +// This file was automatically generated by genny. +// Any changes will be lost if this file is regenerated. +// see https://github.com/mauricelam/genny + +// Copyright (c) 2018 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// MapHash is the hash for a given map entry, this is public to support +// iterating over the map using a native Go for loop. +type MapHash uint64 + +// HashFn is the hash function to execute when hashing a key. +type HashFn func(ident.ID) MapHash + +// EqualsFn is the equals key function to execute when detecting equality of a key. 
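These function types are wired up by the package's `NewMap` constructor later in this patch. A hypothetical helper mirroring the functions it installs (xxhash over the ID's raw bytes for hashing, ident equality for collision resolution):

```
package result

import (
	"github.com/cespare/xxhash"

	"github.com/m3db/m3x/ident"
)

// defaultMapFns returns the hash and equals functions as NewMap builds them.
func defaultMapFns() (HashFn, EqualsFn) {
	hash := func(id ident.ID) MapHash {
		return MapHash(xxhash.Sum64(id.Bytes()))
	}
	equals := func(x, y ident.ID) bool {
		return x.Equal(y)
	}
	return hash, equals
}
```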
+type EqualsFn func(ident.ID, ident.ID) bool + +// CopyFn is the copy key function to execute when copying the key. +type CopyFn func(ident.ID) ident.ID + +// FinalizeFn is the finalize key function to execute when finished with a key. +type FinalizeFn func(ident.ID) + +// Map uses the genny package to provide a generic hash map that can be specialized +// by running the following command from this root of the repository: +// ``` +// make hashmap-gen pkg=outpkg key_type=Type value_type=Type out_dir=/tmp +// ``` +// Or if you would like to use bytes or ident.ID as keys you can use the +// partially specialized maps to generate your own maps as well: +// ``` +// make byteshashmap-gen pkg=outpkg value_type=Type out_dir=/tmp +// make idhashmap-gen pkg=outpkg value_type=Type out_dir=/tmp +// ``` +// This will output to stdout the generated source file to use for your map. +// It uses linear probing by incrementing the number of the hash created when +// hashing the identifier if there is a collision. +// Map is a value type and not an interface to allow for less painful +// upgrades when adding/removing methods, it is not likely to need mocking so +// an interface would not be super useful either. +type Map struct { + mapOptions + + // lookup uses hash of the identifier for the key and the MapEntry value + // wraps the value type and the key (used to ensure lookup is correct + // when dealing with collisions), we use uint64 for the hash partially + // because lookups of maps with uint64 keys has a fast path for Go. + lookup map[MapHash]MapEntry +} + +// mapOptions is a set of options used when creating an identifier map, it is kept +// private so that implementers of the generated map can specify their own options +// that partially fulfill these options. +type mapOptions struct { + // hash is the hash function to execute when hashing a key. + hash HashFn + // equals is the equals key function to execute when detecting equality. + equals EqualsFn + // copy is the copy key function to execute when copying the key. + copy CopyFn + // finalize is the finalize key function to execute when finished with a + // key, this is optional to specify. + finalize FinalizeFn + // initialSize is the initial size for the map, use zero to use Go's std map + // initial size and consequently is optional to specify. + initialSize int +} + +// MapEntry is an entry in the map, this is public to support iterating +// over the map using a native Go for loop. +type MapEntry struct { + // key is used to check equality on lookups to resolve collisions + key mapKey + // value type stored + value DatabaseSeriesBlocks +} + +type mapKey struct { + key ident.ID + finalize bool +} + +// Key returns the map entry key. +func (e MapEntry) Key() ident.ID { + return e.key.key +} + +// Value returns the map entry value. +func (e MapEntry) Value() DatabaseSeriesBlocks { + return e.value +} + +// mapAlloc is a non-exported function so that when generating the source code +// for the map you can supply a public constructor that sets the correct +// hash, equals, copy, finalize options without users of the map needing to +// implement them themselves. 
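The lookup contract that follows from `Get`'s signature: a miss yields the value type's zero value plus `false`, so callers must check the second return before using the result. A small hypothetical sketch:

```
package result

import "github.com/m3db/m3x/ident"

// getOrDefault returns the stored blocks for id, or def on a miss.
// The getOrDefault name is illustrative only.
func getOrDefault(m *Map, id ident.ID, def DatabaseSeriesBlocks) DatabaseSeriesBlocks {
	if blocks, ok := m.Get(id); ok {
		return blocks
	}
	return def
}
```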
+func mapAlloc(opts mapOptions) *Map { + m := &Map{mapOptions: opts} + m.Reallocate() + return m +} + +func (m *Map) newMapKey(k ident.ID, opts mapKeyOptions) mapKey { + key := mapKey{key: k, finalize: opts.finalizeKey} + if !opts.copyKey { + return key + } + + key.key = m.copy(k) + return key +} + +func (m *Map) removeMapKey(hash MapHash, key mapKey) { + delete(m.lookup, hash) + if key.finalize { + m.finalize(key.key) + } +} + +// Get returns a value in the map for an identifier if found. +func (m *Map) Get(k ident.ID) (DatabaseSeriesBlocks, bool) { + hash := m.hash(k) + for entry, ok := m.lookup[hash]; ok; entry, ok = m.lookup[hash] { + if m.equals(entry.key.key, k) { + return entry.value, true + } + // Linear probe to "next" to this entry (really a rehash) + hash++ + } + var empty DatabaseSeriesBlocks + return empty, false +} + +// Set will set the value for an identifier. +func (m *Map) Set(k ident.ID, v DatabaseSeriesBlocks) { + m.set(k, v, mapKeyOptions{ + copyKey: true, + finalizeKey: m.finalize != nil, + }) +} + +// SetUnsafeOptions is a set of options to use when setting a value with +// the SetUnsafe method. +type SetUnsafeOptions struct { + NoCopyKey bool + NoFinalizeKey bool +} + +// SetUnsafe will set the value for an identifier with unsafe options for how +// the map treats the key. +func (m *Map) SetUnsafe(k ident.ID, v DatabaseSeriesBlocks, opts SetUnsafeOptions) { + m.set(k, v, mapKeyOptions{ + copyKey: !opts.NoCopyKey, + finalizeKey: !opts.NoFinalizeKey, + }) +} + +type mapKeyOptions struct { + copyKey bool + finalizeKey bool +} + +func (m *Map) set(k ident.ID, v DatabaseSeriesBlocks, opts mapKeyOptions) { + hash := m.hash(k) + for entry, ok := m.lookup[hash]; ok; entry, ok = m.lookup[hash] { + if m.equals(entry.key.key, k) { + m.lookup[hash] = MapEntry{ + key: entry.key, + value: v, + } + return + } + // Linear probe to "next" to this entry (really a rehash) + hash++ + } + + m.lookup[hash] = MapEntry{ + key: m.newMapKey(k, opts), + value: v, + } +} + +// Iter provides the underlying map to allow for using a native Go for loop +// to iterate the map, however callers should only ever read and not write +// the map. +func (m *Map) Iter() map[MapHash]MapEntry { + return m.lookup +} + +// Len returns the number of map entries in the map. +func (m *Map) Len() int { + return len(m.lookup) +} + +// Contains returns true if value exists for key, false otherwise, it is +// shorthand for a call to Get that doesn't return the value. +func (m *Map) Contains(k ident.ID) bool { + _, ok := m.Get(k) + return ok +} + +// Delete will remove a value set in the map for the specified key. +func (m *Map) Delete(k ident.ID) { + hash := m.hash(k) + for entry, ok := m.lookup[hash]; ok; entry, ok = m.lookup[hash] { + if m.equals(entry.key.key, k) { + m.removeMapKey(hash, entry.key) + return + } + // Linear probe to "next" to this entry (really a rehash) + hash++ + } +} + +// Reset will reset the map by simply deleting all keys to avoid +// allocating a new map. +func (m *Map) Reset() { + for hash, entry := range m.lookup { + m.removeMapKey(hash, entry.key) + } +} + +// Reallocate will avoid deleting all keys and reallocate a new +// map, this is useful if you believe you have a large map and +// will not need to grow back to a similar size. 
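Choosing between the two reuse paths below is a capacity decision, sketched with a hypothetical `recycle` helper:

```
package result

// recycle picks a reuse strategy: Reset deletes entries in place and keeps
// the bucket storage warm for a similarly sized refill, while Reallocate
// swaps in a fresh map so a once-large map can actually shrink.
func recycle(m *Map, expectSimilarSize bool) {
	if expectSimilarSize {
		m.Reset()
		return
	}
	m.Reallocate()
}
```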
+func (m *Map) Reallocate() { + if m.initialSize > 0 { + m.lookup = make(map[MapHash]MapEntry, m.initialSize) + } else { + m.lookup = make(map[MapHash]MapEntry) + } +} diff --git a/storage/bootstrap/result/new_map_gen.go b/storage/bootstrap/result/new_map_gen.go new file mode 100644 index 0000000000..26f338d862 --- /dev/null +++ b/storage/bootstrap/result/new_map_gen.go @@ -0,0 +1,76 @@ +// This file was automatically generated by genny. +// Any changes will be lost if this file is regenerated. +// see https://github.com/mauricelam/genny + +package result + +import ( + "github.com/m3db/m3x/ident" + + "github.com/m3db/m3x/pool" + + "github.com/cespare/xxhash" +) + +// Copyright (c) 2018 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// MapOptions provides options used when created the map. +type MapOptions struct { + InitialSize int + KeyCopyPool pool.BytesPool +} + +// NewMap returns a new byte keyed map. 
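When a `KeyCopyPool` is supplied, key copies are taken from the pool and the finalize hook returns them on delete or reset. A hypothetical construction sketch, assuming m3x's `pool.NewBytesPool`/`pool.Bucket` API:

```
package result

import "github.com/m3db/m3x/pool"

// newPooledMap builds a map whose copied keys come from a bytes pool rather
// than fresh allocations; bucket sizes here are illustrative only.
func newPooledMap(initialSize int) *Map {
	bytesPool := pool.NewBytesPool([]pool.Bucket{
		{Capacity: 256, Count: 1024},
	}, nil)
	bytesPool.Init()
	return NewMap(MapOptions{
		InitialSize: initialSize,
		KeyCopyPool: bytesPool,
	})
}
```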
+func NewMap(opts MapOptions) *Map { + var ( + copyFn CopyFn + finalizeFn FinalizeFn + ) + if pool := opts.KeyCopyPool; pool == nil { + copyFn = func(k ident.ID) ident.ID { + return ident.BytesID(append([]byte(nil), k.Bytes()...)) + } + } else { + copyFn = func(k ident.ID) ident.ID { + bytes := k.Bytes() + keyLen := len(bytes) + pooled := pool.Get(keyLen)[:keyLen] + copy(pooled, bytes) + return ident.BytesID(pooled) + } + finalizeFn = func(k ident.ID) { + if slice, ok := k.(ident.BytesID); ok { + pool.Put(slice) + } + } + } + return mapAlloc(mapOptions{ + hash: func(id ident.ID) MapHash { + return MapHash(xxhash.Sum64(id.Bytes())) + }, + equals: func(x, y ident.ID) bool { + return x.Equal(y) + }, + copy: copyFn, + finalize: finalizeFn, + initialSize: opts.InitialSize, + }) +} diff --git a/storage/bootstrap/result/result.go b/storage/bootstrap/result/result.go index 1f6b139961..351987dabe 100644 --- a/storage/bootstrap/result/result.go +++ b/storage/bootstrap/result/result.go @@ -73,10 +73,10 @@ func MergedBootstrapResult(i, j BootstrapResult) BootstrapResult { } sizeI, sizeJ := 0, 0 for _, sr := range i.ShardResults() { - sizeI += len(sr.AllSeries()) + sizeI += int(sr.NumSeries()) } for _, sr := range j.ShardResults() { - sizeJ += len(sr.AllSeries()) + sizeJ += int(sr.NumSeries()) } if sizeI >= sizeJ { i.ShardResults().AddResults(j.ShardResults()) @@ -90,38 +90,41 @@ func MergedBootstrapResult(i, j BootstrapResult) BootstrapResult { type shardResult struct { opts Options - blocks map[ident.Hash]DatabaseSeriesBlocks + blocks *Map } // NewShardResult creates a new shard result. func NewShardResult(capacity int, opts Options) ShardResult { return &shardResult{ - opts: opts, - blocks: make(map[ident.Hash]DatabaseSeriesBlocks, capacity), + opts: opts, + blocks: NewMap(MapOptions{ + InitialSize: capacity, + KeyCopyPool: opts.DatabaseBlockOptions().BytesPool().BytesPool(), + }), } } // IsEmpty returns whether the result is empty. func (sr *shardResult) IsEmpty() bool { - return len(sr.blocks) == 0 + return sr.blocks.Len() == 0 } // AddBlock adds a data block. func (sr *shardResult) AddBlock(id ident.ID, b block.DatabaseBlock) { - curSeries, exists := sr.blocks[id.Hash()] + curSeries, exists := sr.blocks.Get(id) if !exists { curSeries = sr.newBlocks(id) - sr.blocks[id.Hash()] = curSeries + sr.blocks.Set(id, curSeries) } curSeries.Blocks.AddBlock(b) } // AddSeries adds a single series. func (sr *shardResult) AddSeries(id ident.ID, rawSeries block.DatabaseSeriesBlocks) { - curSeries, exists := sr.blocks[id.Hash()] + curSeries, exists := sr.blocks.Get(id) if !exists { curSeries = sr.newBlocks(id) - sr.blocks[id.Hash()] = curSeries + sr.blocks.Set(id, curSeries) } curSeries.Blocks.AddSeries(rawSeries) } @@ -140,14 +143,15 @@ func (sr *shardResult) AddResult(other ShardResult) { return } otherSeries := other.AllSeries() - for _, series := range otherSeries { + for _, entry := range otherSeries.Iter() { + series := entry.Value() sr.AddSeries(series.ID, series.Blocks) } } // RemoveBlockAt removes a data block at a given timestamp func (sr *shardResult) RemoveBlockAt(id ident.ID, t time.Time) { - curSeries, exists := sr.blocks[id.Hash()] + curSeries, exists := sr.blocks.Get(id) if !exists { return } @@ -159,20 +163,20 @@ func (sr *shardResult) RemoveBlockAt(id ident.ID, t time.Time) { // RemoveSeries removes a single series of blocks. func (sr *shardResult) RemoveSeries(id ident.ID) { - delete(sr.blocks, id.Hash()) + sr.blocks.Delete(id) } // AllSeries returns all series in the map. 
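+// With the generated map, callers range over Iter() and use the entry
+// accessors rather than ranging over a native map (editorial sketch):
+// ```
+// for _, entry := range sr.AllSeries().Iter() {
+//     id, blocks := entry.Key(), entry.Value()
+//     _, _ = id, blocks
+// }
+// ```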
-func (sr *shardResult) AllSeries() map[ident.Hash]DatabaseSeriesBlocks { +func (sr *shardResult) AllSeries() *Map { return sr.blocks } func (sr *shardResult) NumSeries() int64 { - return int64(len(sr.blocks)) + return int64(sr.blocks.Len()) } func (sr *shardResult) BlockAt(id ident.ID, t time.Time) (block.DatabaseBlock, bool) { - series, exists := sr.blocks[id.Hash()] + series, exists := sr.blocks.Get(id) if !exists { return nil, false } @@ -181,7 +185,8 @@ func (sr *shardResult) BlockAt(id ident.ID, t time.Time) (block.DatabaseBlock, b // Close closes a shard result. func (sr *shardResult) Close() { - for _, series := range sr.blocks { + for _, entry := range sr.blocks.Iter() { + series := entry.Value() series.Blocks.Close() } } @@ -198,7 +203,7 @@ func (r ShardResults) NumSeries() int64 { // AddResults adds other shard results to the current shard results. func (r ShardResults) AddResults(other ShardResults) { for shard, result := range other { - if result == nil || len(result.AllSeries()) == 0 { + if result == nil || result.NumSeries() == 0 { continue } if existing, ok := r[shard]; ok { @@ -219,11 +224,12 @@ func (r ShardResults) Equal(other ShardResults) bool { } allSeries := result.AllSeries() otherAllSeries := otherResult.AllSeries() - if len(allSeries) != len(otherAllSeries) { + if allSeries.Len() != otherAllSeries.Len() { return false } - for id, series := range allSeries { - otherSeries, ok := otherAllSeries[id] + for _, entry := range allSeries.Iter() { + id, series := entry.Key(), entry.Value() + otherSeries, ok := otherAllSeries.Get(id) if !ok { return false } diff --git a/storage/bootstrap/result/result_test.go b/storage/bootstrap/result/result_test.go index ec6888da93..277bb27f3a 100644 --- a/storage/bootstrap/result/result_test.go +++ b/storage/bootstrap/result/result_test.go @@ -248,9 +248,13 @@ func TestShardResultAddBlock(t *testing.T) { sr.AddBlock(ident.StringID(input.id), block) } allSeries := sr.AllSeries() - require.Len(t, allSeries, 2) - require.Equal(t, 2, allSeries[ident.StringID("foo").Hash()].Blocks.Len()) - require.Equal(t, 1, allSeries[ident.StringID("bar").Hash()].Blocks.Len()) + require.Equal(t, 2, allSeries.Len()) + fooBlocks, ok := allSeries.Get(ident.StringID("foo")) + require.True(t, ok) + require.Equal(t, 2, fooBlocks.Blocks.Len()) + barBlocks, ok := allSeries.Get(ident.StringID("bar")) + require.True(t, ok) + require.Equal(t, 1, barBlocks.Blocks.Len()) } func TestShardResultAddSeries(t *testing.T) { @@ -273,9 +277,13 @@ func TestShardResultAddSeries(t *testing.T) { moreSeries.AddBlock(block) sr.AddSeries(ident.StringID("foo"), moreSeries) allSeries := sr.AllSeries() - require.Len(t, allSeries, 2) - require.Equal(t, 1, allSeries[ident.StringID("foo").Hash()].Blocks.Len()) - require.Equal(t, 0, allSeries[ident.StringID("bar").Hash()].Blocks.Len()) + require.Equal(t, 2, allSeries.Len()) + fooBlocks, ok := allSeries.Get(ident.StringID("foo")) + require.True(t, ok) + require.Equal(t, 1, fooBlocks.Blocks.Len()) + barBlocks, ok := allSeries.Get(ident.StringID("bar")) + require.True(t, ok) + require.Equal(t, 0, barBlocks.Blocks.Len()) } func TestShardResultAddResult(t *testing.T) { @@ -287,7 +295,7 @@ func TestShardResultAddResult(t *testing.T) { other.AddSeries(ident.StringID("foo"), block.NewDatabaseSeriesBlocks(0)) other.AddSeries(ident.StringID("bar"), block.NewDatabaseSeriesBlocks(0)) sr.AddResult(other) - require.Len(t, sr.AllSeries(), 2) + require.Equal(t, 2, sr.AllSeries().Len()) } func TestShardResultNumSeries(t *testing.T) { @@ -315,11 +323,11 @@ 
func TestShardResultRemoveSeries(t *testing.T) { for _, input := range inputs { sr.AddSeries(ident.StringID(input.id), input.series) } - require.Equal(t, 2, len(sr.AllSeries())) + require.Equal(t, 2, sr.AllSeries().Len()) sr.RemoveSeries(ident.StringID("foo")) - require.Equal(t, 1, len(sr.AllSeries())) + require.Equal(t, 1, sr.AllSeries().Len()) sr.RemoveSeries(ident.StringID("nonexistent")) - require.Equal(t, 1, len(sr.AllSeries())) + require.Equal(t, 1, sr.AllSeries().Len()) } func TestShardTimeRangesIsEmpty(t *testing.T) { diff --git a/storage/bootstrap/result/types.go b/storage/bootstrap/result/types.go index b0787ea87d..bd3ba5d72b 100644 --- a/storage/bootstrap/result/types.go +++ b/storage/bootstrap/result/types.go @@ -55,8 +55,8 @@ type ShardResult interface { // or nil if there is no such block. BlockAt(id ident.ID, t time.Time) (block.DatabaseBlock, bool) - // AllSeries returns all series of blocks. - AllSeries() map[ident.Hash]DatabaseSeriesBlocks + // AllSeries returns a map of all series with their associated blocks. + AllSeries() *Map // NumSeries returns the number of distinct series'. NumSeries() int64 diff --git a/storage/bootstrap_test.go b/storage/bootstrap_test.go index 4606fc5080..ff9ba7be64 100644 --- a/storage/bootstrap_test.go +++ b/storage/bootstrap_test.go @@ -22,6 +22,7 @@ package storage import ( "fmt" + "sync" "testing" "time" @@ -65,6 +66,62 @@ func TestDatabaseBootstrapWithBootstrapError(t *testing.T) { require.Equal(t, bootstrapped, bsm.state) } +func TestDatabaseBootstrapSubsequentCallsQueued(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + opts := testDatabaseOptions() + now := time.Now() + opts = opts. + SetBootstrapProcess(nil). + SetClockOptions(opts.ClockOptions().SetNowFn(func() time.Time { + return now + })) + + m := NewMockdatabaseMediator(ctrl) + m.EXPECT().DisableFileOps() + m.EXPECT().EnableFileOps().AnyTimes() + + db := NewMockdatabase(ctrl) + + bsm := newBootstrapManager(db, m, opts).(*bootstrapManager) + + ns := NewMockdatabaseNamespace(ctrl) + ns.EXPECT().Options().Return(namespace.NewOptions()).Times(2) + + var wg sync.WaitGroup + wg.Add(1) + ns.EXPECT(). + Bootstrap(nil, gomock.Any()). + Return(nil). + Do(func(arg0, arg1 interface{}) { + defer wg.Done() + + // Enqueue the second bootstrap + err := bsm.Bootstrap() + assert.Error(t, err) + assert.Equal(t, errBootstrapEnqueued, err) + assert.False(t, bsm.IsBootstrapped()) + bsm.RLock() + assert.Equal(t, true, bsm.hasPending) + bsm.RUnlock() + + // Expect the second bootstrap call + ns.EXPECT().Bootstrap(nil, gomock.Any()).Return(nil) + }) + ns.EXPECT(). + ID(). + Return(ident.StringID("test")). + Times(2) + db.EXPECT(). + GetOwnedNamespaces(). + Return([]databaseNamespace{ns}, nil). 
+ Times(2) + + err := bsm.Bootstrap() + require.Nil(t, err) +} + func TestDatabaseBootstrapTargetRanges(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() diff --git a/storage/database.go b/storage/database.go index a0354b2423..b531dc0eb3 100644 --- a/storage/database.go +++ b/storage/database.go @@ -83,7 +83,7 @@ type db struct { nsWatch databaseNamespaceWatch shardSet sharding.ShardSet - namespaces map[ident.Hash]databaseNamespace + namespaces *databaseNamespacesMap commitLog commitlog.CommitLog state databaseState @@ -152,7 +152,7 @@ func NewDatabase( opts: opts, nowFn: opts.ClockOptions().NowFn(), shardSet: shardSet, - namespaces: make(map[ident.Hash]databaseNamespace), + namespaces: newDatabaseNamespacesMap(databaseNamespacesMapOptions{}), commitLog: commitLog, scope: scope, metrics: newDatabaseMetrics(scope), @@ -239,7 +239,8 @@ func (d *db) namespaceDeltaWithLock(newNamespaces namespace.Map) ([]ident.ID, [] ) // check if existing namespaces exist in newNamespaces - for _, ns := range existing { + for _, entry := range existing.Iter() { + ns := entry.Value() newMd, err := newNamespaces.Get(ns.ID()) // if a namespace doesn't exist in newNamespaces, mark for removal @@ -262,7 +263,7 @@ func (d *db) namespaceDeltaWithLock(newNamespaces namespace.Map) ([]ident.ID, [] // check for any namespaces that need to be added for _, ns := range newNamespaces.Metadatas() { - _, exists := d.namespaces[ns.ID().Hash()] + _, exists := d.namespaces.Get(ns.ID()) if !exists { adds = append(adds, ns) } @@ -303,7 +304,7 @@ func (d *db) logNamespaceUpdate(removes []ident.ID, adds, updates []namespace.Me func (d *db) addNamespacesWithLock(namespaces []namespace.Metadata) error { for _, n := range namespaces { // ensure namespace doesn't exist - _, ok := d.namespaces[n.ID().Hash()] + _, ok := d.namespaces.Get(n.ID()) if ok { // should never happen return fmt.Errorf("existing namespace marked for addition: %v", n.ID().String()) } @@ -313,7 +314,7 @@ func (d *db) addNamespacesWithLock(namespaces []namespace.Metadata) error { if err != nil { return err } - d.namespaces[n.ID().Hash()] = newNs + d.namespaces.Set(n.ID(), newNs) } return nil } @@ -343,7 +344,8 @@ func (d *db) AssignShardSet(shardSet sharding.ShardSet) { d.Lock() defer d.Unlock() d.shardSet = shardSet - for _, ns := range d.namespaces { + for _, elem := range d.namespaces.Iter() { + ns := elem.Value() ns.AssignShardSet(shardSet) } d.queueBootstrapWithLock() @@ -372,16 +374,15 @@ func (d *db) queueBootstrapWithLock() { func (d *db) Namespace(id ident.ID) (Namespace, bool) { d.RLock() defer d.RUnlock() - ns, ok := d.namespaces[id.Hash()] - return ns, ok + return d.namespaces.Get(id) } func (d *db) Namespaces() []Namespace { d.RLock() defer d.RUnlock() - namespaces := make([]Namespace, 0, len(d.namespaces)) - for _, elem := range d.namespaces { - namespaces = append(namespaces, elem) + namespaces := make([]Namespace, 0, d.namespaces.Len()) + for _, elem := range d.namespaces.Iter() { + namespaces = append(namespaces, elem.Value()) } return namespaces } @@ -442,7 +443,7 @@ func (d *db) terminateWithLock() error { // NB(prateek): Terminate is meant to return quickly, so we rely upon // the gc to clean up any resources held by namespaces, and just set // our reference to the namespaces to nil. 
- d.namespaces = nil + d.namespaces.Reallocate() // Finally close the commit log return d.commitLog.Close() @@ -643,7 +644,7 @@ func (d *db) IsOverloaded() bool { func (d *db) namespaceFor(namespace ident.ID) (databaseNamespace, error) { d.RLock() - n, exists := d.namespaces[namespace.Hash()] + n, exists := d.namespaces.Get(namespace) d.RUnlock() if !exists { @@ -653,9 +654,9 @@ func (d *db) namespaceFor(namespace ident.ID) (databaseNamespace, error) { } func (d *db) ownedNamespacesWithLock() []databaseNamespace { - namespaces := make([]databaseNamespace, 0, len(d.namespaces)) - for _, n := range d.namespaces { - namespaces = append(namespaces, n) + namespaces := make([]databaseNamespace, 0, d.namespaces.Len()) + for _, n := range d.namespaces.Iter() { + namespaces = append(namespaces, n.Value()) } return namespaces } diff --git a/storage/database_test.go b/storage/database_test.go index af997fa5e5..7eddfb4ddc 100644 --- a/storage/database_test.go +++ b/storage/database_test.go @@ -194,7 +194,7 @@ func dbAddNewMockNamespace( ns := ident.StringID(id) mockNamespace := NewMockdatabaseNamespace(ctrl) mockNamespace.EXPECT().ID().Return(ns).AnyTimes() - d.namespaces[ns.Hash()] = mockNamespace + d.namespaces.Set(ns, mockNamespace) return mockNamespace } @@ -273,7 +273,7 @@ func TestDatabaseReadEncodedNamespaceOwned(t *testing.T) { start := end.Add(-time.Hour) mockNamespace := NewMockdatabaseNamespace(ctrl) mockNamespace.EXPECT().ReadEncoded(ctx, id, start, end).Return(nil, nil) - d.namespaces[ns.Hash()] = mockNamespace + d.namespaces.Set(ns, mockNamespace) res, err := d.ReadEncoded(ctx, ns, id, start, end) require.Nil(t, res) @@ -319,7 +319,7 @@ func TestDatabaseFetchBlocksNamespaceOwned(t *testing.T) { expected := []block.FetchBlockResult{block.NewFetchBlockResult(starts[0], nil, nil)} mockNamespace := NewMockdatabaseNamespace(ctrl) mockNamespace.EXPECT().FetchBlocks(ctx, shardID, id, starts).Return(expected, nil) - d.namespaces[ns.Hash()] = mockNamespace + d.namespaces.Set(ns, mockNamespace) res, err := d.FetchBlocks(ctx, ns, shardID, id, starts) require.Equal(t, expected, res) @@ -390,7 +390,7 @@ func TestDatabaseFetchBlocksMetadataShardOwned(t *testing.T) { EXPECT(). FetchBlocksMetadata(ctx, shardID, start, end, limit, pageToken, opts). Return(expectedBlocks, expectedToken, nil) - d.namespaces[ns.Hash()] = mockNamespace + d.namespaces.Set(ns, mockNamespace) res, nextToken, err := d.FetchBlocksMetadata(ctx, ns, shardID, start, end, limit, pageToken, opts) require.Equal(t, expectedBlocks, res) diff --git a/storage/index.go b/storage/index.go index 5ea4b5ed4f..2889b6bbba 100644 --- a/storage/index.go +++ b/storage/index.go @@ -276,7 +276,7 @@ func (i *nsIndex) doc(id ident.ID, tags ident.Tags) (doc.Document, error) { // any ids provided, as we need to maintain the lifecycle of the indexed // bytes separately from the rest of the storage subsystem. 
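+// NB: this update also moves callers from the old m3x checked-bytes
+// accessor Get() to Bytes(), e.g. (editorial note):
+// ```
+// raw := id.Data().Bytes() // previously id.Data().Get()
+// ```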
func (i *nsIndex) clone(id ident.ID) []byte { - original := id.Data().Get() + original := id.Data().Bytes() clone := make([]byte, len(original)) copy(clone, original) return clone diff --git a/storage/namespace.go b/storage/namespace.go index 36c171533f..ace54aee0d 100644 --- a/storage/namespace.go +++ b/storage/namespace.go @@ -683,9 +683,11 @@ func (n *dbNamespace) Bootstrap( shard := shard wg.Add(1) workers.Go(func() { - var bootstrapped map[ident.Hash]result.DatabaseSeriesBlocks - if result, ok := results[shard.ID()]; ok { - bootstrapped = result.AllSeries() + var bootstrapped *result.Map + if shardResult, ok := results[shard.ID()]; ok { + bootstrapped = shardResult.AllSeries() + } else { + bootstrapped = result.NewMap(result.MapOptions{}) } err := shard.Bootstrap(bootstrapped) diff --git a/storage/namespace/map.go b/storage/namespace/map.go index 05496f3ab7..f80f647e33 100644 --- a/storage/namespace/map.go +++ b/storage/namespace/map.go @@ -33,35 +33,34 @@ var ( ) type nsMap struct { - namespaces map[ident.Hash]Metadata + namespaces *metadataMap ids []ident.ID metadatas []Metadata } -// NewMap returns a new registry containing provided metadatas +// NewMap returns a new registry containing provided metadatas, +// providing a consistent order. func NewMap(metadatas []Metadata) (Map, error) { if len(metadatas) == 0 { return nil, errEmptyMetadatas } var ( - ns = make(map[ident.Hash]Metadata, len(metadatas)) + ns = newMetadataMap(metadataMapOptions{}) ids = make([]ident.ID, 0, len(metadatas)) nsMetadatas = make([]Metadata, 0, len(metadatas)) - idsMap = make(map[ident.Hash]struct{}) multiErr xerrors.MultiError ) for _, m := range metadatas { id := m.ID() ids = append(ids, id) nsMetadatas = append(nsMetadatas, m) - ns[id.Hash()] = m - if _, ok := idsMap[id.Hash()]; ok { + if _, ok := ns.Get(id); ok { multiErr = multiErr.Add(fmt.Errorf( "namespace ids must be unique, duplicate found: %v", id.String())) } - idsMap[id.Hash()] = struct{}{} + ns.Set(id, m) } if err := multiErr.FinalError(); err != nil { @@ -76,8 +75,7 @@ func NewMap(metadatas []Metadata) (Map, error) { } func (r *nsMap) Get(namespace ident.ID) (Metadata, error) { - idHash := namespace.Hash() - metadata, ok := r.namespaces[idHash] + metadata, ok := r.namespaces.Get(namespace) if !ok { return nil, fmt.Errorf("unable to find namespace (%v) in registry", namespace.String()) } diff --git a/storage/namespace/metadata.go b/storage/namespace/metadata.go index 5d22dd2622..9dae8a4e77 100644 --- a/storage/namespace/metadata.go +++ b/storage/namespace/metadata.go @@ -53,7 +53,7 @@ func NewMetadata(id ident.ID, opts Options) (Metadata, error) { } - copiedID := checked.NewBytes(append([]byte(nil), id.Data().Get()...), nil) + copiedID := checked.NewBytes(append([]byte(nil), id.Data().Bytes()...), nil) return &metadata{ id: ident.BinaryID(copiedID), opts: opts, diff --git a/storage/namespace/metadata_map_gen.go b/storage/namespace/metadata_map_gen.go new file mode 100644 index 0000000000..34cf27b92b --- /dev/null +++ b/storage/namespace/metadata_map_gen.go @@ -0,0 +1,259 @@ +// This file was automatically generated by genny. +// Any changes will be lost if this file is regenerated. +// see https://github.com/mauricelam/genny + +package namespace + +import ( + "github.com/m3db/m3x/ident" +) + +// This file was automatically generated by genny. +// Any changes will be lost if this file is regenerated. +// see https://github.com/mauricelam/genny + +// Copyright (c) 2018 Uber Technologies, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// metadataMapHash is the hash for a given map entry, this is public to support +// iterating over the map using a native Go for loop. +type metadataMapHash uint64 + +// metadataMapHashFn is the hash function to execute when hashing a key. +type metadataMapHashFn func(ident.ID) metadataMapHash + +// metadataMapEqualsFn is the equals key function to execute when detecting equality of a key. +type metadataMapEqualsFn func(ident.ID, ident.ID) bool + +// metadataMapCopyFn is the copy key function to execute when copying the key. +type metadataMapCopyFn func(ident.ID) ident.ID + +// metadataMapFinalizeFn is the finalize key function to execute when finished with a key. +type metadataMapFinalizeFn func(ident.ID) + +// metadataMap uses the genny package to provide a generic hash map that can be specialized +// by running the following command from this root of the repository: +// ``` +// make hashmap-gen pkg=outpkg key_type=Type value_type=Type out_dir=/tmp +// ``` +// Or if you would like to use bytes or ident.ID as keys you can use the +// partially specialized maps to generate your own maps as well: +// ``` +// make byteshashmap-gen pkg=outpkg value_type=Type out_dir=/tmp +// make idhashmap-gen pkg=outpkg value_type=Type out_dir=/tmp +// ``` +// This will output to stdout the generated source file to use for your map. +// It uses linear probing by incrementing the number of the hash created when +// hashing the identifier if there is a collision. +// metadataMap is a value type and not an interface to allow for less painful +// upgrades when adding/removing methods, it is not likely to need mocking so +// an interface would not be super useful either. +type metadataMap struct { + _metadataMapOptions + + // lookup uses hash of the identifier for the key and the MapEntry value + // wraps the value type and the key (used to ensure lookup is correct + // when dealing with collisions), we use uint64 for the hash partially + // because lookups of maps with uint64 keys has a fast path for Go. + lookup map[metadataMapHash]metadataMapEntry +} + +// _metadataMapOptions is a set of options used when creating an identifier map, it is kept +// private so that implementers of the generated map can specify their own options +// that partially fulfill these options. +type _metadataMapOptions struct { + // hash is the hash function to execute when hashing a key. 
+ hash metadataMapHashFn + // equals is the equals key function to execute when detecting equality. + equals metadataMapEqualsFn + // copy is the copy key function to execute when copying the key. + copy metadataMapCopyFn + // finalize is the finalize key function to execute when finished with a + // key, this is optional to specify. + finalize metadataMapFinalizeFn + // initialSize is the initial size for the map, use zero to use Go's std map + // initial size and consequently is optional to specify. + initialSize int +} + +// metadataMapEntry is an entry in the map, this is public to support iterating +// over the map using a native Go for loop. +type metadataMapEntry struct { + // key is used to check equality on lookups to resolve collisions + key _metadataMapKey + // value type stored + value Metadata +} + +type _metadataMapKey struct { + key ident.ID + finalize bool +} + +// Key returns the map entry key. +func (e metadataMapEntry) Key() ident.ID { + return e.key.key +} + +// Value returns the map entry value. +func (e metadataMapEntry) Value() Metadata { + return e.value +} + +// _metadataMapAlloc is a non-exported function so that when generating the source code +// for the map you can supply a public constructor that sets the correct +// hash, equals, copy, finalize options without users of the map needing to +// implement them themselves. +func _metadataMapAlloc(opts _metadataMapOptions) *metadataMap { + m := &metadataMap{_metadataMapOptions: opts} + m.Reallocate() + return m +} + +func (m *metadataMap) newMapKey(k ident.ID, opts _metadataMapKeyOptions) _metadataMapKey { + key := _metadataMapKey{key: k, finalize: opts.finalizeKey} + if !opts.copyKey { + return key + } + + key.key = m.copy(k) + return key +} + +func (m *metadataMap) removeMapKey(hash metadataMapHash, key _metadataMapKey) { + delete(m.lookup, hash) + if key.finalize { + m.finalize(key.key) + } +} + +// Get returns a value in the map for an identifier if found. +func (m *metadataMap) Get(k ident.ID) (Metadata, bool) { + hash := m.hash(k) + for entry, ok := m.lookup[hash]; ok; entry, ok = m.lookup[hash] { + if m.equals(entry.key.key, k) { + return entry.value, true + } + // Linear probe to "next" to this entry (really a rehash) + hash++ + } + var empty Metadata + return empty, false +} + +// Set will set the value for an identifier. +func (m *metadataMap) Set(k ident.ID, v Metadata) { + m.set(k, v, _metadataMapKeyOptions{ + copyKey: true, + finalizeKey: m.finalize != nil, + }) +} + +// metadataMapSetUnsafeOptions is a set of options to use when setting a value with +// the SetUnsafe method. +type metadataMapSetUnsafeOptions struct { + NoCopyKey bool + NoFinalizeKey bool +} + +// SetUnsafe will set the value for an identifier with unsafe options for how +// the map treats the key. 
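+//
+// For example, when the caller guarantees the key outlives the entry
+// (editorial sketch; id and md are hypothetical values):
+// ```
+// m.SetUnsafe(id, md, metadataMapSetUnsafeOptions{
+//     NoCopyKey:     true, // caller owns a stable key, skip the copy
+//     NoFinalizeKey: true, // caller finalizes the key itself
+// })
+// ```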
+func (m *metadataMap) SetUnsafe(k ident.ID, v Metadata, opts metadataMapSetUnsafeOptions) { + m.set(k, v, _metadataMapKeyOptions{ + copyKey: !opts.NoCopyKey, + finalizeKey: !opts.NoFinalizeKey, + }) +} + +type _metadataMapKeyOptions struct { + copyKey bool + finalizeKey bool +} + +func (m *metadataMap) set(k ident.ID, v Metadata, opts _metadataMapKeyOptions) { + hash := m.hash(k) + for entry, ok := m.lookup[hash]; ok; entry, ok = m.lookup[hash] { + if m.equals(entry.key.key, k) { + m.lookup[hash] = metadataMapEntry{ + key: entry.key, + value: v, + } + return + } + // Linear probe to "next" to this entry (really a rehash) + hash++ + } + + m.lookup[hash] = metadataMapEntry{ + key: m.newMapKey(k, opts), + value: v, + } +} + +// Iter provides the underlying map to allow for using a native Go for loop +// to iterate the map, however callers should only ever read and not write +// the map. +func (m *metadataMap) Iter() map[metadataMapHash]metadataMapEntry { + return m.lookup +} + +// Len returns the number of map entries in the map. +func (m *metadataMap) Len() int { + return len(m.lookup) +} + +// Contains returns true if value exists for key, false otherwise, it is +// shorthand for a call to Get that doesn't return the value. +func (m *metadataMap) Contains(k ident.ID) bool { + _, ok := m.Get(k) + return ok +} + +// Delete will remove a value set in the map for the specified key. +func (m *metadataMap) Delete(k ident.ID) { + hash := m.hash(k) + for entry, ok := m.lookup[hash]; ok; entry, ok = m.lookup[hash] { + if m.equals(entry.key.key, k) { + m.removeMapKey(hash, entry.key) + return + } + // Linear probe to "next" to this entry (really a rehash) + hash++ + } +} + +// Reset will reset the map by simply deleting all keys to avoid +// allocating a new map. +func (m *metadataMap) Reset() { + for hash, entry := range m.lookup { + m.removeMapKey(hash, entry.key) + } +} + +// Reallocate will avoid deleting all keys and reallocate a new +// map, this is useful if you believe you have a large map and +// will not need to grow back to a similar size. +func (m *metadataMap) Reallocate() { + if m.initialSize > 0 { + m.lookup = make(map[metadataMapHash]metadataMapEntry, m.initialSize) + } else { + m.lookup = make(map[metadataMapHash]metadataMapEntry) + } +} diff --git a/storage/namespace/metadata_new_map_gen.go b/storage/namespace/metadata_new_map_gen.go new file mode 100644 index 0000000000..2f3a5e874b --- /dev/null +++ b/storage/namespace/metadata_new_map_gen.go @@ -0,0 +1,76 @@ +// This file was automatically generated by genny. +// Any changes will be lost if this file is regenerated. +// see https://github.com/mauricelam/genny + +package namespace + +import ( + "github.com/m3db/m3x/ident" + + "github.com/m3db/m3x/pool" + + "github.com/cespare/xxhash" +) + +// Copyright (c) 2018 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// metadataMapOptions provides options used when creating the map.
+type metadataMapOptions struct {
+ InitialSize int
+ KeyCopyPool pool.BytesPool
+}
+
+// newMetadataMap returns a new byte-keyed map.
+func newMetadataMap(opts metadataMapOptions) *metadataMap {
+ var (
+ copyFn metadataMapCopyFn
+ finalizeFn metadataMapFinalizeFn
+ )
+ if pool := opts.KeyCopyPool; pool == nil {
+ copyFn = func(k ident.ID) ident.ID {
+ return ident.BytesID(append([]byte(nil), k.Bytes()...))
+ }
+ } else {
+ copyFn = func(k ident.ID) ident.ID {
+ bytes := k.Bytes()
+ keyLen := len(bytes)
+ pooled := pool.Get(keyLen)[:keyLen]
+ copy(pooled, bytes)
+ return ident.BytesID(pooled)
+ }
+ finalizeFn = func(k ident.ID) {
+ if slice, ok := k.(ident.BytesID); ok {
+ pool.Put(slice)
+ }
+ }
+ }
+ return _metadataMapAlloc(_metadataMapOptions{
+ hash: func(id ident.ID) metadataMapHash {
+ return metadataMapHash(xxhash.Sum64(id.Bytes()))
+ },
+ equals: func(x, y ident.ID) bool {
+ return x.Equal(y)
+ },
+ copy: copyFn,
+ finalize: finalizeFn,
+ initialSize: opts.InitialSize,
+ })
+}
diff --git a/storage/namespace_map_gen.go b/storage/namespace_map_gen.go
new file mode 100644
index 0000000000..7c56bf5b00
--- /dev/null
+++ b/storage/namespace_map_gen.go
@@ -0,0 +1,259 @@
+// This file was automatically generated by genny.
+// Any changes will be lost if this file is regenerated.
+// see https://github.com/mauricelam/genny
+
+package storage
+
+import (
+ "github.com/m3db/m3x/ident"
+)
+
+// This file was automatically generated by genny.
+// Any changes will be lost if this file is regenerated.
+// see https://github.com/mauricelam/genny
+
+// Copyright (c) 2018 Uber Technologies, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// databaseNamespacesMapHash is the hash for a given map entry, this is public to support
+// iterating over the map using a native Go for loop.
+type databaseNamespacesMapHash uint64
+
+// databaseNamespacesMapHashFn is the hash function to execute when hashing a key.
+type databaseNamespacesMapHashFn func(ident.ID) databaseNamespacesMapHash + +// databaseNamespacesMapEqualsFn is the equals key function to execute when detecting equality of a key. +type databaseNamespacesMapEqualsFn func(ident.ID, ident.ID) bool + +// databaseNamespacesMapCopyFn is the copy key function to execute when copying the key. +type databaseNamespacesMapCopyFn func(ident.ID) ident.ID + +// databaseNamespacesMapFinalizeFn is the finalize key function to execute when finished with a key. +type databaseNamespacesMapFinalizeFn func(ident.ID) + +// databaseNamespacesMap uses the genny package to provide a generic hash map that can be specialized +// by running the following command from this root of the repository: +// ``` +// make hashmap-gen pkg=outpkg key_type=Type value_type=Type out_dir=/tmp +// ``` +// Or if you would like to use bytes or ident.ID as keys you can use the +// partially specialized maps to generate your own maps as well: +// ``` +// make byteshashmap-gen pkg=outpkg value_type=Type out_dir=/tmp +// make idhashmap-gen pkg=outpkg value_type=Type out_dir=/tmp +// ``` +// This will output to stdout the generated source file to use for your map. +// It uses linear probing by incrementing the number of the hash created when +// hashing the identifier if there is a collision. +// databaseNamespacesMap is a value type and not an interface to allow for less painful +// upgrades when adding/removing methods, it is not likely to need mocking so +// an interface would not be super useful either. +type databaseNamespacesMap struct { + _databaseNamespacesMapOptions + + // lookup uses hash of the identifier for the key and the MapEntry value + // wraps the value type and the key (used to ensure lookup is correct + // when dealing with collisions), we use uint64 for the hash partially + // because lookups of maps with uint64 keys has a fast path for Go. + lookup map[databaseNamespacesMapHash]databaseNamespacesMapEntry +} + +// _databaseNamespacesMapOptions is a set of options used when creating an identifier map, it is kept +// private so that implementers of the generated map can specify their own options +// that partially fulfill these options. +type _databaseNamespacesMapOptions struct { + // hash is the hash function to execute when hashing a key. + hash databaseNamespacesMapHashFn + // equals is the equals key function to execute when detecting equality. + equals databaseNamespacesMapEqualsFn + // copy is the copy key function to execute when copying the key. + copy databaseNamespacesMapCopyFn + // finalize is the finalize key function to execute when finished with a + // key, this is optional to specify. + finalize databaseNamespacesMapFinalizeFn + // initialSize is the initial size for the map, use zero to use Go's std map + // initial size and consequently is optional to specify. + initialSize int +} + +// databaseNamespacesMapEntry is an entry in the map, this is public to support iterating +// over the map using a native Go for loop. +type databaseNamespacesMapEntry struct { + // key is used to check equality on lookups to resolve collisions + key _databaseNamespacesMapKey + // value type stored + value databaseNamespace +} + +type _databaseNamespacesMapKey struct { + key ident.ID + finalize bool +} + +// Key returns the map entry key. +func (e databaseNamespacesMapEntry) Key() ident.ID { + return e.key.key +} + +// Value returns the map entry value. 
+func (e databaseNamespacesMapEntry) Value() databaseNamespace { + return e.value +} + +// _databaseNamespacesMapAlloc is a non-exported function so that when generating the source code +// for the map you can supply a public constructor that sets the correct +// hash, equals, copy, finalize options without users of the map needing to +// implement them themselves. +func _databaseNamespacesMapAlloc(opts _databaseNamespacesMapOptions) *databaseNamespacesMap { + m := &databaseNamespacesMap{_databaseNamespacesMapOptions: opts} + m.Reallocate() + return m +} + +func (m *databaseNamespacesMap) newMapKey(k ident.ID, opts _databaseNamespacesMapKeyOptions) _databaseNamespacesMapKey { + key := _databaseNamespacesMapKey{key: k, finalize: opts.finalizeKey} + if !opts.copyKey { + return key + } + + key.key = m.copy(k) + return key +} + +func (m *databaseNamespacesMap) removeMapKey(hash databaseNamespacesMapHash, key _databaseNamespacesMapKey) { + delete(m.lookup, hash) + if key.finalize { + m.finalize(key.key) + } +} + +// Get returns a value in the map for an identifier if found. +func (m *databaseNamespacesMap) Get(k ident.ID) (databaseNamespace, bool) { + hash := m.hash(k) + for entry, ok := m.lookup[hash]; ok; entry, ok = m.lookup[hash] { + if m.equals(entry.key.key, k) { + return entry.value, true + } + // Linear probe to "next" to this entry (really a rehash) + hash++ + } + var empty databaseNamespace + return empty, false +} + +// Set will set the value for an identifier. +func (m *databaseNamespacesMap) Set(k ident.ID, v databaseNamespace) { + m.set(k, v, _databaseNamespacesMapKeyOptions{ + copyKey: true, + finalizeKey: m.finalize != nil, + }) +} + +// databaseNamespacesMapSetUnsafeOptions is a set of options to use when setting a value with +// the SetUnsafe method. +type databaseNamespacesMapSetUnsafeOptions struct { + NoCopyKey bool + NoFinalizeKey bool +} + +// SetUnsafe will set the value for an identifier with unsafe options for how +// the map treats the key. +func (m *databaseNamespacesMap) SetUnsafe(k ident.ID, v databaseNamespace, opts databaseNamespacesMapSetUnsafeOptions) { + m.set(k, v, _databaseNamespacesMapKeyOptions{ + copyKey: !opts.NoCopyKey, + finalizeKey: !opts.NoFinalizeKey, + }) +} + +type _databaseNamespacesMapKeyOptions struct { + copyKey bool + finalizeKey bool +} + +func (m *databaseNamespacesMap) set(k ident.ID, v databaseNamespace, opts _databaseNamespacesMapKeyOptions) { + hash := m.hash(k) + for entry, ok := m.lookup[hash]; ok; entry, ok = m.lookup[hash] { + if m.equals(entry.key.key, k) { + m.lookup[hash] = databaseNamespacesMapEntry{ + key: entry.key, + value: v, + } + return + } + // Linear probe to "next" to this entry (really a rehash) + hash++ + } + + m.lookup[hash] = databaseNamespacesMapEntry{ + key: m.newMapKey(k, opts), + value: v, + } +} + +// Iter provides the underlying map to allow for using a native Go for loop +// to iterate the map, however callers should only ever read and not write +// the map. +func (m *databaseNamespacesMap) Iter() map[databaseNamespacesMapHash]databaseNamespacesMapEntry { + return m.lookup +} + +// Len returns the number of map entries in the map. +func (m *databaseNamespacesMap) Len() int { + return len(m.lookup) +} + +// Contains returns true if value exists for key, false otherwise, it is +// shorthand for a call to Get that doesn't return the value. +func (m *databaseNamespacesMap) Contains(k ident.ID) bool { + _, ok := m.Get(k) + return ok +} + +// Delete will remove a value set in the map for the specified key. 
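+//
+// Deleting a missing key is a no-op, so Delete is idempotent
+// (editorial sketch):
+// ```
+// m.Delete(ident.StringID("ns"))
+// found := m.Contains(ident.StringID("ns")) // false
+// ```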
+func (m *databaseNamespacesMap) Delete(k ident.ID) {
+ hash := m.hash(k)
+ for entry, ok := m.lookup[hash]; ok; entry, ok = m.lookup[hash] {
+ if m.equals(entry.key.key, k) {
+ m.removeMapKey(hash, entry.key)
+ return
+ }
+ // Linear probe to "next" to this entry (really a rehash)
+ hash++
+ }
+}
+
+// Reset will reset the map by simply deleting all keys to avoid
+// allocating a new map.
+func (m *databaseNamespacesMap) Reset() {
+ for hash, entry := range m.lookup {
+ m.removeMapKey(hash, entry.key)
+ }
+}
+
+// Reallocate will avoid deleting all keys and reallocate a new
+// map, this is useful if you believe you have a large map and
+// will not need to grow back to a similar size.
+func (m *databaseNamespacesMap) Reallocate() {
+ if m.initialSize > 0 {
+ m.lookup = make(map[databaseNamespacesMapHash]databaseNamespacesMapEntry, m.initialSize)
+ } else {
+ m.lookup = make(map[databaseNamespacesMapHash]databaseNamespacesMapEntry)
+ }
+}
diff --git a/storage/namespace_new_map_gen.go b/storage/namespace_new_map_gen.go
new file mode 100644
index 0000000000..c760dd3a30
--- /dev/null
+++ b/storage/namespace_new_map_gen.go
@@ -0,0 +1,76 @@
+// This file was automatically generated by genny.
+// Any changes will be lost if this file is regenerated.
+// see https://github.com/mauricelam/genny
+
+package storage
+
+import (
+ "github.com/m3db/m3x/ident"
+
+ "github.com/m3db/m3x/pool"
+
+ "github.com/cespare/xxhash"
+)
+
+// Copyright (c) 2018 Uber Technologies, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// databaseNamespacesMapOptions provides options used when creating the map.
+type databaseNamespacesMapOptions struct {
+ InitialSize int
+ KeyCopyPool pool.BytesPool
+}
+
+// newDatabaseNamespacesMap returns a new byte-keyed map.
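+//
+// Construction sketch (editorial note; bytesPool is an existing
+// pool.BytesPool, or leave it nil to heap-allocate key copies):
+// ```
+// m := newDatabaseNamespacesMap(databaseNamespacesMapOptions{
+//     InitialSize: 16,
+//     KeyCopyPool: bytesPool,
+// })
+// ```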
+func newDatabaseNamespacesMap(opts databaseNamespacesMapOptions) *databaseNamespacesMap {
+ var (
+ copyFn databaseNamespacesMapCopyFn
+ finalizeFn databaseNamespacesMapFinalizeFn
+ )
+ if pool := opts.KeyCopyPool; pool == nil {
+ copyFn = func(k ident.ID) ident.ID {
+ return ident.BytesID(append([]byte(nil), k.Bytes()...))
+ }
+ } else {
+ copyFn = func(k ident.ID) ident.ID {
+ bytes := k.Bytes()
+ keyLen := len(bytes)
+ pooled := pool.Get(keyLen)[:keyLen]
+ copy(pooled, bytes)
+ return ident.BytesID(pooled)
+ }
+ finalizeFn = func(k ident.ID) {
+ if slice, ok := k.(ident.BytesID); ok {
+ pool.Put(slice)
+ }
+ }
+ }
+ return _databaseNamespacesMapAlloc(_databaseNamespacesMapOptions{
+ hash: func(id ident.ID) databaseNamespacesMapHash {
+ return databaseNamespacesMapHash(xxhash.Sum64(id.Bytes()))
+ },
+ equals: func(x, y ident.ID) bool {
+ return x.Equal(y)
+ },
+ copy: copyFn,
+ finalize: finalizeFn,
+ initialSize: opts.InitialSize,
+ })
+}
diff --git a/storage/namespace_test.go b/storage/namespace_test.go
index 6d268f8d98..a96bbfb7ef 100644
--- a/storage/namespace_test.go
+++ b/storage/namespace_test.go
@@ -359,7 +359,7 @@ func TestNamespaceBootstrapAllShards(t *testing.T) {
 shard := NewMockdatabaseShard(ctrl)
 shard.EXPECT().IsBootstrapped().Return(false)
 shard.EXPECT().ID().Return(uint32(i)).AnyTimes()
- shard.EXPECT().Bootstrap(nil).Return(errs[i])
+ shard.EXPECT().Bootstrap(gomock.Any()).Return(errs[i])
 ns.shards[testShardIDs[i].ID()] = shard
 }
@@ -406,7 +406,7 @@ func TestNamespaceBootstrapOnlyNonBootstrappedShards(t *testing.T) {
 shard := NewMockdatabaseShard(ctrl)
 shard.EXPECT().IsBootstrapped().Return(false)
 shard.EXPECT().ID().Return(testShard.ID()).AnyTimes()
- shard.EXPECT().Bootstrap(nil).Return(nil)
+ shard.EXPECT().Bootstrap(gomock.Any()).Return(nil)
 ns.shards[testShard.ID()] = shard
 }
 for _, testShard := range alreadyBootstrapped {
diff --git a/storage/repair.go b/storage/repair.go
index 3ea053a5bc..4ac8e5e14e 100644
--- a/storage/repair.go
+++ b/storage/repair.go
@@ -184,7 +184,12 @@ type repairState struct {
 
 type namespaceRepairStateByTime map[xtime.UnixNano]repairState
 
-type repairStatesByNs map[ident.Hash]namespaceRepairStateByTime
+// NB(r): This uses a map[string]element instead of a generated map with
+// native ident.ID keys because the call frequency is very low: this code is
+// not in the hot path, so casting ident.ID to string isn't too expensive,
+// and this data structure may well change soon with a refactor of the
+// background repair that is in the works.
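+//
+// Lookups therefore key by the namespace's string form (editorial
+// sketch; namespace is an ident.ID):
+// ```
+// states := newRepairStates()
+// states[namespace.String()] = namespaceRepairStateByTime{}
+// ```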
+type repairStatesByNs map[string]namespaceRepairStateByTime func newRepairStates() repairStatesByNs { return make(repairStatesByNs) @@ -196,7 +201,7 @@ func (r repairStatesByNs) repairStates( ) (repairState, bool) { var rs repairState - nsRepairState, ok := r[namespace.Hash()] + nsRepairState, ok := r[namespace.String()] if !ok { return rs, false } @@ -210,10 +215,10 @@ func (r repairStatesByNs) setRepairState( t time.Time, state repairState, ) { - nsRepairState, ok := r[namespace.Hash()] + nsRepairState, ok := r[namespace.String()] if !ok { nsRepairState = make(namespaceRepairStateByTime) - r[namespace.Hash()] = nsRepairState + r[namespace.String()] = nsRepairState } nsRepairState[xtime.ToUnixNano(t)] = state } @@ -328,7 +333,7 @@ func (r *dbRepairer) namespaceRepairTimeRanges(ns databaseNamespace) xtime.Range ) targetRanges := xtime.Ranges{}.AddRange(xtime.Range{Start: start, End: end}) - for tNano := range r.repairStatesByNs[ns.ID().Hash()] { + for tNano := range r.repairStatesByNs[ns.ID().String()] { t := tNano.ToTime() if !r.needsRepair(ns.ID(), t) { targetRanges = targetRanges.RemoveRange(xtime.Range{Start: t, End: t.Add(blockSize)}) diff --git a/storage/repair/map_gen.go b/storage/repair/map_gen.go new file mode 100644 index 0000000000..0ea6cb350a --- /dev/null +++ b/storage/repair/map_gen.go @@ -0,0 +1,259 @@ +// This file was automatically generated by genny. +// Any changes will be lost if this file is regenerated. +// see https://github.com/mauricelam/genny + +package repair + +import ( + "github.com/m3db/m3x/ident" +) + +// This file was automatically generated by genny. +// Any changes will be lost if this file is regenerated. +// see https://github.com/mauricelam/genny + +// Copyright (c) 2018 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// MapHash is the hash for a given map entry, this is public to support +// iterating over the map using a native Go for loop. +type MapHash uint64 + +// HashFn is the hash function to execute when hashing a key. +type HashFn func(ident.ID) MapHash + +// EqualsFn is the equals key function to execute when detecting equality of a key. +type EqualsFn func(ident.ID, ident.ID) bool + +// CopyFn is the copy key function to execute when copying the key. +type CopyFn func(ident.ID) ident.ID + +// FinalizeFn is the finalize key function to execute when finished with a key. 
+type FinalizeFn func(ident.ID) + +// Map uses the genny package to provide a generic hash map that can be specialized +// by running the following command from this root of the repository: +// ``` +// make hashmap-gen pkg=outpkg key_type=Type value_type=Type out_dir=/tmp +// ``` +// Or if you would like to use bytes or ident.ID as keys you can use the +// partially specialized maps to generate your own maps as well: +// ``` +// make byteshashmap-gen pkg=outpkg value_type=Type out_dir=/tmp +// make idhashmap-gen pkg=outpkg value_type=Type out_dir=/tmp +// ``` +// This will output to stdout the generated source file to use for your map. +// It uses linear probing by incrementing the number of the hash created when +// hashing the identifier if there is a collision. +// Map is a value type and not an interface to allow for less painful +// upgrades when adding/removing methods, it is not likely to need mocking so +// an interface would not be super useful either. +type Map struct { + mapOptions + + // lookup uses hash of the identifier for the key and the MapEntry value + // wraps the value type and the key (used to ensure lookup is correct + // when dealing with collisions), we use uint64 for the hash partially + // because lookups of maps with uint64 keys has a fast path for Go. + lookup map[MapHash]MapEntry +} + +// mapOptions is a set of options used when creating an identifier map, it is kept +// private so that implementers of the generated map can specify their own options +// that partially fulfill these options. +type mapOptions struct { + // hash is the hash function to execute when hashing a key. + hash HashFn + // equals is the equals key function to execute when detecting equality. + equals EqualsFn + // copy is the copy key function to execute when copying the key. + copy CopyFn + // finalize is the finalize key function to execute when finished with a + // key, this is optional to specify. + finalize FinalizeFn + // initialSize is the initial size for the map, use zero to use Go's std map + // initial size and consequently is optional to specify. + initialSize int +} + +// MapEntry is an entry in the map, this is public to support iterating +// over the map using a native Go for loop. +type MapEntry struct { + // key is used to check equality on lookups to resolve collisions + key mapKey + // value type stored + value ReplicaSeriesBlocksMetadata +} + +type mapKey struct { + key ident.ID + finalize bool +} + +// Key returns the map entry key. +func (e MapEntry) Key() ident.ID { + return e.key.key +} + +// Value returns the map entry value. +func (e MapEntry) Value() ReplicaSeriesBlocksMetadata { + return e.value +} + +// mapAlloc is a non-exported function so that when generating the source code +// for the map you can supply a public constructor that sets the correct +// hash, equals, copy, finalize options without users of the map needing to +// implement them themselves. +func mapAlloc(opts mapOptions) *Map { + m := &Map{mapOptions: opts} + m.Reallocate() + return m +} + +func (m *Map) newMapKey(k ident.ID, opts mapKeyOptions) mapKey { + key := mapKey{key: k, finalize: opts.finalizeKey} + if !opts.copyKey { + return key + } + + key.key = m.copy(k) + return key +} + +func (m *Map) removeMapKey(hash MapHash, key mapKey) { + delete(m.lookup, hash) + if key.finalize { + m.finalize(key.key) + } +} + +// Get returns a value in the map for an identifier if found. 
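+//
+// The repair metadata wrapper composes a get-or-add from Get and Set
+// (editorial sketch mirroring replicaSeriesMetadata.GetOrAdd below):
+// ```
+// blocks, ok := m.Get(id)
+// if !ok {
+//     blocks = ReplicaSeriesBlocksMetadata{
+//         ID:       id,
+//         Metadata: NewReplicaBlocksMetadata(),
+//     }
+//     m.Set(id, blocks)
+// }
+// ```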
+func (m *Map) Get(k ident.ID) (ReplicaSeriesBlocksMetadata, bool) { + hash := m.hash(k) + for entry, ok := m.lookup[hash]; ok; entry, ok = m.lookup[hash] { + if m.equals(entry.key.key, k) { + return entry.value, true + } + // Linear probe to "next" to this entry (really a rehash) + hash++ + } + var empty ReplicaSeriesBlocksMetadata + return empty, false +} + +// Set will set the value for an identifier. +func (m *Map) Set(k ident.ID, v ReplicaSeriesBlocksMetadata) { + m.set(k, v, mapKeyOptions{ + copyKey: true, + finalizeKey: m.finalize != nil, + }) +} + +// SetUnsafeOptions is a set of options to use when setting a value with +// the SetUnsafe method. +type SetUnsafeOptions struct { + NoCopyKey bool + NoFinalizeKey bool +} + +// SetUnsafe will set the value for an identifier with unsafe options for how +// the map treats the key. +func (m *Map) SetUnsafe(k ident.ID, v ReplicaSeriesBlocksMetadata, opts SetUnsafeOptions) { + m.set(k, v, mapKeyOptions{ + copyKey: !opts.NoCopyKey, + finalizeKey: !opts.NoFinalizeKey, + }) +} + +type mapKeyOptions struct { + copyKey bool + finalizeKey bool +} + +func (m *Map) set(k ident.ID, v ReplicaSeriesBlocksMetadata, opts mapKeyOptions) { + hash := m.hash(k) + for entry, ok := m.lookup[hash]; ok; entry, ok = m.lookup[hash] { + if m.equals(entry.key.key, k) { + m.lookup[hash] = MapEntry{ + key: entry.key, + value: v, + } + return + } + // Linear probe to "next" to this entry (really a rehash) + hash++ + } + + m.lookup[hash] = MapEntry{ + key: m.newMapKey(k, opts), + value: v, + } +} + +// Iter provides the underlying map to allow for using a native Go for loop +// to iterate the map, however callers should only ever read and not write +// the map. +func (m *Map) Iter() map[MapHash]MapEntry { + return m.lookup +} + +// Len returns the number of map entries in the map. +func (m *Map) Len() int { + return len(m.lookup) +} + +// Contains returns true if value exists for key, false otherwise, it is +// shorthand for a call to Get that doesn't return the value. +func (m *Map) Contains(k ident.ID) bool { + _, ok := m.Get(k) + return ok +} + +// Delete will remove a value set in the map for the specified key. +func (m *Map) Delete(k ident.ID) { + hash := m.hash(k) + for entry, ok := m.lookup[hash]; ok; entry, ok = m.lookup[hash] { + if m.equals(entry.key.key, k) { + m.removeMapKey(hash, entry.key) + return + } + // Linear probe to "next" to this entry (really a rehash) + hash++ + } +} + +// Reset will reset the map by simply deleting all keys to avoid +// allocating a new map. +func (m *Map) Reset() { + for hash, entry := range m.lookup { + m.removeMapKey(hash, entry.key) + } +} + +// Reallocate will avoid deleting all keys and reallocate a new +// map, this is useful if you believe you have a large map and +// will not need to grow back to a similar size. 
+func (m *Map) Reallocate() { + if m.initialSize > 0 { + m.lookup = make(map[MapHash]MapEntry, m.initialSize) + } else { + m.lookup = make(map[MapHash]MapEntry) + } +} diff --git a/storage/repair/metadata.go b/storage/repair/metadata.go index 59ffa957f0..9f68b6b311 100644 --- a/storage/repair/metadata.go +++ b/storage/repair/metadata.go @@ -117,26 +117,31 @@ func (m replicaBlocksMetadata) Close() { } // NB(xichen): replicaSeriesMetadata is not thread-safe -type replicaSeriesMetadata map[ident.Hash]ReplicaSeriesBlocksMetadata +type replicaSeriesMetadata struct { + values *Map +} // NewReplicaSeriesMetadata creates a new replica series metadata func NewReplicaSeriesMetadata() ReplicaSeriesMetadata { - return make(replicaSeriesMetadata, defaultReplicaSeriesMetadataCapacity) + return replicaSeriesMetadata{ + values: NewMap(MapOptions{InitialSize: defaultReplicaSeriesMetadataCapacity}), + } } -func (m replicaSeriesMetadata) NumSeries() int64 { return int64(len(m)) } -func (m replicaSeriesMetadata) Series() map[ident.Hash]ReplicaSeriesBlocksMetadata { return m } +func (m replicaSeriesMetadata) NumSeries() int64 { return int64(m.values.Len()) } +func (m replicaSeriesMetadata) Series() *Map { return m.values } func (m replicaSeriesMetadata) NumBlocks() int64 { var numBlocks int64 - for _, series := range m { + for _, entry := range m.values.Iter() { + series := entry.Value() numBlocks += series.Metadata.NumBlocks() } return numBlocks } func (m replicaSeriesMetadata) GetOrAdd(id ident.ID) ReplicaBlocksMetadata { - blocks, exists := m[id.Hash()] + blocks, exists := m.values.Get(id) if exists { return blocks.Metadata } @@ -144,12 +149,13 @@ func (m replicaSeriesMetadata) GetOrAdd(id ident.ID) ReplicaBlocksMetadata { ID: id, Metadata: NewReplicaBlocksMetadata(), } - m[id.Hash()] = blocks + m.values.Set(id, blocks) return blocks.Metadata } func (m replicaSeriesMetadata) Close() { - for _, series := range m { + for _, entry := range m.values.Iter() { + series := entry.Value() series.Metadata.Close() } } @@ -201,7 +207,8 @@ func (m replicaMetadataComparer) Compare() MetadataComparisonResult { checkSumDiff = NewReplicaSeriesMetadata() ) - for _, series := range m.metadata.Series() { + for _, entry := range m.metadata.Series().Iter() { + series := entry.Value() for _, b := range series.Metadata.Blocks() { bm := b.Metadata() diff --git a/storage/repair/metadata_test.go b/storage/repair/metadata_test.go index bb57bb2bb4..db3f36b82e 100644 --- a/storage/repair/metadata_test.go +++ b/storage/repair/metadata_test.go @@ -96,13 +96,13 @@ func TestReplicaSeriesMetadataGetOrAdd(t *testing.T) { // Add a series m.GetOrAdd(ident.StringID("foo")) series := m.Series() - require.Equal(t, 1, len(series)) - _, exists := series[ident.StringID("foo").Hash()] + require.Equal(t, 1, series.Len()) + _, exists := series.Get(ident.StringID("foo")) require.True(t, exists) // Add the same series and check we don't add new series m.GetOrAdd(ident.StringID("foo")) - require.Equal(t, 1, len(m.Series())) + require.Equal(t, 1, m.Series().Len()) } type testBlock struct { @@ -115,7 +115,8 @@ func assertEqual(t *testing.T, expected []testBlock, actual ReplicaSeriesMetadat require.Equal(t, len(expected), int(actual.NumBlocks())) for _, b := range expected { - series := actual.Series()[b.id.Hash()] + series, ok := actual.Series().Get(b.id) + require.True(t, ok) blocks := series.Metadata.Blocks()[xtime.ToUnixNano(b.ts)] require.Equal(t, b.blocks, blocks.Metadata()) } @@ -226,6 +227,8 @@ func TestReplicaMetadataComparerCompare(t *testing.T) { ) 
 	metadata := NewReplicaSeriesMetadata()
+	defer metadata.Close()
+
 	inputs := []struct {
 		host topology.Host
 		id   string
diff --git a/storage/repair/new_map_gen.go b/storage/repair/new_map_gen.go
new file mode 100644
index 0000000000..ea98f819ab
--- /dev/null
+++ b/storage/repair/new_map_gen.go
@@ -0,0 +1,76 @@
+// This file was automatically generated by genny.
+// Any changes will be lost if this file is regenerated.
+// see https://github.com/mauricelam/genny
+
+package repair
+
+import (
+	"github.com/m3db/m3x/ident"
+
+	"github.com/m3db/m3x/pool"
+
+	"github.com/cespare/xxhash"
+)
+
+// Copyright (c) 2018 Uber Technologies, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// MapOptions provides options used when creating the map.
+type MapOptions struct {
+	InitialSize int
+	KeyCopyPool pool.BytesPool
+}
+
+// NewMap returns a new byte-keyed map.
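+//
+// A minimal usage sketch (editor's illustration, not generated output; `id`
+// is assumed to be an ident.ID and `blocks` a ReplicaSeriesBlocksMetadata):
+//
+//	m := NewMap(MapOptions{InitialSize: 64})
+//	m.Set(id, blocks)
+//	if v, ok := m.Get(id); ok {
+//		_ = v.Metadata // keys compare by ident.ID equality, not ident.Hash
+//	}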
+func NewMap(opts MapOptions) *Map { + var ( + copyFn CopyFn + finalizeFn FinalizeFn + ) + if pool := opts.KeyCopyPool; pool == nil { + copyFn = func(k ident.ID) ident.ID { + return ident.BytesID(append([]byte(nil), k.Bytes()...)) + } + } else { + copyFn = func(k ident.ID) ident.ID { + bytes := k.Bytes() + keyLen := len(bytes) + pooled := pool.Get(keyLen)[:keyLen] + copy(pooled, bytes) + return ident.BytesID(pooled) + } + finalizeFn = func(k ident.ID) { + if slice, ok := k.(ident.BytesID); ok { + pool.Put(slice) + } + } + } + return mapAlloc(mapOptions{ + hash: func(id ident.ID) MapHash { + return MapHash(xxhash.Sum64(id.Bytes())) + }, + equals: func(x, y ident.ID) bool { + return x.Equal(y) + }, + copy: copyFn, + finalize: finalizeFn, + initialSize: opts.InitialSize, + }) +} diff --git a/storage/repair/types.go b/storage/repair/types.go index 9cf5b1f5c5..30d6f78e2b 100644 --- a/storage/repair/types.go +++ b/storage/repair/types.go @@ -103,7 +103,7 @@ type ReplicaSeriesMetadata interface { NumBlocks() int64 // Series returns the series metadata - Series() map[ident.Hash]ReplicaSeriesBlocksMetadata + Series() *Map // GetOrAdd returns the series metadata for an id, creating one if it doesn't exist GetOrAdd(id ident.ID) ReplicaBlocksMetadata diff --git a/storage/repair_test.go b/storage/repair_test.go index 5e8d6ac11a..10a34feb55 100644 --- a/storage/repair_test.go +++ b/storage/repair_test.go @@ -315,10 +315,10 @@ func TestDatabaseShardRepairerRepair(t *testing.T) { require.Equal(t, resShard, shard) require.Equal(t, int64(2), resDiff.NumSeries) require.Equal(t, int64(3), resDiff.NumBlocks) - require.Equal(t, 0, len(resDiff.ChecksumDifferences.Series())) + require.Equal(t, 0, resDiff.ChecksumDifferences.Series().Len()) sizeDiffSeries := resDiff.SizeDifferences.Series() - require.Equal(t, 1, len(sizeDiffSeries)) - series, exists := sizeDiffSeries[ident.StringID("foo").Hash()] + require.Equal(t, 1, sizeDiffSeries.Len()) + series, exists := sizeDiffSeries.Get(ident.StringID("foo")) require.True(t, exists) blocks := series.Metadata.Blocks() require.Equal(t, 1, len(blocks)) diff --git a/storage/shard.go b/storage/shard.go index 5931d56039..b230f6b58d 100644 --- a/storage/shard.go +++ b/storage/shard.go @@ -108,7 +108,7 @@ type dbShard struct { commitLogWriter commitLogWriter reverseIndex namespaceIndex insertQueue *dbShardInsertQueue - lookup map[ident.Hash]*list.Element + lookup *shardMap list *list.List bs bootstrapState filesetBeforeFn filesetBeforeFn @@ -219,6 +219,8 @@ type dbShardEntryWorkFn func(entry *dbShardEntry) bool type dbShardEntryBatchWorkFn func(entries []*dbShardEntry) bool +type shardListElement *list.Element + type shardFlushState struct { sync.RWMutex statesByTime map[xtime.UnixNano]fileOpState @@ -263,7 +265,7 @@ func newDatabaseShard( seriesPool: opts.DatabaseSeriesPool(), commitLogWriter: commitLogWriter, reverseIndex: reverseIndex, - lookup: make(map[ident.Hash]*list.Element), + lookup: newShardMap(shardMapOptions{}), list: list.New(), filesetBeforeFn: fs.FilesetBefore, deleteFilesFn: fs.DeleteFiles, @@ -715,8 +717,8 @@ func (s *dbShard) purgeExpiredSeries(expiredEntries []*dbShardEntry) { s.Lock() for _, entry := range expiredEntries { series := entry.series - hash := series.ID().Hash() - elem, exists := s.lookup[hash] + id := series.ID() + elem, exists := s.lookup.Get(id) if !exists { continue } @@ -745,7 +747,7 @@ func (s *dbShard) purgeExpiredSeries(expiredEntries []*dbShardEntry) { // safe to remove it. 
 			series.Close()
 		s.list.Remove(elem)
-		delete(s.lookup, hash)
+		s.lookup.Delete(id)
 	}
 	s.Unlock()
 }
@@ -934,7 +936,7 @@ func (s *dbShard) lookupEntryWithLock(id ident.ID) (*dbShardEntry, *list.Element
 		// callers will not retry this operation
 		return nil, nil, xerrors.NewInvalidParamsError(errShardNotOpen)
 	}
-	elem, exists := s.lookup[id.Hash()]
+	elem, exists := s.lookup.Get(id)
 	if !exists {
 		return nil, nil, errShardEntryNotFound
 	}
@@ -1103,16 +1105,30 @@ func (s *dbShard) insertSeriesSync(
 		}
 	}
 
+	copiedID := entry.series.ID()
+	copiedTags := entry.series.Tags()
 	if s.reverseIndex != nil {
-		if err := s.reverseIndex.Write(entry.series.ID(), entry.series.Tags(), entry); err != nil {
+		if err := s.reverseIndex.Write(copiedID, copiedTags, entry); err != nil {
 			return nil, err
 		}
 	}
 
-	s.lookup[entry.series.ID().Hash()] = s.list.PushBack(entry)
+	s.insertNewShardEntryWithLock(entry)
 	return entry, nil
 }
 
+func (s *dbShard) insertNewShardEntryWithLock(entry *dbShardEntry) {
+	// Set the lookup value; we use the copied ID, and since it is GC'd we
+	// explicitly set it with options to neither copy nor finalize the key.
+	copiedID := entry.series.ID()
+	listElem := s.list.PushBack(entry)
+	s.lookup.SetUnsafe(copiedID, listElem, shardMapSetUnsafeOptions{
+		NoCopyKey:     true,
+		NoFinalizeKey: true,
+	})
+}
+
 func (s *dbShard) insertSeriesBatch(inserts []dbShardInsert) error {
 	anyPendingAction := false
 
@@ -1157,7 +1173,7 @@ func (s *dbShard) insertSeriesBatch(inserts []dbShardInsert) error {
 				s.metrics.insertAsyncBootstrapErrors.Inc(1)
 			}
 		}
-		s.lookup[entry.series.ID().Hash()] = s.list.PushBack(entry)
+		s.insertNewShardEntryWithLock(entry)
 	}
 	s.Unlock()
 
@@ -1555,7 +1571,7 @@ func (s *dbShard) FetchBlocksMetadataV2(
 }
 
 func (s *dbShard) Bootstrap(
-	bootstrappedSeries map[ident.Hash]result.DatabaseSeriesBlocks,
+	bootstrappedSeries *result.Map,
 ) error {
 	s.Lock()
 	if s.bs == bootstrapped {
@@ -1570,7 +1586,9 @@ func (s *dbShard) Bootstrap(
 	s.Unlock()
 
 	multiErr := xerrors.NewMultiError()
-	for _, dbBlocks := range bootstrappedSeries {
+	for _, elem := range bootstrappedSeries.Iter() {
+		dbBlocks := elem.Value()
+
 		// First lookup if series already exists
 		entry, _, err := s.tryRetrieveWritableSeries(dbBlocks.ID)
 		if err != nil {
diff --git a/storage/shard_map_gen.go b/storage/shard_map_gen.go
new file mode 100644
index 0000000000..644896c1c2
--- /dev/null
+++ b/storage/shard_map_gen.go
@@ -0,0 +1,259 @@
+// This file was automatically generated by genny.
+// Any changes will be lost if this file is regenerated.
+// see https://github.com/mauricelam/genny
+
+package storage
+
+import (
+	"github.com/m3db/m3x/ident"
+)
+
+// This file was automatically generated by genny.
+// Any changes will be lost if this file is regenerated.
+// see https://github.com/mauricelam/genny
+
+// Copyright (c) 2018 Uber Technologies, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// shardMapHash is the hash for a given map entry; this is public to support
+// iterating over the map using a native Go for loop.
+type shardMapHash uint64
+
+// shardMapHashFn is the hash function to execute when hashing a key.
+type shardMapHashFn func(ident.ID) shardMapHash
+
+// shardMapEqualsFn is the equals key function to execute when detecting equality of a key.
+type shardMapEqualsFn func(ident.ID, ident.ID) bool
+
+// shardMapCopyFn is the copy key function to execute when copying the key.
+type shardMapCopyFn func(ident.ID) ident.ID
+
+// shardMapFinalizeFn is the finalize key function to execute when finished with a key.
+type shardMapFinalizeFn func(ident.ID)
+
+// shardMap uses the genny package to provide a generic hash map that can be
+// specialized by running the following command from the root of the repository:
+// ```
+// make hashmap-gen pkg=outpkg key_type=Type value_type=Type out_dir=/tmp
+// ```
+// Or, if you would like to use bytes or ident.ID as keys, you can use the
+// partially specialized maps to generate your own maps as well:
+// ```
+// make byteshashmap-gen pkg=outpkg value_type=Type out_dir=/tmp
+// make idhashmap-gen pkg=outpkg value_type=Type out_dir=/tmp
+// ```
+// This will output the generated source file for your map to stdout.
+// The map resolves collisions by linear probing: when the hash computed for
+// an identifier collides, it is incremented until a free slot is found.
+// shardMap is a value type and not an interface to allow for less painful
+// upgrades when adding/removing methods; it is unlikely to need mocking, so
+// an interface would not be particularly useful either.
+type shardMap struct {
+	_shardMapOptions
+
+	// lookup uses the hash of the identifier as the key, and the shardMapEntry
+	// value wraps both the value type and the key (used to ensure the lookup is
+	// correct when dealing with collisions). We use uint64 for the hash partially
+	// because lookups of maps with uint64 keys have a fast path in Go.
+	lookup map[shardMapHash]shardMapEntry
+}
+
+// _shardMapOptions is a set of options used when creating an identifier map;
+// it is kept private so that implementers of the generated map can specify
+// their own options that partially fulfill these options.
+type _shardMapOptions struct {
+	// hash is the hash function to execute when hashing a key.
+	hash shardMapHashFn
+	// equals is the equals key function to execute when detecting equality.
+	equals shardMapEqualsFn
+	// copy is the copy key function to execute when copying the key.
+	copy shardMapCopyFn
+	// finalize is the finalize key function to execute when finished with a
+	// key; this is optional to specify.
+	finalize shardMapFinalizeFn
+	// initialSize is the initial size for the map; use zero to fall back to
+	// Go's standard map initial size, so it is optional to specify.
+	initialSize int
+}
+
+// shardMapEntry is an entry in the map; this is public to support iterating
+// over the map using a native Go for loop.
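+//
+// For example, a read-only iteration sketch (editor's illustration; `m` is
+// assumed to be a *shardMap):
+//
+//	for _, entry := range m.Iter() {
+//		id, elem := entry.Key(), entry.Value()
+//		_, _ = id, elem // read only; never mutate the map while iterating
+//	}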
+type shardMapEntry struct {
+	// key is used to check equality on lookups to resolve collisions
+	key _shardMapKey
+	// value is the value type stored
+	value shardListElement
+}
+
+type _shardMapKey struct {
+	key      ident.ID
+	finalize bool
+}
+
+// Key returns the map entry key.
+func (e shardMapEntry) Key() ident.ID {
+	return e.key.key
+}
+
+// Value returns the map entry value.
+func (e shardMapEntry) Value() shardListElement {
+	return e.value
+}
+
+// _shardMapAlloc is a non-exported function so that when generating the source code
+// for the map you can supply a public constructor that sets the correct
+// hash, equals, copy, finalize options without users of the map needing to
+// implement them themselves.
+func _shardMapAlloc(opts _shardMapOptions) *shardMap {
+	m := &shardMap{_shardMapOptions: opts}
+	m.Reallocate()
+	return m
+}
+
+func (m *shardMap) newMapKey(k ident.ID, opts _shardMapKeyOptions) _shardMapKey {
+	key := _shardMapKey{key: k, finalize: opts.finalizeKey}
+	if !opts.copyKey {
+		return key
+	}
+
+	key.key = m.copy(k)
+	return key
+}
+
+func (m *shardMap) removeMapKey(hash shardMapHash, key _shardMapKey) {
+	delete(m.lookup, hash)
+	if key.finalize {
+		m.finalize(key.key)
+	}
+}
+
+// Get returns a value in the map for an identifier if found.
+func (m *shardMap) Get(k ident.ID) (shardListElement, bool) {
+	hash := m.hash(k)
+	for entry, ok := m.lookup[hash]; ok; entry, ok = m.lookup[hash] {
+		if m.equals(entry.key.key, k) {
+			return entry.value, true
+		}
+		// Linear probe to the slot "next" to this entry (really a rehash)
+		hash++
+	}
+	var empty shardListElement
+	return empty, false
+}
+
+// Set will set the value for an identifier.
+func (m *shardMap) Set(k ident.ID, v shardListElement) {
+	m.set(k, v, _shardMapKeyOptions{
+		copyKey:     true,
+		finalizeKey: m.finalize != nil,
+	})
+}
+
+// shardMapSetUnsafeOptions is a set of options to use when setting a value with
+// the SetUnsafe method.
+type shardMapSetUnsafeOptions struct {
+	NoCopyKey     bool
+	NoFinalizeKey bool
+}
+
+// SetUnsafe will set the value for an identifier with unsafe options for how
+// the map treats the key.
+func (m *shardMap) SetUnsafe(k ident.ID, v shardListElement, opts shardMapSetUnsafeOptions) {
+	m.set(k, v, _shardMapKeyOptions{
+		copyKey:     !opts.NoCopyKey,
+		finalizeKey: !opts.NoFinalizeKey,
+	})
+}
+
+type _shardMapKeyOptions struct {
+	copyKey     bool
+	finalizeKey bool
+}
+
+func (m *shardMap) set(k ident.ID, v shardListElement, opts _shardMapKeyOptions) {
+	hash := m.hash(k)
+	for entry, ok := m.lookup[hash]; ok; entry, ok = m.lookup[hash] {
+		if m.equals(entry.key.key, k) {
+			m.lookup[hash] = shardMapEntry{
+				key:   entry.key,
+				value: v,
+			}
+			return
+		}
+		// Linear probe to the slot "next" to this entry (really a rehash)
+		hash++
+	}
+
+	m.lookup[hash] = shardMapEntry{
+		key:   m.newMapKey(k, opts),
+		value: v,
+	}
+}
+
+// Iter provides the underlying map to allow for using a native Go for loop
+// to iterate the map; however, callers should only ever read and never write
+// the map.
+func (m *shardMap) Iter() map[shardMapHash]shardMapEntry {
+	return m.lookup
+}
+
+// Len returns the number of entries in the map.
+func (m *shardMap) Len() int {
+	return len(m.lookup)
+}
+
+// Contains returns true if a value exists for the key and false otherwise;
+// it is shorthand for a call to Get that discards the value.
+func (m *shardMap) Contains(k ident.ID) bool {
+	_, ok := m.Get(k)
+	return ok
+}
+
+// Delete will remove a value set in the map for the specified key.
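+//
+// If the key was stored with finalization enabled, Delete also finalizes the
+// stored key via removeMapKey. For example (editor's illustration):
+//
+//	m.Delete(ident.StringID("series"))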
+func (m *shardMap) Delete(k ident.ID) {
+	hash := m.hash(k)
+	for entry, ok := m.lookup[hash]; ok; entry, ok = m.lookup[hash] {
+		if m.equals(entry.key.key, k) {
+			m.removeMapKey(hash, entry.key)
+			return
+		}
+		// Linear probe to the slot "next" to this entry (really a rehash)
+		hash++
+	}
+}
+
+// Reset will reset the map by simply deleting all keys to avoid
+// allocating a new map.
+func (m *shardMap) Reset() {
+	for hash, entry := range m.lookup {
+		m.removeMapKey(hash, entry.key)
+	}
+}
+
+// Reallocate will avoid deleting all keys and instead allocate a new
+// map; this is useful if you believe you have a large map and
+// will not need to grow back to a similar size.
+func (m *shardMap) Reallocate() {
+	if m.initialSize > 0 {
+		m.lookup = make(map[shardMapHash]shardMapEntry, m.initialSize)
+	} else {
+		m.lookup = make(map[shardMapHash]shardMapEntry)
+	}
+}
diff --git a/storage/shard_new_map_gen.go b/storage/shard_new_map_gen.go
new file mode 100644
index 0000000000..37cf3d951d
--- /dev/null
+++ b/storage/shard_new_map_gen.go
@@ -0,0 +1,76 @@
+// This file was automatically generated by genny.
+// Any changes will be lost if this file is regenerated.
+// see https://github.com/mauricelam/genny
+
+package storage
+
+import (
+	"github.com/m3db/m3x/ident"
+
+	"github.com/m3db/m3x/pool"
+
+	"github.com/cespare/xxhash"
+)
+
+// Copyright (c) 2018 Uber Technologies, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// shardMapOptions provides options used when creating the map.
+type shardMapOptions struct {
+	InitialSize int
+	KeyCopyPool pool.BytesPool
+}
+
+// newShardMap returns a new byte-keyed map.
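+//
+// In this patch the shard wires this map up as its series lookup (see
+// storage/shard.go): it is constructed with newShardMap(shardMapOptions{}),
+// and entries whose keys are already-copied, GC'd IDs are inserted with
+// SetUnsafe and NoCopyKey/NoFinalizeKey set to avoid a redundant copy.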
+func newShardMap(opts shardMapOptions) *shardMap { + var ( + copyFn shardMapCopyFn + finalizeFn shardMapFinalizeFn + ) + if pool := opts.KeyCopyPool; pool == nil { + copyFn = func(k ident.ID) ident.ID { + return ident.BytesID(append([]byte(nil), k.Bytes()...)) + } + } else { + copyFn = func(k ident.ID) ident.ID { + bytes := k.Bytes() + keyLen := len(bytes) + pooled := pool.Get(keyLen)[:keyLen] + copy(pooled, bytes) + return ident.BytesID(pooled) + } + finalizeFn = func(k ident.ID) { + if slice, ok := k.(ident.BytesID); ok { + pool.Put(slice) + } + } + } + return _shardMapAlloc(_shardMapOptions{ + hash: func(id ident.ID) shardMapHash { + return shardMapHash(xxhash.Sum64(id.Bytes())) + }, + equals: func(x, y ident.ID) bool { + return x.Equal(y) + }, + copy: copyFn, + finalize: finalizeFn, + initialSize: opts.InitialSize, + }) +} diff --git a/storage/shard_test.go b/storage/shard_test.go index 0c0ae95fe0..8ecd295a8e 100644 --- a/storage/shard_test.go +++ b/storage/shard_test.go @@ -82,7 +82,9 @@ func addMockSeries(ctrl *gomock.Controller, shard *dbShard, id ident.ID, tags id series.EXPECT().ID().Return(id).AnyTimes() series.EXPECT().Tags().Return(tags).AnyTimes() series.EXPECT().IsEmpty().Return(false).AnyTimes() - shard.lookup[id.Hash()] = shard.list.PushBack(&dbShardEntry{series: series, index: index}) + shard.Lock() + shard.insertNewShardEntryWithLock(&dbShardEntry{series: series, index: index}) + shard.Unlock() return series } @@ -134,8 +136,10 @@ func TestShardBootstrapWithError(t *testing.T) { barSeries := series.NewMockDatabaseSeries(ctrl) barSeries.EXPECT().ID().Return(ident.StringID("bar")).AnyTimes() barSeries.EXPECT().IsEmpty().Return(false).AnyTimes() - s.lookup[ident.StringID("foo").Hash()] = s.list.PushBack(&dbShardEntry{series: fooSeries}) - s.lookup[ident.StringID("bar").Hash()] = s.list.PushBack(&dbShardEntry{series: barSeries}) + s.Lock() + s.insertNewShardEntryWithLock(&dbShardEntry{series: fooSeries}) + s.insertNewShardEntryWithLock(&dbShardEntry{series: barSeries}) + s.Unlock() fooBlocks := block.NewMockDatabaseSeriesBlocks(ctrl) barBlocks := block.NewMockDatabaseSeriesBlocks(ctrl) @@ -147,10 +151,9 @@ func TestShardBootstrapWithError(t *testing.T) { fooID := ident.StringID("foo") barID := ident.StringID("bar") - bootstrappedSeries := map[ident.Hash]result.DatabaseSeriesBlocks{ - fooID.Hash(): {ID: fooID, Blocks: fooBlocks}, - barID.Hash(): {ID: barID, Blocks: barBlocks}, - } + bootstrappedSeries := result.NewMap(result.MapOptions{}) + bootstrappedSeries.Set(fooID, result.DatabaseSeriesBlocks{ID: fooID, Blocks: fooBlocks}) + bootstrappedSeries.Set(barID, result.DatabaseSeriesBlocks{ID: barID, Blocks: barBlocks}) err := s.Bootstrap(bootstrappedSeries) @@ -425,7 +428,7 @@ func addMockTestSeries(ctrl *gomock.Controller, shard *dbShard, id ident.ID) *se series := series.NewMockDatabaseSeries(ctrl) series.EXPECT().ID().AnyTimes().Return(id) shard.Lock() - shard.lookup[id.Hash()] = shard.list.PushBack(&dbShardEntry{series: series}) + shard.insertNewShardEntryWithLock(&dbShardEntry{series: series}) shard.Unlock() return series } @@ -438,8 +441,7 @@ func addTestSeriesWithCount(shard *dbShard, id ident.ID, count int32) series.Dat series := series.NewDatabaseSeries(id, nil, shard.seriesOpts) series.Bootstrap(nil) shard.Lock() - shard.lookup[id.Hash()] = shard.list.PushBack(&dbShardEntry{ - series: series, curReadWriters: count}) + shard.insertNewShardEntryWithLock(&dbShardEntry{series: series, curReadWriters: count}) shard.Unlock() return series } @@ -614,7 +616,7 @@ func 
TestShardTickRace(t *testing.T) { wg.Wait() shard.RLock() - shardlen := len(shard.lookup) + shardlen := shard.lookup.Len() shard.RUnlock() require.Equal(t, 0, shardlen) } @@ -626,7 +628,7 @@ func TestShardTickCleanupSmallBatchSize(t *testing.T) { shard := testDatabaseShard(t, opts) addTestSeries(shard, ident.StringID("foo")) shard.Tick(context.NewNoOpCanncellable()) - require.Equal(t, 0, len(shard.lookup)) + require.Equal(t, 0, shard.lookup.Len()) } // This tests ensures the shard returns an error if two ticks are triggered concurrently. @@ -752,7 +754,7 @@ func TestPurgeExpiredSeriesEmptySeries(t *testing.T) { shard.Tick(context.NewNoOpCanncellable()) shard.RLock() - require.Equal(t, 0, len(shard.lookup)) + require.Equal(t, 0, shard.lookup.Len()) shard.RUnlock() } @@ -795,7 +797,7 @@ func TestPurgeExpiredSeriesWriteAfterTicking(t *testing.T) { require.NoError(t, err) require.Equal(t, 0, r.activeSeries) require.Equal(t, 1, r.expiredSeries) - require.Equal(t, 1, len(shard.lookup)) + require.Equal(t, 1, shard.lookup.Len()) } // This tests the scenario where tickForEachSeries finishes, and before purgeExpiredSeries @@ -823,10 +825,10 @@ func TestPurgeExpiredSeriesWriteAfterPurging(t *testing.T) { require.NoError(t, err) require.Equal(t, 0, r.activeSeries) require.Equal(t, 1, r.expiredSeries) - require.Equal(t, 1, len(shard.lookup)) + require.Equal(t, 1, shard.lookup.Len()) entry.decrementReaderWriterCount() - require.Equal(t, 1, len(shard.lookup)) + require.Equal(t, 1, shard.lookup.Len()) } func TestForEachShardEntry(t *testing.T) { diff --git a/storage/storage_mock.go b/storage/storage_mock.go index e9d3f785e3..f48ce49b7d 100644 --- a/storage/storage_mock.go +++ b/storage/storage_mock.go @@ -1202,7 +1202,7 @@ func (_mr *MockdatabaseShardMockRecorder) FetchBlocksMetadataV2(arg0, arg1, arg2 } // Bootstrap mocks base method -func (_m *MockdatabaseShard) Bootstrap(bootstrappedSeries map[ident.Hash]result.DatabaseSeriesBlocks) error { +func (_m *MockdatabaseShard) Bootstrap(bootstrappedSeries *result.Map) error { ret := _m.ctrl.Call(_m, "Bootstrap", bootstrappedSeries) ret0, _ := ret[0].(error) return ret0 diff --git a/storage/types.go b/storage/types.go index a5a62901ed..e78c427fa6 100644 --- a/storage/types.go +++ b/storage/types.go @@ -202,7 +202,7 @@ type NamespacesByID []Namespace func (n NamespacesByID) Len() int { return len(n) } func (n NamespacesByID) Swap(i, j int) { n[i], n[j] = n[j], n[i] } func (n NamespacesByID) Less(i, j int) bool { - return bytes.Compare(n[i].ID().Data().Get(), n[j].ID().Data().Get()) < 0 + return bytes.Compare(n[i].ID().Data().Bytes(), n[j].ID().Data().Bytes()) < 0 } type databaseNamespace interface { @@ -385,7 +385,7 @@ type databaseShard interface { ) (block.FetchBlocksMetadataResults, PageToken, error) Bootstrap( - bootstrappedSeries map[ident.Hash]result.DatabaseSeriesBlocks, + bootstrappedSeries *result.Map, ) error // Flush flushes the series' in this shard. 
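For callers, the `databaseShard.Bootstrap` signature change above is mechanical: code that previously built and ranged over a `map[ident.Hash]result.DatabaseSeriesBlocks` now builds and iterates a generated `*result.Map`. A minimal sketch of the new call pattern, mirroring the test updates in this patch (the function name `bootstrapInput` is illustrative only):

```go
package example

import (
	"github.com/m3db/m3db/storage/bootstrap/result"
	"github.com/m3db/m3x/ident"
)

// bootstrapInput builds the generated map that replaces the old
// map[ident.Hash]result.DatabaseSeriesBlocks literal, then iterates it
// the way dbShard.Bootstrap now does.
func bootstrapInput(id ident.ID, dbBlocks result.DatabaseSeriesBlocks) *result.Map {
	m := result.NewMap(result.MapOptions{}) // zero options: std map size, default key copying
	m.Set(id, dbBlocks)                     // Set copies the key; SetUnsafe can opt out
	for _, entry := range m.Iter() {
		blocks := entry.Value() // values round-trip unchanged
		_ = blocks.ID
	}
	return m
}
```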
diff --git a/tools/read_data_files/main/main.go b/tools/read_data_files/main/main.go index d8cab39b0d..2286b256b2 100644 --- a/tools/read_data_files/main/main.go +++ b/tools/read_data_files/main/main.go @@ -75,7 +75,7 @@ func main() { } data.IncRef() - iter := m3tsz.NewReaderIterator(bytes.NewReader(data.Get()), true, encodingOpts) + iter := m3tsz.NewReaderIterator(bytes.NewReader(data.Bytes()), true, encodingOpts) for iter.Next() { dp, _, _ := iter.Current() // Use fmt package so it goes to stdout instead of stderr diff --git a/tools/read_ids/main/main.go b/tools/read_ids/main/main.go index 3794e6dd60..045b0b8f33 100644 --- a/tools/read_ids/main/main.go +++ b/tools/read_ids/main/main.go @@ -100,7 +100,7 @@ func main() { attemptFn := func() error { tctx, _ := thrift.NewContext(60 * time.Second) req := rpc.NewFetchBlocksMetadataRawRequest() - req.NameSpace = ident.StringID(namespace).Data().Get() + req.NameSpace = ident.StringID(namespace).Data().Bytes() req.Shard = int32(shard) req.RangeStart = 0 req.RangeEnd = time.Now().Add(365 * 24 * time.Hour).UnixNano() diff --git a/tools/verify_commitlogs/main/main.go b/tools/verify_commitlogs/main/main.go index 42907405f0..89afa9159c 100644 --- a/tools/verify_commitlogs/main/main.go +++ b/tools/verify_commitlogs/main/main.go @@ -264,7 +264,7 @@ func main() { for shard, result := range result.ShardResults() { log.WithFields( xlog.NewField("shard", shard), - xlog.NewField("series", len(result.AllSeries())), + xlog.NewField("series", result.AllSeries().Len()), ).Infof("shard result") } } diff --git a/ts/segment.go b/ts/segment.go index c774a2f54f..65a38d0e66 100644 --- a/ts/segment.go +++ b/ts/segment.go @@ -91,16 +91,16 @@ func (s *Segment) Len() int { func (s *Segment) Equal(other *Segment) bool { var head, tail, otherHead, otherTail []byte if s.Head != nil { - head = s.Head.Get() + head = s.Head.Bytes() } if s.Tail != nil { - tail = s.Tail.Get() + tail = s.Tail.Bytes() } if other.Head != nil { - otherHead = other.Head.Get() + otherHead = other.Head.Bytes() } if other.Tail != nil { - otherTail = other.Tail.Get() + otherTail = other.Tail.Bytes() } return bytes.Equal(append(head, tail...), append(otherHead, otherTail...)) } diff --git a/x/xio/reader.go b/x/xio/reader.go index b06341fef9..1b2ecf71f2 100644 --- a/x/xio/reader.go +++ b/x/xio/reader.go @@ -113,10 +113,10 @@ func (sr *segmentReader) Read(b []byte) (int, error) { } var head, tail []byte if b := sr.segment.Head; b != nil { - head = b.Get() + head = b.Bytes() } if b := sr.segment.Tail; b != nil { - tail = b.Get() + tail = b.Bytes() } nh, nt := len(head), len(tail) if sr.si >= nh+nt { diff --git a/x/xio/reader_test.go b/x/xio/reader_test.go index aea0dc55d5..3c5f57dba7 100644 --- a/x/xio/reader_test.go +++ b/x/xio/reader_test.go @@ -171,6 +171,6 @@ func TestSegmentReader(t *testing.T) { seg, err := r.Segment() require.NoError(t, err) - require.Equal(t, head, seg.Head.Get()) - require.Equal(t, tail, seg.Tail.Get()) + require.Equal(t, head, seg.Head.Bytes()) + require.Equal(t, tail, seg.Tail.Bytes()) } diff --git a/x/xpool/pool_test.go b/x/xpool/pool_test.go index ee7948f899..48b362639b 100644 --- a/x/xpool/pool_test.go +++ b/x/xpool/pool_test.go @@ -39,12 +39,12 @@ func TestCheckedBytesWrapperPool(t *testing.T) { cb := pool.Get(initBytes) cb.IncRef() - require.Equal(t, initBytes, cb.Get()) + require.Equal(t, initBytes, cb.Bytes()) cb.DecRef() cb.Finalize() cb.IncRef() - require.Nil(t, cb.Get()) + require.Nil(t, cb.Bytes()) cb.DecRef() require.Equal(t, []byte("some-string"), initBytes) }
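Finally, the long tail of this patch is the m3x rename of the checked-bytes accessor from `Get()` to `Bytes()`, seen above in ts/segment.go, x/xio, x/xpool, and the tools. A sketch of the call-site update (editor's illustration, assuming `data` is a `checked.Bytes` from github.com/m3db/m3x/checked):

```go
package example

import "github.com/m3db/m3x/checked"

// copyOut reads the underlying bytes with the renamed accessor while
// holding a reference, copying them out before the ref is released.
func copyOut(data checked.Bytes) []byte {
	data.IncRef()
	raw := data.Bytes() // previously data.Get()
	out := append([]byte(nil), raw...)
	data.DecRef()
	return out
}
```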