Skip to content

Commit

Permalink
add simulator Cache as class SimCache/SimLRUCache(with test)
Browse files Browse the repository at this point in the history
Summary: add class SimCache (base class with the instrumentation API) and SimLRUCache (derived class with the detailed implementation), which together act as an instrumented block cache that can predict the hit rate for different cache sizes

Test Plan:
Add a test case in `db_block_cache_test.cc` called `SimCacheTest` to test basic logic of SimCache.
Also add option `-simcache_size` in db_bench. If it is set to a value other than -1, the benchmark uses this value as the size of the simulator cache and outputs the simulation result at the end.
```
[[email protected] ~/local/rocksdb] ./db_bench -benchmarks "fillseq,readrandom" -cache_size 1000000 -simcache_size 1000000
RocksDB:    version 4.8
Date:       Tue May 17 16:56:16 2016
CPU:        32 * Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz
CPUCache:   20480 KB
Keys:       16 bytes each
Values:     100 bytes each (50 bytes after compression)
Entries:    1000000
Prefix:    0 bytes
Keys per prefix:    0
RawSize:    110.6 MB (estimated)
FileSize:   62.9 MB (estimated)
Write rate: 0 bytes/second
Compression: Snappy
Memtablerep: skip_list
Perf Level: 0
WARNING: Assertions are enabled; benchmarks unnecessarily slow
------------------------------------------------
DB path: [/tmp/rocksdbtest-112628/dbbench]
fillseq      :       6.809 micros/op 146874 ops/sec;   16.2 MB/s
DB path: [/tmp/rocksdbtest-112628/dbbench]
readrandom   :       6.343 micros/op 157665 ops/sec;   17.4 MB/s (1000000 of 1000000 found)

SIMULATOR CACHE STATISTICS:
SimCache LOOKUPs: 986559
SimCache HITs:    264760
SimCache HITRATE: 26.84%

[[email protected] ~/local/rocksdb] ./db_bench -benchmarks "fillseq,readrandom" -cache_size 1000000 -simcache_size 10000000
RocksDB:    version 4.8
Date:       Tue May 17 16:57:10 2016
CPU:        32 * Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz
CPUCache:   20480 KB
Keys:       16 bytes each
Values:     100 bytes each (50 bytes after compression)
Entries:    1000000
Prefix:    0 bytes
Keys per prefix:    0
RawSize:    110.6 MB (estimated)
FileSize:   62.9 MB (estimated)
Write rate: 0 bytes/second
Compression: Snappy
Memtablerep: skip_list
Perf Level: 0
WARNING: Assertions are enabled; benchmarks unnecessarily slow
------------------------------------------------
DB path: [/tmp/rocksdbtest-112628/dbbench]
fillseq      :       5.066 micros/op 197394 ops/sec;   21.8 MB/s
DB path: [/tmp/rocksdbtest-112628/dbbench]
readrandom   :       6.457 micros/op 154870 ops/sec;   17.1 MB/s (1000000 of 1000000 found)

SIMULATOR CACHE STATISTICS:
SimCache LOOKUPs: 1059764
SimCache HITs:    374501
SimCache HITRATE: 35.34%

[[email protected] ~/local/rocksdb] ./db_bench -benchmarks "fillseq,readrandom" -cache_size 1000000 -simcache_size 100000000
RocksDB:    version 4.8
Date:       Tue May 17 16:57:32 2016
CPU:        32 * Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz
CPUCache:   20480 KB
Keys:       16 bytes each
Values:     100 bytes each (50 bytes after compression)
Entries:    1000000
Prefix:    0 bytes
Keys per prefix:    0
RawSize:    110.6 MB (estimated)
FileSize:   62.9 MB (estimated)
Write rate: 0 bytes/second
Compression: Snappy
Memtablerep: skip_list
Perf Level: 0
WARNING: Assertions are enabled; benchmarks unnecessarily slow
------------------------------------------------
DB path: [/tmp/rocksdbtest-112628/dbbench]
fillseq      :       5.632 micros/op 177572 ops/sec;   19.6 MB/s
DB path: [/tmp/rocksdbtest-112628/dbbench]
readrandom   :       6.892 micros/op 145094 ops/sec;   16.1 MB/s (1000000 of 1000000 found)

SIMULATOR CACHE STATISTICS:
SimCache LOOKUPs: 1150767
SimCache HITs:    1034535
SimCache HITRATE: 89.90%
```

Reviewers: IslamAbdelRahman, andrewkr, sdong

Reviewed By: sdong

Subscribers: MarkCallaghan, andrewkr, dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D57999
  • Loading branch information
lightmark committed May 24, 2016
1 parent d379d11 commit 5d66025
Show file tree
Hide file tree
Showing 9 changed files with 492 additions and 90 deletions.
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ TESTS = \
backupable_db_test \
document_db_test \
json_document_test \
sim_cache_test \
spatial_db_test \
version_edit_test \
version_set_test \
Expand Down Expand Up @@ -953,6 +954,9 @@ document_db_test: utilities/document/document_db_test.o $(LIBOBJECTS) $(TESTHARN
json_document_test: utilities/document/json_document_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)

sim_cache_test: utilities/simulator_cache/sim_cache_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)

spatial_db_test: utilities/spatialdb/spatial_db_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)

Expand Down
27 changes: 12 additions & 15 deletions include/rocksdb/cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,13 @@
#ifndef STORAGE_ROCKSDB_INCLUDE_CACHE_H_
#define STORAGE_ROCKSDB_INCLUDE_CACHE_H_

#include <memory>
#include <stdint.h>
#include <memory>
#include "rocksdb/slice.h"
#include "rocksdb/status.h"

namespace rocksdb {

using std::shared_ptr;

class Cache;

// Create a new cache with a fixed size capacity. The cache is sharded
Expand All @@ -39,23 +37,23 @@ class Cache;
//
// The parameter num_shard_bits defaults to 4, and strict_capacity_limit
// defaults to false.
extern shared_ptr<Cache> NewLRUCache(size_t capacity);
extern shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits);
extern shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits,
bool strict_capacity_limit);
extern std::shared_ptr<Cache> NewLRUCache(size_t capacity);
extern std::shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits);
extern std::shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits,
bool strict_capacity_limit);

class Cache {
public:
Cache() { }
Cache() {}

// Destroys all existing entries by calling the "deleter"
// function that was passed via the Insert() function.
//
// @See Insert
virtual ~Cache();
virtual ~Cache() {}

// Opaque handle to an entry stored in the cache.
struct Handle { };
struct Handle {};

// Insert a mapping from key->value into the cache and assign it
// the specified charge against the total cache capacity.
Expand Down Expand Up @@ -98,9 +96,8 @@ class Cache {
// underlying entry will be kept around until all existing handles
// to it have been released.
virtual void Erase(const Slice& key) = 0;

// Return a new numeric id. May be used by multiple clients who are
// sharing the same cache to partition the key space. Typically the
// sharding the same cache to partition the key space. Typically the
// client will allocate a new id at startup and prepend the id to
// its cache keys.
virtual uint64_t NewId() = 0;
Expand Down Expand Up @@ -136,8 +133,8 @@ class Cache {
// memory - call this only if you're shutting down the process.
// Any attempts of using cache after this call will fail terribly.
// Always delete the DB object before calling this method!
virtual void DisownData() {
// default implementation is noop
virtual void DisownData(){
// default implementation is noop
};

// Apply callback to all entries in the cache
Expand All @@ -157,7 +154,7 @@ class Cache {

// No copying allowed
Cache(const Cache&);
void operator=(const Cache&);
Cache& operator=(const Cache&);
};

} // namespace rocksdb
Expand Down
67 changes: 67 additions & 0 deletions include/rocksdb/utilities/sim_cache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

#pragma once

#include <stdint.h>
#include <memory>
#include <string>
#include "rocksdb/cache.h"
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "util/lru_cache_handle.h"

namespace rocksdb {

class SimCache;

// For instrumentation purposes, use NewSimCache instead of the NewLRUCache
// API. NewSimCache is a wrapper function returning a SimCache instance, which
// offers the additional interface declared in the SimCache class on top of the
// Cache interface in order to predict the block cache hit rate without
// actually allocating the memory. It can help users tune their current block
// cache size and determine how efficiently they are using the memory.
//
// cache:          the real cache that the returned SimCache wraps.
// sim_capacity:   capacity, in bytes, of the simulated cache.
// num_shard_bits: presumably the number of shard bits used by the simulated
//                 cache, as for NewLRUCache -- TODO confirm against the
//                 implementation in utilities/simulator_cache/sim_cache.cc.
extern std::shared_ptr<SimCache> NewSimCache(std::shared_ptr<Cache> cache,
size_t sim_capacity,
int num_shard_bits);

// Abstract interface of a cache that, in addition to the regular Cache
// interface, exposes a simulated cache together with lookup/hit counters so
// that the hit rate for a given simulated capacity can be observed.
// Instances are obtained through NewSimCache().
class SimCache : public Cache {
public:
SimCache() {}

virtual ~SimCache() {}

// Returns the maximum configured capacity of the simcache used for the
// simulation.
virtual size_t GetSimCapacity() const = 0;

// The simcache doesn't hand out internal handle references to the user, so
// its PinnedUsage is always 0 and its behavior will not be exactly
// consistent with that of the real cache.
// Returns the memory size of the entries residing in the simcache.
virtual size_t GetSimUsage() const = 0;

// Sets the maximum configured capacity of the simcache. When the new
// capacity is less than the old capacity and the existing usage is
// greater than the new capacity, the implementation will purge old entries
// to fit the new capacity.
virtual void SetSimCapacity(size_t capacity) = 0;

// Returns the number of lookups performed on the simcache.
virtual uint64_t get_lookup_counter() const = 0;
// Returns the number of those lookups that were hits in the simcache.
virtual uint64_t get_hit_counter() const = 0;
// Returns the hit rate of the simcache.
// NOTE(review): whether this is a fraction or a percentage is not visible
// from this header -- confirm against the implementation.
virtual double get_hit_rate() const = 0;
// Resets the lookup and hit counters.
virtual void reset_counter() = 0;
// Returns a string representation of the statistics of the simcache.
virtual std::string ToString() const = 0;

private:
// No copying allowed: declared private and left without a definition here.
SimCache(const SimCache&);
SimCache& operator=(const SimCache&);
};

} // namespace rocksdb
2 changes: 2 additions & 0 deletions src.mk
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ LIB_SOURCES = \
utilities/merge_operators/uint64add.cc \
utilities/options/options_util.cc \
utilities/redis/redis_lists.cc \
utilities/simulator_cache/sim_cache.cc \
utilities/spatialdb/spatial_db.cc \
utilities/table_properties_collectors/compact_on_deletion_collector.cc \
utilities/transactions/optimistic_transaction_impl.cc \
Expand Down Expand Up @@ -272,6 +273,7 @@ TEST_BENCH_SOURCES = \
utilities/merge_operators/string_append/stringappend_test.cc \
utilities/options/options_util_test.cc \
utilities/redis/redis_lists_test.cc \
utilities/simulator_cache/sim_cache_test.cc \
utilities/spatialdb/spatial_db_test.cc \
utilities/table_properties_collectors/compact_on_deletion_collector_test.cc \
utilities/transactions/optimistic_transaction_test.cc \
Expand Down
24 changes: 22 additions & 2 deletions tools/db_bench_tool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
#include "rocksdb/slice_transform.h"
#include "rocksdb/utilities/flashcache.h"
#include "rocksdb/utilities/optimistic_transaction_db.h"
#include "rocksdb/utilities/sim_cache.h"
#include "rocksdb/utilities/transaction.h"
#include "rocksdb/utilities/transaction_db.h"
#include "rocksdb/write_batch.h"
Expand Down Expand Up @@ -334,8 +335,13 @@ DEFINE_int32(universal_compression_size_percent, -1,
DEFINE_bool(universal_allow_trivial_move, false,
"Allow trivial move in universal compaction.");

DEFINE_int64(cache_size, -1, "Number of bytes to use as a cache of uncompressed"
"data. Negative means use default settings.");
DEFINE_int64(cache_size, -1,
"Number of bytes to use as a cache of uncompressed"
" data. Negative means use default settings.");

// Size of the simulator cache that wraps the block cache. Unlike cache_size
// there is no "default" simcache: the benchmark only creates one when this
// flag is non-negative, so the help text must say that a negative value
// disables it rather than selecting default settings.
DEFINE_int64(simcache_size, -1,
             "Number of bytes to use as a simcache of "
             "uncompressed data. Negative means the simcache is disabled.");

DEFINE_bool(cache_index_and_filter_blocks, false,
"Cache index/filter blocks in block cache.");
Expand Down Expand Up @@ -1808,6 +1814,16 @@ class Benchmark {
merge_keys_(FLAGS_merge_keys < 0 ? FLAGS_num : FLAGS_merge_keys),
report_file_operations_(FLAGS_report_file_operations),
cachedev_fd_(-1) {
// A non-negative -simcache_size wraps the block cache in a simulator cache.
if (FLAGS_simcache_size >= 0) {
  // Shard-bit settings below 1 fall back to 0 shard bits.
  const int sim_shard_bits =
      FLAGS_cache_numshardbits >= 1 ? FLAGS_cache_numshardbits : 0;
  cache_ = NewSimCache(cache_, FLAGS_simcache_size, sim_shard_bits);
}

if (report_file_operations_) {
if (!FLAGS_hdfs.empty()) {
fprintf(stderr,
Expand Down Expand Up @@ -2101,6 +2117,10 @@ class Benchmark {
if (FLAGS_statistics) {
fprintf(stdout, "STATISTICS:\n%s\n", dbstats->ToString().c_str());
}
// Report simulator statistics only when the simulator cache was actually
// installed, i.e. under the same condition the constructor uses to create it.
// The flag defaults to -1, which is non-zero, so a plain truthiness test
// would run this branch without a SimCache and dereference the null result
// of the cast below.
if (FLAGS_simcache_size >= 0) {
  const auto sim_cache = std::dynamic_pointer_cast<SimCache>(cache_);
  // Guard against cache_ not being a SimCache instead of crashing.
  if (sim_cache != nullptr) {
    fprintf(stdout, "SIMULATOR CACHE STATISTICS:\n%s\n",
            sim_cache->ToString().c_str());
  }
}
}

private:
Expand Down
86 changes: 13 additions & 73 deletions util/cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,73 +11,19 @@
#include <stdio.h>
#include <stdlib.h>

#include "rocksdb/cache.h"
#include "port/port.h"
#include "rocksdb/cache.h"
#include "util/autovector.h"
#include "util/hash.h"
#include "util/lru_cache_handle.h"
#include "util/mutexlock.h"

namespace rocksdb {

Cache::~Cache() {
}

namespace {

// LRU cache implementation

// An entry is a variable length heap-allocated structure.
// Entries are referenced by the cache and/or by any external entity.
// The cache keeps all its entries in a table. Some elements
// are also stored on the LRU list.
//
// LRUHandle can be in these states:
// 1. Referenced externally AND in hash table.
// In that case the entry is *not* in the LRU. (refs > 1 && in_cache == true)
// 2. Not referenced externally and in hash table. In that case the entry is
// in the LRU and can be freed. (refs == 1 && in_cache == true)
// 3. Referenced externally and not in hash table. In that case the entry is
// not on the LRU and not in the table. (refs >= 1 && in_cache == false)
//
// All newly created LRUHandles are in state 1. If you call LRUCache::Release
// on an entry in state 1, it will go into state 2. To move from state 1 to
// state 3, either call LRUCache::Erase or LRUCache::Insert with the same key.
// To move from state 2 to state 1, use LRUCache::Lookup.
// Before destruction, make sure that no handles are in state 1. This means
// that any successful LRUCache::Lookup/LRUCache::Insert must have a matching
// LRUCache::Release (to move into state 2) or LRUCache::Erase (for state 3).

struct LRUHandle {
// Cached value; passed to the deleter together with the key in Free().
void* value;
// Callback invoked by Free() with the entry's key and value.
void (*deleter)(const Slice&, void* value);
// Next entry in the same hash table bucket chain.
LRUHandle* next_hash;
// Neighbours on the LRU list. "next == this" marks a temporary handle whose
// key is stored behind "value" (see key()).
LRUHandle* next;
LRUHandle* prev;
size_t charge; // TODO(opt): Only allow uint32_t?
size_t key_length;
uint32_t refs; // a number of refs to this entry
// cache itself is counted as 1
bool in_cache; // true, if this entry is referenced by the hash table
uint32_t hash; // Hash of key(); used for fast sharding and comparisons
char key_data[1]; // Beginning of key

// Returns the key of this entry.
Slice key() const {
// For cheaper lookups, we allow a temporary Handle object
// to store a pointer to a key in "value".
if (next == this) {
return *(reinterpret_cast<Slice*>(value));
} else {
return Slice(key_data, key_length);
}
}

// Runs the deleter on the key/value pair and releases the handle's storage.
// Only valid when the cache holds the sole reference, or when the entry is
// out of the cache with no references left (see the assertion).
void Free() {
assert((refs == 1 && in_cache) || (refs == 0 && !in_cache));
(*deleter)(key(), value);
// Released with delete[] on a char pointer, so the handle is expected to
// have been allocated as a char array.
delete[] reinterpret_cast<char*>(this);
}
};

// We provide our own simple hash table since it removes a whole bunch
// of porting hacks and is also faster than some of the built-in hash
// table implementations in some of the compiler/runtime combinations
Expand Down Expand Up @@ -151,8 +97,7 @@ class HandleTable {
// pointer to the trailing slot in the corresponding linked list.
LRUHandle** FindPointer(const Slice& key, uint32_t hash) {
LRUHandle** ptr = &list_[hash & (length_ - 1)];
while (*ptr != nullptr &&
((*ptr)->hash != hash || key != (*ptr)->key())) {
while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) {
ptr = &(*ptr)->next_hash;
}
return ptr;
Expand Down Expand Up @@ -238,8 +183,7 @@ class LRUCache {
// to hold (usage_ + charge) is freed or the lru list is empty
// This function is not thread safe - it needs to be executed while
// holding the mutex_
void EvictFromLRU(size_t charge,
autovector<LRUHandle*>* deleted);
void EvictFromLRU(size_t charge, autovector<LRUHandle*>* deleted);

// Initialized before use.
size_t capacity_;
Expand Down Expand Up @@ -310,9 +254,8 @@ void LRUCache::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
if (thread_safe) {
mutex_.Lock();
}
table_.ApplyToAllCacheEntries([callback](LRUHandle* h) {
callback(h->value, h->charge);
});
table_.ApplyToAllCacheEntries(
[callback](LRUHandle* h) { callback(h->value, h->charge); });
if (thread_safe) {
mutex_.Unlock();
}
Expand All @@ -338,8 +281,7 @@ void LRUCache::LRU_Append(LRUHandle* e) {
lru_usage_ += e->charge;
}

void LRUCache::EvictFromLRU(size_t charge,
autovector<LRUHandle*>* deleted) {
void LRUCache::EvictFromLRU(size_t charge, autovector<LRUHandle*>* deleted) {
while (usage_ + charge > capacity_ && lru_.next != &lru_) {
LRUHandle* old = lru_.next;
assert(old->in_cache);
Expand Down Expand Up @@ -430,7 +372,7 @@ Status LRUCache::Insert(const Slice& key, uint32_t hash, void* value,
// If the cache is full, we'll have to release it
// It shouldn't happen very often though.
LRUHandle* e = reinterpret_cast<LRUHandle*>(
new char[sizeof(LRUHandle) - 1 + key.size()]);
new char[sizeof(LRUHandle) - 1 + key.size()]);
Status s;
autovector<LRUHandle*> last_reference_list;

Expand Down Expand Up @@ -556,9 +498,7 @@ class ShardedLRUCache : public Cache {
shards_[s].SetStrictCapacityLimit(strict_capacity_limit);
}
}
virtual ~ShardedLRUCache() {
delete[] shards_;
}
virtual ~ShardedLRUCache() { delete[] shards_; }
virtual void SetCapacity(size_t capacity) override {
int num_shards = 1 << num_shard_bits_;
const size_t per_shard = (capacity + (num_shards - 1)) / num_shards;
Expand Down Expand Up @@ -651,16 +591,16 @@ class ShardedLRUCache : public Cache {

} // end anonymous namespace

shared_ptr<Cache> NewLRUCache(size_t capacity) {
std::shared_ptr<Cache> NewLRUCache(size_t capacity) {
return NewLRUCache(capacity, kNumShardBits, false);
}

shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits) {
std::shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits) {
return NewLRUCache(capacity, num_shard_bits, false);
}

shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits,
bool strict_capacity_limit) {
std::shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits,
bool strict_capacity_limit) {
if (num_shard_bits >= 20) {
return nullptr; // the cache cannot be sharded into too many fine pieces
}
Expand Down
Loading

0 comments on commit 5d66025

Please sign in to comment.