Skip to content

Commit ebaec74

Browse files
byahn0996facebook-github-bot
authored and committed
Fix hit ratio issue by fixing key generation logic
Summary: Add a useLegacyKeyGen option to bridge the gap between the old test config files and the current key generation logic. Those configs were built against the old key generation logic, which was changed by D23183497 back in 2020. Currently we cannot generate the expected key pattern and hit ratio with the existing test configs, and this is causing problems in our SSD qual process. Reviewed By: therealgymmy Differential Revision: D69520704 fbshipit-source-id: 3bedef7c4851cc4d94fb93fe912a6d20efb5ddd0
1 parent 3a37d57 commit ebaec74

File tree

6 files changed

+55
-16
lines changed

6 files changed

+55
-16
lines changed

cachelib/cachebench/test_configs/ssd_perf/graph_cache_leader/config.json

+4-2
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@
5353
"loneGetRatio": 0.2315436539873129,
5454
"popDistFile": "fbobj_pop.json",
5555
"setRatio": 0.0,
56-
"valSizeDistFile": "fbobj_sizes.json"
56+
"valSizeDistFile": "fbobj_sizes.json",
57+
"useLegacyKeyGen": true
5758
},
5859
{
5960
"addChainedRatio": 0.0,
@@ -69,7 +70,8 @@
6970
"loneGetRatio": 0.1158887280020357,
7071
"popDistFile": "assoc_pop.json",
7172
"setRatio": 0.0,
72-
"valSizeDistFile": "assoc_sizes.json"
73+
"valSizeDistFile": "assoc_sizes.json",
74+
"useLegacyKeyGen": true
7375
}
7476
]
7577
}

cachelib/cachebench/test_configs/ssd_perf/kvcache_l2_wc/config.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@
4242
"setRatio": 0.0,
4343
"popDistFile": "pop.json",
4444
"setRatio": 0.0,
45-
"valSizeDistFile": "sizes.json"
45+
"valSizeDistFile": "sizes.json",
46+
"useLegacyKeyGen": true
4647
}
4748
],
4849

cachelib/cachebench/util/Config.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ DistributionConfig::DistributionConfig(const folly::dynamic& jsonConfig,
171171
JSONSetVal(jsonConfig, updateRatio);
172172
JSONSetVal(jsonConfig, couldExistRatio);
173173

174+
JSONSetVal(jsonConfig, useLegacyKeyGen);
175+
174176
auto readFile = [&](const std::string& f) {
175177
std::string str;
176178
const std::string path = folly::sformat("{}/{}", configPath, f);
@@ -194,7 +196,7 @@ DistributionConfig::DistributionConfig(const folly::dynamic& jsonConfig,
194196
JSONSetVal(configJsonPop, popularityWeights);
195197
}
196198

197-
checkCorrectSize<DistributionConfig, 368>();
199+
checkCorrectSize<DistributionConfig, 376>();
198200
}
199201

200202
ReplayGeneratorConfig::ReplayGeneratorConfig(const folly::dynamic& configJson) {

cachelib/cachebench/util/Config.h

+15
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,21 @@ struct DistributionConfig : public JSONConfig {
7474
double updateRatio{0.0};
7575
double couldExistRatio{0.0};
7676

77+
// Set useLegacyKeyGen true when using the old distribution data based on old
78+
// key generation scheme. (ex. test configs like graph_cache_leader or
79+
// kvcache_l2_wc).
80+
//
81+
// Our old key generation scheme didn't populate all the keys within the key
82+
// space. It was just using some of the keys which were grabbed for the
83+
// popularity data from the production workload. So, even though numKeys in
84+
// config can be configured to any number, # of total utilized keys is always
85+
// # of collected keys or less than that. This was changed to generate any key
86+
// within the key space given by numKeys, but with the new scheme, old test
87+
// configs' key population data is not working anymore. Since we still need to
88+
// test with the old test configs based on old key generation scheme, this
89+
// option is added.
90+
bool useLegacyKeyGen{false};
91+
7792
bool usesChainedItems() const { return addChainedRatio > 0; }
7893

7994
// for continuous value sizes, the probability is expressed per interval

cachelib/cachebench/workload/FastDiscrete.h

+26-11
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,11 @@ class FastDiscreteDistribution final : public Distribution {
8282
size_t right,
8383
std::vector<size_t> sizes,
8484
std::vector<double> probs,
85+
bool useLegacyKeyGen,
8586
size_t numBuckets = 2048)
86-
: leftOffset_(left), rightOffset_(right) {
87+
: leftOffset_(left),
88+
rightOffset_(right),
89+
useLegacyKeyGen_(useLegacyKeyGen) {
8790
double totalWeight = std::accumulate(probs.begin(), probs.end(), 0.0);
8891
double totalObjects = std::accumulate(sizes.begin(), sizes.end(), 0.0);
8992
bucketWeight_ = totalWeight / numBuckets;
@@ -113,15 +116,25 @@ class FastDiscreteDistribution final : public Distribution {
113116
// Determine bucket size. In doing so, take max against 1
114117
// before and after scaling to account for scaling factors
115118
objectsSeen = std::max(1UL, objectsSeen);
116-
auto scaledObjects =
117-
static_cast<uint64_t>(objectsSeen * scalingFactor_);
118-
buckets.push_back(std::max(1UL, scaledObjects));
119-
DCHECK_LE(bucketOffsets_.back() + buckets.back(), rightOffset_);
120-
121-
// determine the offset for next bucket
122-
auto nextOffset = static_cast<uint64_t>(sumObjects * scalingFactor_);
123-
bucketOffsets_.push_back(nextOffset);
124-
DCHECK_LE(nextOffset, rightOffset_);
119+
120+
if (useLegacyKeyGen_) {
121+
buckets.push_back(std::max(1UL, objectsSeen));
122+
DCHECK_LE(bucketOffsets_.back() + buckets.back(), rightOffset_);
123+
124+
bucketOffsets_.push_back(sumObjects);
125+
DCHECK_LE(sumObjects, rightOffset_);
126+
} else {
127+
auto scaledObjects =
128+
static_cast<uint64_t>(objectsSeen * scalingFactor_);
129+
buckets.push_back(std::max(1UL, scaledObjects));
130+
DCHECK_LE(bucketOffsets_.back() + buckets.back(), rightOffset_);
131+
132+
// determine the offset for next bucket
133+
auto nextOffset = static_cast<uint64_t>(sumObjects * scalingFactor_);
134+
bucketOffsets_.push_back(nextOffset);
135+
DCHECK_LE(nextOffset, rightOffset_);
136+
}
137+
125138
weightSeen = 0.0;
126139
objectsSeen = 0;
127140
} else {
@@ -147,8 +160,9 @@ class FastDiscreteDistribution final : public Distribution {
147160
size_t bucket = bucketDistribution_(gen);
148161
size_t objectInBucket = facebook::cachelib::util::narrow_cast<size_t>(
149162
insideBucketDistributions_[bucket](gen));
163+
auto multiplier = useLegacyKeyGen_ ? scalingFactor_ : 1;
150164
auto ret = facebook::cachelib::util::narrow_cast<size_t>(
151-
(bucketOffsets_[bucket] + objectInBucket)) +
165+
(multiplier * (bucketOffsets_[bucket] + objectInBucket))) +
152166
leftOffset_;
153167
XDCHECK_LE(ret, rightOffset_);
154168
XDCHECK_GE(ret, leftOffset_);
@@ -169,6 +183,7 @@ class FastDiscreteDistribution final : public Distribution {
169183
std::vector<uint64_t> bucketOffsets_{};
170184
const size_t leftOffset_{};
171185
const size_t rightOffset_{};
186+
bool useLegacyKeyGen_{false};
172187
double scalingFactor_{};
173188
double bucketWeight_{};
174189
std::uniform_int_distribution<uint64_t> bucketDistribution_{};

cachelib/cachebench/workload/WorkloadDistribution.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,11 @@ class WorkloadDistribution {
9292
std::unique_ptr<Distribution> getPopDist(size_t left, size_t right) const {
9393
if (config_.usesDiscretePopularity()) {
9494
return std::make_unique<FastDiscreteDistribution>(
95-
left, right, config_.popularityBuckets, config_.popularityWeights);
95+
left,
96+
right,
97+
config_.popularityBuckets,
98+
config_.popularityWeights,
99+
config_.useLegacyKeyGen);
96100
} else {
97101
// TODO In general, could have different keyFrequency factor besides 2
98102
double mu = (left + right) * 0.5;

0 commit comments

Comments
 (0)