Skip to content

Commit

Permalink
Allow more intra-L0 compaction when L0 is small (#12214)
Browse files Browse the repository at this point in the history
Summary:
Introduce a new option `intra_l0_compaction_size` to allow more intra-L0 compaction when the total L0 size is under a threshold. This option applies only to leveled compaction. It is enabled by default and set to `max_bytes_for_level_base / max_bytes_for_level_multiplier` only for atomic_flush users. When atomic_flush=true, it is more likely that some CF's total L0 size is small when it becomes eligible for compaction. This option aims to reduce write amplification in that case.

Pull Request resolved: facebook/rocksdb#12214

Test Plan:
- new unit test
- benchmark:
```
TEST_TMPDIR=/dev/shm ./db_bench --benchmarks=fillrandom --write_buffer_size=51200 --max_bytes_for_level_base=5242880 --level0_file_num_compaction_trigger=4 --statistics=1

main:
fillrandom   :     234.499 micros/op 4264 ops/sec 234.499 seconds 1000000 operations;    0.5 MB/s
rocksdb.compact.read.bytes COUNT : 1490756235
rocksdb.compact.write.bytes COUNT : 1469056734
rocksdb.flush.write.bytes COUNT : 71099011

branch:
fillrandom   :     128.494 micros/op 7782 ops/sec 128.494 seconds 1000000 operations;    0.9 MB/s
rocksdb.compact.read.bytes COUNT : 807474156
rocksdb.compact.write.bytes COUNT : 781977610
rocksdb.flush.write.bytes COUNT : 71098785
```

Reviewed By: ajkr

Differential Revision: D52637771

Pulled By: cbi42

fbshipit-source-id: 4f2c7925d0c3a718635c948ea0d4981ed9fabec3
(cherry picked from commit 4b684e96b71cd6f1d50e29fae8b55c323ccd2869)
  • Loading branch information
cbi42 authored and mayuehappy committed Jul 9, 2024
1 parent ef8a8ec commit f532aa6
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 0 deletions.
65 changes: 65 additions & 0 deletions db/compaction/compaction_picker_level.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,15 @@ class LevelCompactionBuilder {
// otherwise, returns false.
bool PickIntraL0Compaction();

// When total L0 size is small compared to Lbase, try to pick intra-L0
// compaction starting from the newest L0 file. This helps to prevent
// L0->Lbase compaction with large write-amp.
//
// Returns true iff an intra-L0 compaction is picked.
// `start_level_inputs_` and `output_level_` will be updated accordingly if
// a compaction is picked.
bool PickSizeBasedIntraL0Compaction();

// Return true if TrivialMove is extended. `start_index` is the index of
// the initial file picked, which should already be in `start_level_inputs_`.
bool TryExtendNonL0TrivialMove(int start_index,
Expand Down Expand Up @@ -758,6 +767,9 @@ bool LevelCompactionBuilder::PickFileToCompact() {
// being compacted at level 0.
if (start_level_ == 0 &&
!compaction_picker_->level0_compactions_in_progress()->empty()) {
if (PickSizeBasedIntraL0Compaction()) {
return true;
}
TEST_SYNC_POINT("LevelCompactionPicker::PickCompactionBySize:0");
return false;
}
Expand All @@ -770,6 +782,9 @@ bool LevelCompactionBuilder::PickFileToCompact() {
if (TryPickL0TrivialMove()) {
return true;
}
if (start_level_ == 0 && PickSizeBasedIntraL0Compaction()) {
return true;
}

const std::vector<FileMetaData*>& level_files =
vstorage_->LevelFiles(start_level_);
Expand Down Expand Up @@ -874,6 +889,56 @@ bool LevelCompactionBuilder::PickIntraL0Compaction() {
mutable_cf_options_.max_compaction_bytes,
&start_level_inputs_);
}

bool LevelCompactionBuilder::PickSizeBasedIntraL0Compaction() {
assert(start_level_ == 0);
int base_level = vstorage_->base_level();
if (base_level <= 0) {
return false;
}
const std::vector<FileMetaData*>& l0_files =
vstorage_->LevelFiles(/*level=*/0);
size_t min_num_file =
std::max(2, mutable_cf_options_.level0_file_num_compaction_trigger);
if (l0_files.size() < min_num_file) {
return false;
}
uint64_t l0_size = 0;
for (const auto& file : l0_files) {
l0_size += file->fd.GetFileSize();
}
const uint64_t min_lbase_size =
l0_size * static_cast<uint64_t>(std::max(
10.0, mutable_cf_options_.max_bytes_for_level_multiplier));
assert(min_lbase_size >= l0_size);
const std::vector<FileMetaData*>& lbase_files =
vstorage_->LevelFiles(/*level=*/base_level);
uint64_t lbase_size = 0;
for (const auto& file : lbase_files) {
lbase_size += file->fd.GetFileSize();
if (lbase_size > min_lbase_size) {
break;
}
}
if (lbase_size <= min_lbase_size) {
return false;
}

start_level_inputs_.clear();
start_level_inputs_.level = 0;
for (const auto& file : l0_files) {
if (file->being_compacted) {
break;
}
start_level_inputs_.files.push_back(file);
}
if (start_level_inputs_.files.size() < min_num_file) {
start_level_inputs_.clear();
return false;
}
output_level_ = 0;
return true /* picked an intra-L0 compaction */;
}
} // namespace

Compaction* LevelCompactionPicker::PickCompaction(
Expand Down
52 changes: 52 additions & 0 deletions db/compaction/compaction_picker_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4152,6 +4152,58 @@ TEST_P(PerKeyPlacementCompactionPickerTest,
INSTANTIATE_TEST_CASE_P(PerKeyPlacementCompactionPickerTest,
PerKeyPlacementCompactionPickerTest, ::testing::Bool());

TEST_F(CompactionPickerTest, IntraL0WhenL0IsSmall) {
  // Verifies size-based intra-L0 picking: with four 1000-byte L0 files
  // (total 4000 bytes), vary the Lbase size and check whether an intra-L0
  // compaction (output level 0) or a regular L0->L1 compaction is picked.
  mutable_cf_options_.level0_file_num_compaction_trigger = 4;
  mutable_cf_options_.max_bytes_for_level_multiplier = 10;
  for (const uint64_t lbase_size_multiplier : {1, 10, 11, 40}) {
    SCOPED_TRACE("lbase_size_multiplier=" +
                 std::to_string(lbase_size_multiplier));
    NewVersionStorage(6, kCompactionStyleLevel);
    // When L0 size is strictly less than
    // Lbase size / max_bytes_for_level_multiplier, intra-L0 compaction is
    // picked. Otherwise (including the boundary case where the two are
    // equal, i.e. lbase_size_multiplier == 10), L0->L1 compaction is picked.
    Add(/*level=*/0, /*file_number=*/1U, /*smallest=*/"100",
        /*largest=*/"200", /*file_size=*/1000, /*path_id=*/0,
        /*smallest_seq=*/10, /*largest_seq=*/11,
        /*compensated_file_size=*/1000);
    Add(/*level=*/0, /*file_number=*/2U, /*smallest=*/"100",
        /*largest=*/"100", /*file_size=*/1000, /*path_id=*/0,
        /*smallest_seq=*/20, /*largest_seq=*/21,
        /*compensated_file_size=*/1000);
    Add(/*level=*/0, /*file_number=*/3U, /*smallest=*/"100",
        /*largest=*/"200", /*file_size=*/1000, /*path_id=*/0,
        /*smallest_seq=*/30, /*largest_seq=*/31,
        /*compensated_file_size=*/1000);
    Add(/*level=*/0, /*file_number=*/4U, /*smallest=*/"100",
        /*largest=*/"200", /*file_size=*/1000, /*path_id=*/0,
        /*smallest_seq=*/40, /*largest_seq=*/41,
        /*compensated_file_size=*/1000);
    const uint64_t l0_size = 4000;
    const uint64_t lbase_size = l0_size * lbase_size_multiplier;
    Add(/*level=*/1, /*file_number=*/5U, /*smallest=*/"100",
        /*largest=*/"200", /*file_size=*/lbase_size, /*path_id=*/0,
        /*smallest_seq=*/0, /*largest_seq=*/0,
        /*compensated_file_size=*/lbase_size);
    UpdateVersionStorageInfo();

    LevelCompactionPicker compaction_picker(ioptions_, &icmp_);
    std::unique_ptr<Compaction> compaction(compaction_picker.PickCompaction(
        cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(),
        &log_buffer_));
    ASSERT_TRUE(compaction.get() != nullptr);
    // Either way the compaction is triggered by the L0 file count.
    ASSERT_EQ(CompactionReason::kLevelL0FilesNum,
              compaction->compaction_reason());
    ASSERT_EQ(4U, compaction->num_input_files(0));
    if (lbase_size_multiplier >
        mutable_cf_options_.max_bytes_for_level_multiplier) {
      // Lbase is large enough relative to L0: intra-L0 compaction
      // (single input level, output stays at level 0).
      ASSERT_EQ(1U, compaction->num_input_levels());
      ASSERT_EQ(0, compaction->output_level());
    } else {
      // Regular L0->L1 compaction (two input levels).
      ASSERT_EQ(2U, compaction->num_input_levels());
      ASSERT_EQ(1, compaction->output_level());
    }
  }
}

} // namespace ROCKSDB_NAMESPACE

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* For leveled compaction, RocksDB will try to do intra-L0 compaction if the total L0 size is small compared to Lbase (#12214). Users with atomic_flush=true are more likely to see the impact of this change.

0 comments on commit f532aa6

Please sign in to comment.