-
Notifications
You must be signed in to change notification settings - Fork 185
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #6946 from onflow/leo/cdp-engine
[Chunk Data Pack Pruner] Add Engine for pruning chunk data pack
- Loading branch information
Showing
29 changed files
with
1,099 additions
and
53 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
package pruner | ||
|
||
import ( | ||
"math" | ||
"time" | ||
) | ||
|
||
// PruningConfig holds the tunable parameters of the chunk data pack pruner.
type PruningConfig struct {
	// Threshold is the number of most-recent blocks whose data is retained
	// in the database rather than pruned.
	Threshold uint64
	// BatchSize is the number of blocks whose chunk data packs are deleted
	// in a single batch commit.
	BatchSize uint
	// SleepAfterEachBatchCommit is how long the pruner pauses after committing a batch.
	SleepAfterEachBatchCommit time.Duration
	// SleepAfterEachIteration is how long the pruner pauses after completing
	// a full pruning iteration.
	SleepAfterEachIteration time.Duration
}

// DefaultConfig is the default pruning configuration.
var DefaultConfig = PruningConfig{
	// Retain roughly 30 days of blocks:
	// days * hours * minutes * seconds * blocks_per_second.
	Threshold: 30 * 60 * 60 * 24 * 1.2,
	BatchSize: 1200,
	// When choosing this value, consider the batch size and the block production rate.
	// For instance, at a production rate of 1.2 blocks/second, a batch of 1200 blocks
	// is produced in 1200 / 1.2 = 1000 seconds. The sleep time must stay well below
	// that, otherwise the pruner cannot keep up with block production.
	SleepAfterEachBatchCommit: 12 * time.Second,
	// Effectively disabled by default (maximum duration) so the feature
	// can be rolled out slowly.
	SleepAfterEachIteration: math.MaxInt64,
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,157 @@ | ||
package pruner | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"time" | ||
|
||
"github.com/cockroachdb/pebble" | ||
"github.com/dgraph-io/badger/v2" | ||
"github.com/rs/zerolog" | ||
|
||
"github.com/onflow/flow-go/module" | ||
"github.com/onflow/flow-go/module/block_iterator" | ||
"github.com/onflow/flow-go/module/block_iterator/executor" | ||
"github.com/onflow/flow-go/module/block_iterator/latest" | ||
"github.com/onflow/flow-go/state/protocol" | ||
"github.com/onflow/flow-go/storage" | ||
"github.com/onflow/flow-go/storage/operation/pebbleimpl" | ||
"github.com/onflow/flow-go/storage/store" | ||
) | ||
|
||
const NextHeightForUnprunedExecutionDataPackKey = "NextHeightForUnprunedExecutionDataPackKey" | ||
|
||
// LoopPruneExecutionDataFromRootToLatestSealed repeatedly prunes chunk data packs,
// iterating from the last pruned checkpoint (starting at the sealed root) up to the
// latest prunable block (latest sealed-and-executed height minus config.Threshold).
// It loops forever, sleeping config.SleepAfterEachIteration between rounds, and
// returns nil only when ctx is cancelled, or an error on any irrecoverable failure.
func LoopPruneExecutionDataFromRootToLatestSealed(
	ctx context.Context,
	log zerolog.Logger,
	metrics module.ExecutionMetrics,
	state protocol.State,
	badgerDB *badger.DB,
	headers storage.Headers,
	chunkDataPacks storage.ChunkDataPacks,
	results storage.ExecutionResults,
	chunkDataPacksDB *pebble.DB,
	config PruningConfig,
) error {

	chunksDB := pebbleimpl.ToDB(chunkDataPacksDB)
	// the creator can be reused to create new block iterator that can iterate from the last
	// checkpoint to the new latest (sealed) block.
	creator, getNextAndLatest, err := makeBlockIteratorCreator(state, badgerDB, headers, chunksDB, config)
	if err != nil {
		return err
	}

	pruner := NewChunkDataPackPruner(chunkDataPacks, results)

	// iterateAndPruneAll takes a block iterator and iterates through all the blocks
	// and decides how to prune the chunk data packs, committing deletions in batches
	// of config.BatchSize and pausing config.SleepAfterEachBatchCommit between batches.
	iterateAndPruneAll := func(iter module.BlockIterator) error {
		err := executor.IterateExecuteAndCommitInBatch(
			ctx, log, iter, pruner, chunksDB, config.BatchSize, config.SleepAfterEachBatchCommit)
		if err != nil {
			return fmt.Errorf("failed to iterate, execute, and commit in batch: %w", err)
		}
		return nil
	}

	for {
		// report progress (for logging/metrics) before starting this round of pruning
		nextToPrune, latestToPrune, err := getNextAndLatest()
		if err != nil {
			return fmt.Errorf("failed to get next and latest to prune: %w", err)
		}

		log.Info().
			Uint64("nextToPrune", nextToPrune).
			Uint64("latestToPrune", latestToPrune).
			Msgf("execution data pruning will start in %s at %s",
				config.SleepAfterEachIteration, time.Now().Add(config.SleepAfterEachIteration).UTC())

		// last pruned is nextToPrune - 1.
		// it won't underflow, because nextToPrune starts from root + 1
		metrics.ExecutionLastChunkDataPackPrunedHeight(nextToPrune - 1)

		select {
		case <-ctx.Done():
			// cancellation is a normal shutdown, not an error
			return nil
		// wait first so that we give the data pruning lower priority compare to other tasks.
		// also we can disable this feature by setting the sleep time to a very large value.
		// also allows the pruner to be more responsive to the context cancellation, meaning
		// while the pruner is sleeping, it can be cancelled immediately.
		case <-time.After(config.SleepAfterEachIteration):
		}

		iter, hasNext, err := creator.Create()
		if err != nil {
			return fmt.Errorf("failed to create block iterator: %w", err)
		}

		if !hasNext {
			// no more blocks to iterate (nothing new to prune this round);
			// go back to sleep and re-check on the next iteration.
			continue
		}

		err = iterateAndPruneAll(iter)
		if err != nil {
			return fmt.Errorf("failed to iterate, execute, and commit in batch: %w", err)
		}
	}
}
|
||
// makeBlockIteratorCreator create the block iterator creator | ||
func makeBlockIteratorCreator( | ||
state protocol.State, | ||
badgerDB *badger.DB, | ||
headers storage.Headers, | ||
chunkDataPacksDB storage.DB, | ||
config PruningConfig, | ||
) ( | ||
module.IteratorCreator, | ||
// this is for logging purpose, so that after each round of pruning, | ||
// we can log and report metrics about the next and latest to prune | ||
func() (nextToPrune uint64, latestToPrune uint64, err error), | ||
error, // any error are exception | ||
) { | ||
root := state.Params().SealedRoot() | ||
sealedAndExecuted := latest.NewLatestSealedAndExecuted( | ||
root, | ||
state, | ||
badgerDB, | ||
) | ||
|
||
// retrieves the latest sealed and executed block height. | ||
// the threshold ensures that a certain number of blocks are retained for querying instead of being pruned. | ||
latest := &LatestPrunable{ | ||
LatestSealedAndExecuted: sealedAndExecuted, | ||
threshold: config.Threshold, | ||
} | ||
|
||
initializer := store.NewConsumerProgress(chunkDataPacksDB, NextHeightForUnprunedExecutionDataPackKey) | ||
|
||
creator, err := block_iterator.NewHeightBasedCreator( | ||
headers.BlockIDByHeight, | ||
initializer, | ||
root, | ||
latest.Latest, | ||
) | ||
|
||
if err != nil { | ||
return nil, nil, fmt.Errorf("failed to create height based block iterator creator: %w", err) | ||
} | ||
|
||
stateReader := creator.IteratorState() | ||
|
||
return creator, func() (nextToPrune uint64, latestToPrune uint64, err error) { | ||
next, err := stateReader.LoadState() | ||
if err != nil { | ||
return 0, 0, fmt.Errorf("failed to get next height to prune: %w", err) | ||
} | ||
|
||
header, err := latest.Latest() | ||
if err != nil { | ||
return 0, 0, fmt.Errorf("failed to get latest prunable block: %w", err) | ||
} | ||
|
||
return next, header.Height, nil | ||
}, nil | ||
} |
Oops, something went wrong.