Skip to content

Commit

Permalink
tstore: Freeze trillian trees.
Browse files Browse the repository at this point in the history
This commit adds episodic checks to the tstore backend that freeze any
trillian trees for records that have been updated to a status that no
longer allows updates, such as censored or archived, and that have a
final dcr timestamp appended onto the tree.

The reason we need this requires some background knowledge on the
trillian architecture.

The trillian_log_signer polls the MySQL database at a fixed interval,
looking for leaves that have been queued up and are waiting to be
appended onto a tree. It does this for all trees that have an ACTIVE
status.

Trillian was designed to be used for a small number of trees that have
infrequent writes, but that can get very large over time. The
recommended log_signer_interval, i.e. the polling interval, was 2-3
seconds.

The way we use trillian is quite different from its intended use case
and you see this reflected in the performance of trillian on our
servers. We set the log signer interval to 200ms because we require the
leaves be appended onto a tree in order for a write to be considered valid.
We also use a new tree for each record. This results in a large number of 
trees that get polled by the log signer every 200ms and is why the CPUs 
spin on our servers. Moving the status of trees that can no longer be 
modified to FROZEN will help reduce this load.
  • Loading branch information
lukebp authored Aug 9, 2022
1 parent 35c283d commit be5a17f
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 5 deletions.
114 changes: 114 additions & 0 deletions politeiad/backendv2/tstorebe/tstore/freeze.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// Copyright (c) 2022 The Decred developers
// Use of this source code is governed by an ISC
// license that can be found in the LICENSE file.

package tstore

import (
"errors"

backend "github.com/decred/politeia/politeiad/backendv2"
"github.com/google/trillian"
)

// freezeTreeCheck scans every trillian tree and freezes the ones that are
// eligible, i.e. it updates their trillian status from ACTIVE to FROZEN.
//
// Once a tree is frozen it can no longer be appended to and the
// trillian_log_signer stops polling the MySQL database for updates to it.
// Freezing trees that will never change again therefore reduces the load on
// the server and helps prevent the CPUs from spinning.
//
// A record becomes frozen when it can no longer be updated, e.g. when its
// status is set to archived. The tree itself cannot be frozen at that same
// moment because it still needs to receive one final dcr timestamp. A tree is
// only frozen once the record is frozen AND that final timestamp has been
// added to the tree.
//
// A failure while evaluating a single tree is logged and that tree is
// skipped. A failure to fetch the trees or to freeze a tree is returned to
// the caller immediately.
func (t *Tstore) freezeTreeCheck() error {
	log.Infof("Checking if any trillian trees can be frozen")

	all, err := t.tlog.TreesAll()
	if err != nil {
		return err
	}

	// Only trees that are still active are candidates for being frozen.
	candidates := make([]*trillian.Tree, 0, len(all))
	for _, tr := range all {
		if tr.TreeState != trillian.TreeState_ACTIVE {
			continue
		}
		candidates = append(candidates, tr)
	}

	log.Infof("%v/%v active trillian trees found", len(candidates), len(all))

	frozenCount := 0
	for _, tr := range candidates {
		eligible, err := t.treeShouldBeFrozen(tr)
		switch {
		case err != nil:
			// Don't let a single bad tree abort the whole check.
			log.Errorf("treeShouldBeFrozen %v: %v", tr.TreeId, err)
			continue
		case !eligible:
			// The tree does not meet the requirements yet.
			continue
		}

		if _, err := t.tlog.TreeFreeze(tr.TreeId); err != nil {
			return err
		}

		log.Infof("Tree frozen %v %x", tr.TreeId, tokenFromTreeID(tr.TreeId))

		frozenCount++
	}

	log.Infof("%v trees were frozen; %v active trees remaining",
		frozenCount, len(candidates)-frozenCount)

	return nil
}

// treeShouldBeFrozen reports whether a trillian tree is ready to have its
// status moved from ACTIVE to FROZEN. All of the following must hold:
//
//   - the tree is currently active
//   - the tree contains at least one leaf and has a record index
//   - the latest record index marks the record as frozen
//   - the last leaf on the tree is an anchor, i.e. the final dcr timestamp
//     has been added to the tree
//
// A tree that has no leaves or no record index is reported as not ready
// rather than as an error.
func (t *Tstore) treeShouldBeFrozen(tree *trillian.Tree) (bool, error) {
	if tree.TreeState != trillian.TreeState_ACTIVE {
		return false, nil
	}

	leaves, err := t.tlog.LeavesAll(tree.TreeId)
	if err != nil {
		return false, err
	}
	if len(leaves) == 0 {
		return false, nil
	}

	idx, err := t.recordIndexLatest(leaves)
	if err != nil {
		if errors.Is(err, backend.ErrRecordNotFound) {
			// There is no record index on this tree.
			return false, nil
		}
		return false, err
	}
	if !idx.Frozen {
		// The record can still be updated.
		return false, nil
	}

	// The record is frozen. The tree may only be frozen if the very last
	// leaf is an anchor; otherwise it is still waiting on its final
	// timestamp.
	tail := leaves[len(leaves)-1]
	ed, err := extraDataDecode(tail.ExtraData)
	if err != nil {
		return false, err
	}

	return ed.Desc == dataDescriptorAnchor, nil
}
17 changes: 13 additions & 4 deletions politeiad/backendv2/tstorebe/tstore/tstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,11 +154,16 @@ func (t *Tstore) fullLengthToken(token []byte) ([]byte, error) {

// Fsck performs a filesystem check on the tstore.
func (t *Tstore) Fsck(allTokens [][]byte) error {
// Set tree status to frozen for any trees that are frozen and have
// been anchored one last time.
// Verify all file blobs have been deleted for censored records.
err := t.anchorTrees()
if err != nil {
return err
}
err = t.freezeTreeCheck()
if err != nil {
return err
}

// Run plugin fscks's
// Run the plugin fscks
for _, pluginID := range t.pluginIDs() {
p, _ := t.plugin(pluginID)

Expand Down Expand Up @@ -258,6 +263,10 @@ func New(appDir, dataDir string, anp *chaincfg.Params, tlogHost, dbHost, dbPass,
if err != nil {
log.Errorf("anchorTrees: %v", err)
}
err = t.freezeTreeCheck()
if err != nil {
log.Errorf("freeTreeCheck: %v", err)
}
})
if err != nil {
return nil, err
Expand Down
2 changes: 1 addition & 1 deletion politeiad/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ type config struct {
RPCUser string `long:"rpcuser" description:"RPC user name for privileged commands"`
RPCPass string `long:"rpcpass" description:"RPC password for privileged commands"`
DcrtimeHost string `long:"dcrtimehost" description:"Dcrtime ip:port"`
DcrtimeCert string // Provided in env variable "DCRTIMECERT"
DcrtimeCert string `long:"dcrtimecert" description:"Dcrtime HTTPS certificate"`
Identity string `long:"identity" description:"File containing the politeiad identity file"`
Backend string `long:"backend" description:"Backend type"`
Fsck bool `long:"fsck" description:"Perform filesystem checks on all record and plugin data"`
Expand Down

0 comments on commit be5a17f

Please sign in to comment.