diff --git a/datastore/datastore.go b/datastore/datastore.go
index 14c96fb1..878419b5 100644
--- a/datastore/datastore.go
+++ b/datastore/datastore.go
@@ -790,25 +790,30 @@ type StorageSummary struct {
 	DataUUID string
 	RootUUID string
 	Bytes    uint64
-	KeyUsage map[int]int
+	KeyUsage storage.KeyUsage
 }
 
-// GetStorageSummary returns JSON for all the data instances in the stores.
-func GetStorageSummary() (string, error) {
+// GetStorageSummary logs a JSON summary of storage use for the data instances
+// in the stores. Errors are logged instead of returned so the scan can be
+// launched in its own goroutine; a store that fails is skipped.
+func GetStorageSummary() {
 	stores, err := storage.AllStores()
 	if err != nil {
-		return "", err
+		dvid.Errorf("Error getting all stores: %v\n", err)
+		return
 	}
 
 	breakdown := make(map[string]map[dvid.InstanceID]StorageSummary, len(stores))
 	for alias, store := range stores {
 		sizes, err := storage.GetDataSizes(store, nil)
 		if err != nil {
-			return "", err
+			dvid.Errorf("Error getting data sizes for store %s: %v\n", store, err)
+			continue
 		}
 		keyUsage, err := storage.GetStoreKeyUsage(store)
 		if err != nil {
-			return "", err
+			dvid.Errorf("Error getting key usage for store %s: %v\n", store, err)
+			continue
 		}
 		if sizes == nil && keyUsage == nil {
 			continue
@@ -854,10 +859,12 @@ func GetStorageSummary() (string, error) {
 	// Convert data to JSON string
 	m, err := json.MarshalIndent(breakdown, "", " ")
 	if err != nil {
-		return "", err
+		dvid.Errorf("Error marshalling storage summary to JSON: %v\n", err)
+		// No JSON was produced, so fall back to Go's default formatting.
+		dvid.Infof("Storage summary:\n%v\n", breakdown)
+		return
 	}
-	dvid.Infof("Key usage:\n%s\n", string(m))
-	return string(m), nil
+	dvid.Infof("Storage summary:\n%s\n", string(m))
 }
 
 // LogRepoOpToKafka logs a repo operation to kafka
diff --git a/server/web.go b/server/web.go
index 12525b09..a233f7ae 100644
--- a/server/web.go
+++ b/server/web.go
@@ -1516,14 +1516,18 @@ func latenciesHandler(w http.ResponseWriter, r *http.Request) {
 	fmt.Fprint(w, string(m))
 }
 
-func serverStorageHandler(w http.ResponseWriter, r *http.Request) {
-	jsonStr, err := datastore.GetStorageSummary()
-	if err != nil {
-		BadRequest(w, r, err)
+// serverStorageHandler starts a storage summary in the background for admin
+// users and immediately replies that the result will appear in the log.
+func serverStorageHandler(c *web.C, w http.ResponseWriter, r *http.Request) {
+	// Treat a missing or non-bool "adminPriv" as no privilege instead of panicking.
+	adminPriv, _ := c.Env["adminPriv"].(bool)
+	if !adminPriv {
+		BadRequest(w, r, "Storage summary takes considerable resources and is only available to admin users.")
 		return
 	}
-	w.Header().Set("Content-Type", "application/json")
-	fmt.Fprint(w, jsonStr)
+
+	go datastore.GetStorageSummary()
+	fmt.Fprint(w, "Storage summary requested. Check log for details.\n")
 }
 
 func serverInfoHandler(w http.ResponseWriter, r *http.Request) {
diff --git a/storage/badger/badger.go b/storage/badger/badger.go
index 960ad33e..5b8b4ba6 100644
--- a/storage/badger/badger.go
+++ b/storage/badger/badger.go
@@ -290,12 +290,15 @@ func (db *BadgerDB) metadataExists() (bool, error) {
 
 // ---- KeyUsageViewer interface ------
 
-func (db *BadgerDB) GetKeyUsage(ranges []storage.KeyRange) (hitsPerInstance []map[int]int, err error) {
+// GetKeyUsage returns, for each given key range (i.e., a data instance), the
+// per-TKey-class histogram of the number of versions stored per key. Bucket 0
+// of each histogram accumulates the number of tombstones encountered.
+func (db *BadgerDB) GetKeyUsage(ranges []storage.KeyRange) (hitsPerInstance []storage.KeyUsage, err error) {
 	if db == nil {
 		err = fmt.Errorf("can't call GetKeyUsage on nil BadgerDB")
 		return
 	}
-	hitsPerInstance = make([]map[int]int, len(ranges))
+	hitsPerInstance = make([]storage.KeyUsage, len(ranges))
 	err = db.bdp.View(func(txn *badger.Txn) error {
 		opts := badger.DefaultIteratorOptions
 		opts.PrefetchValues = false
@@ -303,25 +306,17 @@ func (db *BadgerDB) GetKeyUsage(ranges []storage.KeyRange) (hitsPerInstance []ma
 		defer it.Close()
 		dvid.Infof("Checking key usage for Badger @ %s ...\n", db.directory)
 		for i, kr := range ranges {
-			// Allocate histogram for this key range (i.e., a data instance)
-			hitsPerInstance[i] = make(map[int]int)
-
 			// Iterate and get all kv across versions for each key.
 			maxVersionKey := storage.MaxVersionDataKeyFromKey(kr.Start)
-			numVersions := 1
+			keyUsage := make(storage.KeyUsage)
+			var tKeyClass uint8 // class of the TKey whose versions are being counted
+			versions := 0
+			tombstones := 0
 			for it.Seek(kr.Start); it.Valid(); it.Next() {
 				kv := new(storage.KeyValue)
 				item := it.Item()
 				kv.K = item.KeyCopy(nil)
 				storage.StoreKeyBytesRead <- len(kv.K)
 
-				// Add version to the stats for this key.
-				if bytes.Compare(kv.K, maxVersionKey) > 0 {
-					maxVersionKey = storage.MaxVersionDataKeyFromKey(kv.K)
-					hitsPerInstance[i][numVersions]++
-					numVersions = 0
-				}
-				numVersions++
-
 				// Did we pass the final key?
 				if bytes.Compare(kv.K, kr.OpenEnd) > 0 {
@@ -329,6 +324,32 @@ func (db *BadgerDB) GetKeyUsage(ranges []storage.KeyRange) (hitsPerInstance []ma
 				}
+
+				// If we now are in another TKey, record stats for the TKey just
+				// finished and start counting the new one. This runs after the
+				// range check so a key past the range is never counted or indexed.
+				if bytes.Compare(kv.K, maxVersionKey) > 0 {
+					if versions > 0 {
+						keyUsage.Add(tKeyClass, versions, tombstones)
+					}
+					maxVersionKey = storage.MaxVersionDataKeyFromKey(kv.K)
+					versions = 0
+					tombstones = 0
+				}
+				if versions == 0 {
+					// First version seen for this TKey: remember its own class
+					// (byte 5 of the key) for when its stats are recorded.
+					tKeyClass = uint8(kv.K[5])
+				}
+				versions++
+				if kv.K.IsTombstone() {
+					tombstones++
+				}
 			}
 
+			// The last TKey in the range has no following key to trigger the
+			// recording above, so record it here.
+			if versions > 0 {
+				keyUsage.Add(tKeyClass, versions, tombstones)
+			}
+			hitsPerInstance[i] = keyUsage
 		}
 		dvid.Infof("Key usage for Badger @ %s:\n %v\n", db.directory, hitsPerInstance)
 		return nil
diff --git a/storage/keyvalue.go b/storage/keyvalue.go
index 4173792f..2ebefdc5 100644
--- a/storage/keyvalue.go
+++ b/storage/keyvalue.go
@@ -448,7 +448,7 @@ func getInstanceSizes(sv SizeViewer, instances []dvid.InstanceID) (map[dvid.Inst
 	return sizes, nil
 }
 
-func getKeyUsage(vw KeyUsageViewer, instances []dvid.InstanceID) (map[dvid.InstanceID]map[int]int, error) {
+func getKeyUsage(vw KeyUsageViewer, instances []dvid.InstanceID) (map[dvid.InstanceID]KeyUsage, error) {
 	ranges := make([]KeyRange, len(instances))
 	for i, curID := range instances {
 		beg := constructDataKey(curID, 0, 0, minTKey)
@@ -462,7 +462,7 @@ func getKeyUsage(vw KeyUsageViewer, instances []dvid.InstanceID) (map[dvid.Insta
 	if len(s) != len(instances) {
 		return nil, fmt.Errorf("only got back %d instance key usages, not the requested %d instances", len(s), len(instances))
 	}
-	keyUsage := make(map[dvid.InstanceID]map[int]int, len(instances))
+	keyUsage := make(map[dvid.InstanceID]KeyUsage, len(instances))
 	for i, curID := range instances {
 		keyUsage[curID] = s[i]
 	}
diff --git a/storage/storage.go b/storage/storage.go
index d4a611e5..b48597c9 100644
--- a/storage/storage.go
+++ b/storage/storage.go
@@ -168,18 +168,56 @@ func Repair(name, path string) error {
 	return repairer.Repair(path)
 }
 
+// VersionHistogram is a map of # versions to # keys that have that many versions.
+// Bucket 0 is special: KeyUsage.Add accumulates the number of tombstones there.
+type VersionHistogram map[int]int
+
+// Clone returns a copy of the histogram that shares no storage with the original.
+func (vh VersionHistogram) Clone() VersionHistogram {
+	clone := make(VersionHistogram, len(vh))
+	for k, v := range vh {
+		clone[k] = v
+	}
+	return clone
+}
+
+// KeyUsage is a map of TKeyClass to VersionHistogram.
+type KeyUsage map[uint8]VersionHistogram
+
+// Clone returns a deep copy: each class's histogram is itself cloned.
+func (ku KeyUsage) Clone() KeyUsage {
+	clone := make(KeyUsage, len(ku))
+	for k, v := range ku {
+		clone[k] = v.Clone()
+	}
+	return clone
+}
+
+// Add records one key of the given TKey class that has the given # versions,
+// and adds its # tombstones to bucket 0 of that class's histogram. Callers
+// should pass versions > 0 so the key count does not land in the tombstone bucket.
+func (ku KeyUsage) Add(class uint8, versions int, tombstones int) {
+	vh, found := ku[class]
+	if !found {
+		vh = make(VersionHistogram)
+		ku[class] = vh
+	}
+	vh[versions]++
+	vh[0] += tombstones
+}
+
 // KeyUsageViewer stores can return how many keys are stored and a histogram of the
 // number of versions per key for each data instance given by the key ranges.
 type KeyUsageViewer interface {
-	GetKeyUsage(ranges []KeyRange) (histPerInstance []map[int]int, err error)
+	GetKeyUsage(ranges []KeyRange) (histPerInstance []KeyUsage, err error)
 }
 
-// GetStoreKeyUsage returns a histogram of the number of versions per key for each
+// GetStoreKeyUsage returns the per-TKeyClass version histograms (KeyUsage) for each
 // data instance in the store.
-func GetStoreKeyUsage(store dvid.Store) (map[dvid.InstanceID]map[int]int, error) {
+func GetStoreKeyUsage(store dvid.Store) (map[dvid.InstanceID]KeyUsage, error) {
 	db, ok := store.(OrderedKeyValueGetter)
 	if !ok {
-		dvid.Infof("Cannot get data sizes for store %s, which is not an OrderedKeyValueGetter store\n", db)
+		dvid.Infof("Cannot get key usage for store %s, which is not an OrderedKeyValueGetter store\n", store)
 		return nil, nil
 	}
 	viewer, ok := store.(KeyUsageViewer)