Skip to content

Commit

Permalink
s: add workload indexing - add logging
Browse files Browse the repository at this point in the history
  • Loading branch information
Michal-Leszczynski committed Oct 2, 2024
1 parent 9a07227 commit 0efed73
Showing 1 changed file with 49 additions and 2 deletions.
51 changes: 49 additions & 2 deletions pkg/service/restore/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@ func (w *tablesWorker) indexLocationWorkload(ctx context.Context, location Locat
return LocationWorkload{}, errors.Wrap(err, "filter already restored sstables")
}
}
return aggregateLocationWorkload(rawWorkload), nil
workload := aggregateLocationWorkload(rawWorkload)
w.logWorkloadInfo(ctx, workload)
return workload, nil
}

func (w *tablesWorker) createRemoteDirWorkloads(ctx context.Context, location Location) ([]RemoteDirWorkload, error) {
Expand Down Expand Up @@ -115,7 +117,9 @@ func (w *tablesWorker) createRemoteDirWorkloads(ctx context.Context, location Lo
Size: size,
SSTables: remoteSSTables,
}
rawWorkload = append(rawWorkload, workload)
if size > 0 {
rawWorkload = append(rawWorkload, workload)
}
return nil
})
})
Expand Down Expand Up @@ -212,6 +216,49 @@ func (w *tablesWorker) initMetrics(workload []LocationWorkload) {
}, float64(totalSize-workloadSize)/float64(totalSize)*100)
}

// logWorkloadInfo logs SSTable size statistics for the given location workload.
//
// For every remote sstable dir with non-zero Size it logs the biggest SSTable
// size, the average SSTable size and the SSTable count. The same three values
// are then logged per table (aggregated over its logged remote dirs) and once
// for the whole location (aggregated over its logged tables). Workloads,
// tables and remote dirs with Size == 0 are skipped entirely.
//
// This method only logs; it does not modify the workload.
func (w *tablesWorker) logWorkloadInfo(ctx context.Context, workload LocationWorkload) {
	if workload.Size == 0 {
		return
	}

	// avg returns total/cnt, or 0 when cnt is 0. The Size fields are checked
	// above each division, but a non-zero Size does not by itself guarantee
	// a non-zero SSTable count, and integer division by zero panics.
	// An informational log must never be able to crash the caller.
	avg := func(total, cnt int64) int64 {
		if cnt == 0 {
			return 0
		}
		return total / cnt
	}

	var locMax, locCnt int64
	for _, twl := range workload.Tables {
		if twl.Size == 0 {
			continue
		}
		var tabMax, tabCnt int64
		for _, rdwl := range twl.RemoteDirs {
			if rdwl.Size == 0 {
				continue
			}
			var dirMax int64
			for _, sst := range rdwl.SSTables {
				dirMax = max(dirMax, sst.Size)
			}
			dirCnt := int64(len(rdwl.SSTables))
			w.logger.Info(ctx, "Remote sstable dir workload info",
				"path", rdwl.RemoteSSTableDir,
				"max size", dirMax,
				"average size", avg(rdwl.Size, dirCnt),
				"count", dirCnt)
			// Roll the dir stats up into the table stats.
			tabCnt += dirCnt
			tabMax = max(tabMax, dirMax)
		}
		w.logger.Info(ctx, "Table workload info",
			"keyspace", twl.Keyspace,
			"table", twl.Table,
			"max size", tabMax,
			"average size", avg(twl.Size, tabCnt),
			"count", tabCnt)
		// Roll the table stats up into the location stats.
		locCnt += tabCnt
		locMax = max(locMax, tabMax)
	}
	w.logger.Info(ctx, "Location workload info",
		"location", workload.Location.String(),
		"max size", locMax,
		"average size", avg(workload.Size, locCnt),
		"count", locCnt)
}

func aggregateLocationWorkload(rawWorkload []RemoteDirWorkload) LocationWorkload {
remoteDirWorkloads := make(map[TableName][]RemoteDirWorkload)
for _, rw := range rawWorkload {
Expand Down

0 comments on commit 0efed73

Please sign in to comment.