Skip to content

Commit

Permalink
feat(backup): extends manifest with info needed for 1-to-1 restore.
Browse files Browse the repository at this point in the history
This adds the following data to the backup manifest:
General:
  cluster_id: uuid of the cluster
  dc: data center name
  rack: rack from the scylla configuration
  node_id: ID of the Scylla node (equal to the host ID)
  task_id: uuid of the backup task
  snapshot_tag: snapshot tag
  shard_count: number of shards on the Scylla node
  cpu_count: number of cpus on scylla node
  storage_size: total size of the disk in bytes
Instance Details:
  cloud_provider: aws|gcp|azure or empty in case of on-premise
  instance_type: instance type, e.g. t2.nano or empty when on-premise

Fixes: #4130
  • Loading branch information
VAveryanov8 committed Jan 10, 2025
1 parent 969ee86 commit a592901
Show file tree
Hide file tree
Showing 5 changed files with 141 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@
"rpc_port":"9160",
"sstable_uuid_format":true,
"consistent_cluster_management":true,
"enable_tablets":true
}
"enable_tablets":true,
"data_directory": "/var/lib/scylla/data"
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,6 @@
"rpc_address":"192.168.100.101",
"rpc_port":"9160",
"sstable_uuid_format":false,
"consistent_cluster_management":false
}
"consistent_cluster_management":false,
"data_directory": "/var/lib/scylla/data"
}
31 changes: 25 additions & 6 deletions pkg/service/backup/backupspec/manifest.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,31 @@ func (m *ManifestInfo) fileNameParser(v string) error {

// ManifestContent is structure containing information about the backup.
type ManifestContent struct {
Version string `json:"version"`
ClusterName string `json:"cluster_name"`
IP string `json:"ip"`
Size int64 `json:"size"`
Tokens []int64 `json:"tokens"`
Schema string `json:"schema"`
Version string `json:"version"`
ClusterName string `json:"cluster_name"`
IP string `json:"ip"`
Size int64 `json:"size"`
Tokens []int64 `json:"tokens"`
Schema string `json:"schema"`
Rack string `json:"rack"`
ShardCount int `json:"shard_count"`
CPUCount int `json:"cpu_count"`
StorageSize uint64 `json:"storage_size"`
InstanceDetails InstanceDetails `json:"instance_details"`

// Fields below are also present in the manifest file path.
DC string `json:"dc"`
ClusterID uuid.UUID `json:"cluster_id"`
NodeID string `json:"node_id"`
TaskID uuid.UUID `json:"task_id"`
SnapshotTag string `json:"snapshot_tag"`
}

// InstanceDetails extends backup manifest with additional instance details.
// Mainly needed for 1-to-1 restore.
// Both fields are tagged omitempty, so empty values are left out of the
// serialized JSON.
type InstanceDetails struct {
// CloudProvider is serialized as "cloud_provider": aws|gcp|azure, or empty in case of on-premise.
CloudProvider string `json:"cloud_provider,omitempty"`
// InstanceType is serialized as "instance_type": e.g. t2.nano, or empty when on-premise.
InstanceType string `json:"instance_type,omitempty"`
}

// ManifestContentWithIndex is structure containing information about the backup
Expand Down
72 changes: 72 additions & 0 deletions pkg/service/backup/service_backup_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"context"
"encoding/json"
"fmt"
"io"
"math/rand"
"net/http"
"os"
Expand All @@ -28,6 +29,7 @@ import (
"github.com/scylladb/gocqlx/v2"
"github.com/scylladb/scylla-manager/v3/pkg/service/cluster"
"github.com/scylladb/scylla-manager/v3/pkg/util"
"github.com/scylladb/scylla-manager/v3/swagger/gen/agent/models"
"go.uber.org/atomic"
"go.uber.org/zap/zapcore"

Expand Down Expand Up @@ -643,6 +645,27 @@ func TestBackupSmokeIntegration(t *testing.T) {
t.Fatal(err)
}

// Mocking /cloud/metadata endpoint as it's not expected to work reliably on ci.
// But with mock we can at least check that response is used correctly and saved to manifest file.
h.Hrt.SetInterceptor(httpx.RoundTripperFunc(func(req *http.Request) (*http.Response, error) {
if req.URL.Path != "/agent/cloud/metadata" {
return nil, nil
}

metaMock := models.InstanceMetadata{
CloudProvider: "test_provider",
InstanceType: "test_instance_type",
}

metaMockBytes, err := json.Marshal(metaMock)
if err != nil {
t.Fatal(err)
}
resp := httpx.MakeResponse(req, http.StatusOK)
resp.Body = io.NopCloser(bytes.NewReader(metaMockBytes))
return resp, nil
}))

Print("When: run backup")
if err := h.service.Backup(ctx, h.ClusterID, h.TaskID, h.RunID, target); err != nil {
t.Fatal(err)
Expand Down Expand Up @@ -836,6 +859,55 @@ func assertManifestHasCorrectFormat(t *testing.T, ctx context.Context, h *backup
if !strset.New(schemas...).Has(mc.Schema) {
t.Errorf("Schema=%s, not found in schemas %s", mc.Schema, schemas)
}

var infoFromPath ManifestInfo
if err := infoFromPath.ParsePath(manifestPath); err != nil {
t.Fatal("manifest file in wrong path", manifestPath)
}

if mc.DC != infoFromPath.DC {
t.Errorf("DC=%s, expected %s", mc.DC, infoFromPath.DC)
}

// Cluster ID stored in the manifest content must match the one encoded in the manifest path.
if mc.ClusterID.String() != infoFromPath.ClusterID.String() {
	t.Errorf("ClusterID=%s, expected %s", mc.ClusterID, infoFromPath.ClusterID)
}

if mc.NodeID != infoFromPath.NodeID {
t.Errorf("NodeID=%s, expected %s", mc.NodeID, infoFromPath.NodeID)
}

if mc.TaskID.String() != infoFromPath.TaskID.String() {
t.Errorf("TaskID=%s, expected %s", mc.TaskID, infoFromPath.TaskID)
}

if mc.SnapshotTag != infoFromPath.SnapshotTag {
t.Errorf("SnapshotTag=%s, expected %s", mc.SnapshotTag, infoFromPath.SnapshotTag)
}

if mc.Rack != "rack1" {
t.Errorf("Rack=%s, expected rack1", mc.Rack)
}

if mc.ShardCount == 0 {
t.Errorf("ShardCount=0, expected > 0")
}

if mc.CPUCount == 0 {
t.Errorf("CPUCount=0, expected > 0")
}

if mc.StorageSize == 0 {
t.Errorf("StorageSize=0, expected > 0")
}

if mc.InstanceDetails.InstanceType != "test_instance_type" {
t.Errorf("InstanceDetails.InstanceType=%s, expected test_instance_type", mc.InstanceDetails.InstanceType)
}

// The failure message must name the same value the condition checks against.
if mc.InstanceDetails.CloudProvider != "test_provider" {
	t.Errorf("InstanceDetails.CloudProvider=%s, expected test_provider", mc.InstanceDetails.CloudProvider)
}
}

func TestBackupWithNodesDownIntegration(t *testing.T) {
Expand Down
42 changes: 38 additions & 4 deletions pkg/service/backup/worker_manifest.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,14 @@ func (w *worker) createAndUploadHostManifest(ctx context.Context, h hostInfo) er
return err
}

m := w.createTemporaryManifest(h, tokens)
m, err := w.createTemporaryManifest(ctx, h, tokens)
if err != nil {
return errors.Wrap(err, "create temp manifest")
}
return w.uploadHostManifest(ctx, h, m)
}

func (w *worker) createTemporaryManifest(h hostInfo, tokens []int64) ManifestInfoWithContent {
func (w *worker) createTemporaryManifest(ctx context.Context, h hostInfo, tokens []int64) (ManifestInfoWithContent, error) {
m := &ManifestInfo{
Location: h.Location,
DC: h.DC,
Expand All @@ -65,9 +68,14 @@ func (w *worker) createTemporaryManifest(h hostInfo, tokens []int64) ManifestInf
c := &ManifestContentWithIndex{
ManifestContent: ManifestContent{
Version: "v2",
ClusterName: w.ClusterName,
IP: h.IP,
Tokens: tokens,
ClusterName: w.ClusterName,
DC: h.DC,
ClusterID: w.ClusterID,
NodeID: h.ID,
TaskID: w.TaskID,
SnapshotTag: w.SnapshotTag,
},
Index: make([]FilesMeta, len(dirs)),
}
Expand All @@ -88,10 +96,36 @@ func (w *worker) createTemporaryManifest(h hostInfo, tokens []int64) ManifestInf
c.Size += d.Progress.Size
}

rack, err := w.Client.HostRack(ctx, h.IP)
if err != nil {
return ManifestInfoWithContent{}, errors.Wrap(err, "client.HostRack")
}
c.Rack = rack

shardCound, err := w.Client.ShardCount(ctx, h.IP)
if err != nil {
return ManifestInfoWithContent{}, errors.Wrap(err, "client.ShardCount")
}
c.ShardCount = int(shardCound)

// VA_TODO: candidate for #3892 (but only after #4181 gets fixed...).
nodeInfo, err := w.Client.NodeInfo(ctx, h.IP)
if err != nil {
return ManifestInfoWithContent{}, errors.Wrap(err, "client.NodeInfo")
}
c.CPUCount = int(nodeInfo.CPUCount)
c.StorageSize = nodeInfo.StorageSize

instanceMeta, err := w.Client.CloudMetadata(ctx, h.IP)
if err != nil {
return ManifestInfoWithContent{}, errors.Wrap(err, "client.CloudMetadata")
}
c.InstanceDetails = InstanceDetails(instanceMeta)

return ManifestInfoWithContent{
ManifestInfo: m,
ManifestContentWithIndex: c,
}
}, nil
}

func (w *worker) uploadHostManifest(ctx context.Context, h hostInfo, m ManifestInfoWithContent) error {
Expand Down

0 comments on commit a592901

Please sign in to comment.