Skip to content

Commit

Permalink
backup: support multiple schedules (#5633)
Browse files Browse the repository at this point in the history
  • Loading branch information
BornChanger authored May 23, 2024
1 parent ad855fe commit e2d06e6
Show file tree
Hide file tree
Showing 5 changed files with 430 additions and 18 deletions.
11 changes: 10 additions & 1 deletion pkg/apis/label/label.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ const (
// BackupScheduleLabelKey is backup schedule key
BackupScheduleLabelKey string = "tidb.pingcap.com/backup-schedule"

// BackupScheduleGroupLabelKey is backup schedule group key
BackupScheduleGroupLabelKey string = "tidb.pingcap.com/backup-schedule-group"

// BackupLabelKey is backup key
BackupLabelKey string = "tidb.pingcap.com/backup"

Expand Down Expand Up @@ -286,7 +289,7 @@ func NewRestore() Label {
}
}

// NewBackupSchedule initialize a new Label for backups of bakcup schedule
// NewBackupSchedule initialize a new Label for backups of backup schedule
func NewBackupSchedule() Label {
return Label{
NameLabelKey: BackupScheduleJobLabelVal,
Expand Down Expand Up @@ -323,6 +326,12 @@ func NewGroup() Label {
}
}

// NewBackupScheduleGroup initializes a new Label that contains a single entry:
// BackupScheduleGroupLabelKey mapped to val.
func NewBackupScheduleGroup(val string) Label {
	groupLabel := Label{}
	groupLabel[BackupScheduleGroupLabelKey] = val
	return groupLabel
}

// Instance adds instance kv pair to label
func (l Label) Instance(name string) Label {
l[InstanceLabelKey] = name
Expand Down
54 changes: 45 additions & 9 deletions pkg/backup/backupschedule/backup_schedule_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,20 +115,56 @@ func (bm *backupScheduleManager) canPerformNextBackup(bs *v1alpha1.BackupSchedul
ns := bs.GetNamespace()
bsName := bs.GetName()

backup, err := bm.deps.BackupLister.Backups(ns).Get(bs.Status.LastBackup)
if err != nil {
if errors.IsNotFound(err) {
// If this backup schedule has specified label of backup schedule group, then we need to check the last backup of the group.
// Otherwise, check its own last backup.
bsGroupName := bs.GetLabels()[label.BackupScheduleGroupLabelKey]

if bsGroupName == "" {
backup, err := bm.deps.BackupLister.Backups(ns).Get(bs.Status.LastBackup)
if err != nil {
if errors.IsNotFound(err) {
return nil
}
return fmt.Errorf("backup schedule %s/%s, get backup %s failed, err: %v", ns, bsName, bs.Status.LastBackup, err)
}

if v1alpha1.IsBackupComplete(backup) || (v1alpha1.IsBackupScheduled(backup) && v1alpha1.IsBackupFailed(backup)) {
return nil
}
return fmt.Errorf("backup schedule %s/%s, get backup %s failed, err: %v", ns, bsName, bs.Status.LastBackup, err)
// skip this sync round of the backup schedule and waiting the last backup.
return controller.RequeueErrorf("backup schedule %s/%s, the last backup %s is still running", ns, bsName, bs.Status.LastBackup)
}

if v1alpha1.IsBackupComplete(backup) || (v1alpha1.IsBackupScheduled(backup) && v1alpha1.IsBackupFailed(backup)) {
return nil
// Check the last backup of the group
backupScheduleGroupLabels := label.NewBackupScheduleGroup(bsGroupName)
selector, err := backupScheduleGroupLabels.Selector()
if err != nil {
return fmt.Errorf("generate backup schedule group %s label selector failed, err: %v", bsGroupName, err)
}

bss, err := bm.deps.BackupScheduleLister.BackupSchedules(ns).List(selector)
if err != nil {
return fmt.Errorf("backup schedule %s/%s, list backup schedules failed, err: %v", ns, bsName, err)
}

for _, bsMember := range bss {
// The check is not safe in fact since we don't have strict serialization
backup, err := bm.deps.BackupLister.Backups(ns).Get(bsMember.Status.LastBackup)
if err != nil {
if errors.IsNotFound(err) {
continue
}
return fmt.Errorf("backup schedule %s/%s, get backup %s failed, err: %v", ns, bsName, bsMember.Status.LastBackup, err)
}

if v1alpha1.IsBackupComplete(backup) || (v1alpha1.IsBackupScheduled(backup) && v1alpha1.IsBackupFailed(backup)) {
continue
}
// skip this sync round of the backup schedule and waiting the last backup.
return controller.RequeueErrorf("backup schedule %s/%s, the last backup %s is still running", ns, bsName, bsMember.Status.LastBackup)
}
// If the last backup is in a failed state, but it is not scheduled yet,
// skip this sync round of the backup schedule and waiting the last backup.
return controller.RequeueErrorf("backup schedule %s/%s, the last backup %s is still running", ns, bsName, bs.Status.LastBackup)

return nil
}

func (bm *backupScheduleManager) performLogBackupIfNeeded(bs *v1alpha1.BackupSchedule) error {
Expand Down
158 changes: 158 additions & 0 deletions pkg/backup/backupschedule/backup_schedule_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,140 @@ func TestManager(t *testing.T) {
g.Expect(err).Should(BeNil())
helper.checkBacklist(bs.Namespace, 2, true)
}
// TestMultiSchedules exercises canPerformNextBackup in three scenarios:
//  1. two schedules without the backup schedule group label, each with a completed last backup;
//  2. two schedules in the same group ("group1"), both with completed last backups;
//  3. two schedules in the same group ("group2"), where one member's last backup is
//     still running, so every member of the group must be asked to requeue.
func TestMultiSchedules(t *testing.T) {
	g := NewGomegaWithT(t)
	helper := newHelper(t)
	defer helper.close()
	deps := helper.deps
	m := NewBackupScheduleManager(deps).(*backupScheduleManager)
	var err error
	bs1 := &v1alpha1.BackupSchedule{}
	bs1.Namespace = "ns"
	bs1.Name = "bsname1"
	bs1.Status.LastBackup = "bs1_backupname"

	bk1 := &v1alpha1.Backup{}
	bk1.Namespace = bs1.Namespace
	bk1.Name = bs1.Status.LastBackup
	bk1.Status.Conditions = append(bk1.Status.Conditions, v1alpha1.BackupCondition{
		Type:   v1alpha1.BackupComplete,
		Status: v1.ConditionTrue,
	})
	helper.createBackup(bk1)
	helper.createBackupSchedule(bs1)

	err = m.canPerformNextBackup(bs1)
	g.Expect(err).Should(BeNil())

	// create another schedule, without the special label
	bs2 := &v1alpha1.BackupSchedule{}
	bs2.Namespace = "ns"
	bs2.Name = "bsname2"
	bs2.Status.LastBackup = "bs2_backupname"

	// test backup complete
	bk2 := &v1alpha1.Backup{}
	bk2.Namespace = bs2.Namespace
	bk2.Name = bs2.Status.LastBackup
	bk2.Status.Conditions = append(bk2.Status.Conditions, v1alpha1.BackupCondition{
		Type:   v1alpha1.BackupComplete,
		Status: v1.ConditionTrue,
	})
	helper.createBackup(bk2)
	helper.createBackupSchedule(bs2)
	err = m.canPerformNextBackup(bs2)
	g.Expect(err).Should(BeNil())
	helper.deleteBackup(bk1)
	helper.deleteBackup(bk2)
	helper.deleteBackupSchedule(bs1)
	helper.deleteBackupSchedule(bs2)

	// make 2 schedules in the same group, but neither has active backup
	bs11 := &v1alpha1.BackupSchedule{}
	bs11.Namespace = "ns"
	bs11.Name = "bsname11"
	bs11.Labels = label.NewBackupScheduleGroup("group1")
	bs11.Status.LastBackup = "bs11_backupname"

	bk11 := &v1alpha1.Backup{}
	bk11.Namespace = bs11.Namespace
	bk11.Name = bs11.Status.LastBackup
	bk11.Status.Conditions = append(bk11.Status.Conditions, v1alpha1.BackupCondition{
		Type:   v1alpha1.BackupComplete,
		Status: v1.ConditionTrue,
	})
	helper.createBackup(bk11)
	helper.createBackupSchedule(bs11)
	err = m.canPerformNextBackup(bs11)
	g.Expect(err).Should(BeNil())

	// create another schedule
	bs12 := &v1alpha1.BackupSchedule{}
	bs12.Namespace = "ns"
	bs12.Name = "bsname12"
	bs12.Labels = label.NewBackupScheduleGroup("group1")
	bs12.Status.LastBackup = "bs12_backupname"

	// test backup complete
	bk12 := &v1alpha1.Backup{}
	bk12.Namespace = bs12.Namespace
	bk12.Name = bs12.Status.LastBackup
	bk12.Status.Conditions = append(bk12.Status.Conditions, v1alpha1.BackupCondition{
		Type:   v1alpha1.BackupComplete,
		Status: v1.ConditionTrue,
	})
	helper.createBackup(bk12)
	helper.createBackupSchedule(bs12)
	err = m.canPerformNextBackup(bs12)
	g.Expect(err).Should(BeNil())
	helper.deleteBackup(bk11)
	helper.deleteBackup(bk12)
	helper.deleteBackupSchedule(bs11)
	helper.deleteBackupSchedule(bs12)

	// make 2 schedules in the same group, has conflicting backup
	bs21 := &v1alpha1.BackupSchedule{}
	bs21.Namespace = "ns"
	bs21.Name = "bsname21"
	bs21.Labels = label.NewBackupScheduleGroup("group2")
	bs21.Status.LastBackup = "bs21_backupname"

	bk21 := &v1alpha1.Backup{}
	bk21.Namespace = bs21.Namespace
	bk21.Name = bs21.Status.LastBackup
	bk21.Status.Conditions = append(bk21.Status.Conditions, v1alpha1.BackupCondition{
		Type:   v1alpha1.BackupRunning,
		Status: v1.ConditionTrue,
	})
	helper.createBackup(bk21)
	helper.createBackupSchedule(bs21)
	err = m.canPerformNextBackup(bs21)
	// Assert non-nil first so an unexpected nil error is reported as an
	// assertion failure rather than a nil-pointer panic in err.Error().
	g.Expect(err).ShouldNot(BeNil())
	g.Expect(err.Error()).Should(MatchRegexp("backup schedule ns/bsname21, the last backup bs21_backupname is still running"))

	// create another schedule
	bs22 := &v1alpha1.BackupSchedule{}
	bs22.Namespace = "ns"
	bs22.Name = "bsname22"
	bs22.Labels = label.NewBackupScheduleGroup("group2")
	bs22.Status.LastBackup = "bs22_backupname"

	// test backup complete
	bk22 := &v1alpha1.Backup{}
	bk22.Namespace = bs22.Namespace
	bk22.Name = bs22.Status.LastBackup
	bk22.Status.Conditions = append(bk22.Status.Conditions, v1alpha1.BackupCondition{
		Type:   v1alpha1.BackupComplete,
		Status: v1.ConditionTrue,
	})
	helper.createBackup(bk22)
	helper.createBackupSchedule(bs22)
	err = m.canPerformNextBackup(bs22)
	// bs22's own backup is complete, but its group sibling bs21 still has a
	// running backup, so the error must name bs21's backup.
	g.Expect(err).ShouldNot(BeNil())
	g.Expect(err.Error()).Should(MatchRegexp("backup schedule ns/bsname22, the last backup bs21_backupname is still running"))
	helper.deleteBackup(bk21)
	helper.deleteBackup(bk22)
	helper.deleteBackupSchedule(bs21)
	helper.deleteBackupSchedule(bs22)
}

func TestGetLastScheduledTime(t *testing.T) {
g := NewGomegaWithT(t)
Expand Down Expand Up @@ -471,6 +605,30 @@ func (h *helper) deleteBackup(bk *v1alpha1.Backup) {
}, time.Second*10).ShouldNot(BeNil())
}

// createBackupSchedule creates bk through the clientset and then polls the
// BackupScheduleLister, for up to 10 seconds, until a Get for bk.Name in
// bk.Namespace returns no error.
func (h *helper) createBackupSchedule(bk *v1alpha1.BackupSchedule) {
	g := NewGomegaWithT(h.t)

	schedules := h.deps.Clientset.PingcapV1alpha1().BackupSchedules(bk.Namespace)
	_, createErr := schedules.Create(context.TODO(), bk, metav1.CreateOptions{})
	g.Expect(createErr).Should(BeNil())

	// Wait for the created object to become visible through the lister.
	visible := func() error {
		_, getErr := h.deps.BackupScheduleLister.BackupSchedules(bk.Namespace).Get(bk.Name)
		return getErr
	}
	g.Eventually(visible, 10*time.Second).Should(BeNil())
}

// deleteBackupSchedule deletes bk through the clientset and then polls the
// BackupScheduleLister, for up to 10 seconds, until a Get for bk.Name in
// bk.Namespace returns a non-nil error.
func (h *helper) deleteBackupSchedule(bk *v1alpha1.BackupSchedule) {
	g := NewGomegaWithT(h.t)

	schedules := h.deps.Clientset.PingcapV1alpha1().BackupSchedules(bk.Namespace)
	deleteErr := schedules.Delete(context.TODO(), bk.Name, metav1.DeleteOptions{})
	g.Expect(deleteErr).Should(BeNil())

	// Wait until the lister no longer returns the object successfully.
	lookup := func() error {
		_, getErr := h.deps.BackupScheduleLister.BackupSchedules(bk.Namespace).Get(bk.Name)
		return getErr
	}
	g.Eventually(lookup, 10*time.Second).ShouldNot(BeNil())
}

func fakeBackup(ts *int64) *v1alpha1.Backup {
backup := &v1alpha1.Backup{}
if ts == nil {
Expand Down
52 changes: 44 additions & 8 deletions pkg/fedvolumebackup/backupschedule/backup_schedule_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,19 +194,55 @@ func (bm *backupScheduleManager) canPerformNextBackup(vbs *v1alpha1.VolumeBackup
ns := vbs.GetNamespace()
bsName := vbs.GetName()

backup, err := bm.deps.VolumeBackupLister.VolumeBackups(ns).Get(vbs.Status.LastBackup)
if err != nil {
if errors.IsNotFound(err) {
// If this backup schedule has specified label of backup schedule group, then we need to check the last backup of the group.
// Otherwise, check its own last backup.
bsGroupName := vbs.GetLabels()[label.BackupScheduleGroupLabelKey]

if bsGroupName == "" {
backup, err := bm.deps.VolumeBackupLister.VolumeBackups(ns).Get(vbs.Status.LastBackup)
if err != nil {
if errors.IsNotFound(err) {
return nil
}
return fmt.Errorf("backup schedule %s/%s, get backup %s failed, err: %v", ns, bsName, vbs.Status.LastBackup, err)
}

if v1alpha1.IsVolumeBackupComplete(backup) || v1alpha1.IsVolumeBackupFailed(backup) {
return nil
}
return fmt.Errorf("backup schedule %s/%s, get backup %s failed, err: %v", ns, bsName, vbs.Status.LastBackup, err)
// skip this sync round of the backup schedule and waiting the last backup.
return controller.RequeueErrorf("backup schedule %s/%s, the last backup %s is still running", ns, bsName, vbs.Status.LastBackup)
}

if v1alpha1.IsVolumeBackupComplete(backup) || v1alpha1.IsVolumeBackupFailed(backup) {
return nil
// Check the last backup of the group
backupScheduleGroupLabels := label.NewBackupScheduleGroup(bsGroupName)
selector, err := backupScheduleGroupLabels.Selector()
if err != nil {
return fmt.Errorf("generate backup schedule group %s label selector failed, err: %v", bsGroupName, err)
}
vbss, err := bm.deps.VolumeBackupScheduleLister.VolumeBackupSchedules(ns).List(selector)
if err != nil {
return fmt.Errorf("backup schedule %s/%s, list backup schedules failed, err: %v", ns, bsName, err)
}
// skip this sync round of the backup schedule and waiting the last backup.
return controller.RequeueErrorf("backup schedule %s/%s, the last backup %s is still running", ns, bsName, vbs.Status.LastBackup)

// Walk every schedule in the group; the next backup may only start when no
// member's last backup is still in progress.
for _, vbsMember := range vbss {
	// The check is not safe in fact since we don't have strict serialization
	backup, err := bm.deps.VolumeBackupLister.VolumeBackups(ns).Get(vbsMember.Status.LastBackup)
	if err != nil {
		if errors.IsNotFound(err) {
			// A member whose last backup cannot be found does not block the group.
			continue
		}
		// Report the member's backup name (the one actually looked up), not
		// the current schedule's own last backup.
		return fmt.Errorf("backup schedule %s/%s, get backup %s failed, err: %v", ns, bsName, vbsMember.Status.LastBackup, err)
	}

	if v1alpha1.IsVolumeBackupComplete(backup) || v1alpha1.IsVolumeBackupFailed(backup) {
		continue
	}
	// skip this sync round of the backup schedule and wait for the last backup.
	return controller.RequeueErrorf("backup schedule %s/%s, the last backup %s is still running", ns, bsName, vbsMember.Status.LastBackup)
}

return nil
}

func (bm *backupScheduleManager) backupGC(vbs *v1alpha1.VolumeBackupSchedule) {
Expand Down
Loading

0 comments on commit e2d06e6

Please sign in to comment.