Skip to content

Commit b6b7345

Browse files
authored
OTA-861: Set Upgradeable=False when there is an upgrade in progress (#1080)
* inhibit the 2nd minor version upgrade * Block Y stream upgrade if any upgrade is in progress This commits entends the guard on the 2nd Y-stream upgrade, i.e., blocking an Y-stream upgrade if there is already an Y-stream upgrade in progress, to the guard on any Y-stream upgrade if there is already an upgrade in progress, reguardless of Y-stream or Z-stream. For example, it covers the case 4.14.15-> 4.14.35 -> 4.15.29 where the upgrade 4.14.35 -> 4.15.29 is blocked until the upgrade 4.14.15-> 4.14.35 completes. Note that we still allow for upgrade to 4.y+1.z'' in the middle of upgrade 4.y.z -> 4.y+1.z', even though direct upgrade 4.y.z -> 4.y+1.z'' might not be supported. This is because the ugprade 4.y.z -> 4.y+1.z' might not be completed up to a bug in 4.y+1.z' that has a fix in 4.(y+1).z''. We need the retarget to it to land 4.y+1 on the cluster. * Generate accepted risks in an unblocking case This commit generates a message in the accepted risks for the unblocking case 4.y.z -> 4.y+1.z' -> 4.y+1.z''. * Use Progressing=True to block minor level upgrade * Drop the guard on UpgradeInProgress The guard will be added on another pull request. * Make the type UpgradeableUpgradeInProgress private * Improve condition's reason and message
1 parent 820b74a commit b6b7345

File tree

4 files changed

+148
-23
lines changed

4 files changed

+148
-23
lines changed

pkg/cvo/cvo.go

+27-19
Original file line numberDiff line numberDiff line change
@@ -449,7 +449,9 @@ func (optr *Operator) Run(runContext context.Context, shutdownContext context.Co
449449
resultChannelCount++
450450
go func() {
451451
defer utilruntime.HandleCrash()
452-
wait.UntilWithContext(runContext, func(runContext context.Context) { optr.worker(runContext, optr.upgradeableQueue, optr.upgradeableSync) }, time.Second)
452+
wait.UntilWithContext(runContext, func(runContext context.Context) {
453+
optr.worker(runContext, optr.upgradeableQueue, optr.upgradeableSyncFunc(false))
454+
}, time.Second)
453455
resultChannel <- asyncResult{name: "upgradeable"}
454456
}()
455457

@@ -459,6 +461,10 @@ func (optr *Operator) Run(runContext context.Context, shutdownContext context.Co
459461
wait.UntilWithContext(runContext, func(runContext context.Context) {
460462
// run the worker, then when the queue is closed sync one final time to flush any pending status
461463
optr.worker(runContext, optr.queue, func(runContext context.Context, key string) error { return optr.sync(runContext, key) })
464+
// This is to ensure upgradeableCondition to be synced and thus to avoid the race caused by the throttle
465+
if err := optr.upgradeableSyncFunc(true)(shutdownContext, optr.queueKey()); err != nil {
466+
utilruntime.HandleError(fmt.Errorf("unable to perform final upgradeable sync: %v", err))
467+
}
462468
if err := optr.sync(shutdownContext, optr.queueKey()); err != nil {
463469
utilruntime.HandleError(fmt.Errorf("unable to perform final sync: %v", err))
464470
}
@@ -743,27 +749,29 @@ func (optr *Operator) availableUpdatesSync(ctx context.Context, key string) erro
743749
return optr.syncAvailableUpdates(ctx, config)
744750
}
745751

746-
// upgradeableSync is triggered on cluster version change (and periodic requeues) to
752+
// upgradeableSyncFunc returns a function that is triggered on cluster version change (and periodic requeues) to
747753
// sync upgradeableCondition. It only modifies cluster version.
748-
func (optr *Operator) upgradeableSync(_ context.Context, key string) error {
749-
startTime := time.Now()
750-
klog.V(2).Infof("Started syncing upgradeable %q", key)
751-
defer func() {
752-
klog.V(2).Infof("Finished syncing upgradeable %q (%v)", key, time.Since(startTime))
753-
}()
754+
func (optr *Operator) upgradeableSyncFunc(ignoreThrottlePeriod bool) func(_ context.Context, key string) error {
755+
return func(_ context.Context, key string) error {
756+
startTime := time.Now()
757+
klog.V(2).Infof("Started syncing upgradeable %q", key)
758+
defer func() {
759+
klog.V(2).Infof("Finished syncing upgradeable %q (%v)", key, time.Since(startTime))
760+
}()
754761

755-
config, err := optr.cvLister.Get(optr.name)
756-
if apierrors.IsNotFound(err) {
757-
return nil
758-
}
759-
if err != nil {
760-
return err
761-
}
762-
if errs := validation.ValidateClusterVersion(config); len(errs) > 0 {
763-
return nil
764-
}
762+
config, err := optr.cvLister.Get(optr.name)
763+
if apierrors.IsNotFound(err) {
764+
return nil
765+
}
766+
if err != nil {
767+
return err
768+
}
769+
if errs := validation.ValidateClusterVersion(config); len(errs) > 0 {
770+
return nil
771+
}
765772

766-
return optr.syncUpgradeable(config)
773+
return optr.syncUpgradeable(config, ignoreThrottlePeriod)
774+
}
767775
}
768776

769777
// isOlderThanLastUpdate returns true if the cluster version is older than

pkg/cvo/cvo_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -3868,7 +3868,7 @@ func TestOperator_upgradeableSync(t *testing.T) {
38683868
waitForCm(t, cms)
38693869
}
38703870

3871-
err = optr.upgradeableSync(ctx, optr.queueKey())
3871+
err = optr.upgradeableSyncFunc(false)(ctx, optr.queueKey())
38723872
if err != nil && tt.wantErr == nil {
38733873
t.Fatalf("Operator.sync() unexpected error: %v", err)
38743874
}

pkg/cvo/upgradeable.go

+32-3
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ func defaultUpgradeableCheckIntervals() upgradeableCheckIntervals {
6161

6262
// syncUpgradeable synchronizes the upgradeable status only if the sufficient time passed since its last update. This
6363
// throttling period is dynamic and is driven by upgradeableCheckIntervals.
64-
func (optr *Operator) syncUpgradeable(cv *configv1.ClusterVersion) error {
65-
if u := optr.getUpgradeable(); u != nil {
64+
func (optr *Operator) syncUpgradeable(cv *configv1.ClusterVersion, ignoreThrottlePeriod bool) error {
65+
if u := optr.getUpgradeable(); u != nil && !ignoreThrottlePeriod {
6666
throttleFor := optr.upgradeableCheckIntervals.throttlePeriod(cv)
6767
if earliestNext := u.At.Add(throttleFor); time.Now().Before(earliestNext) {
6868
klog.V(2).Infof("Upgradeability last checked %s ago, will not re-check until %s", time.Since(u.At), earliestNext.Format(time.RFC3339))
@@ -176,7 +176,7 @@ func (optr *Operator) getUpgradeable() *upgradeable {
176176
}
177177

178178
type upgradeableCheck interface {
179-
// returns a not-nil condition when the check fails.
179+
// Check returns a not-nil condition that should be addressed before a minor level upgrade when the check fails.
180180
Check() *configv1.ClusterOperatorStatusCondition
181181
}
182182

@@ -262,6 +262,34 @@ func (check *clusterVersionOverridesUpgradeable) Check() *configv1.ClusterOperat
262262
return cond
263263
}
264264

265+
type upgradeInProgressUpgradeable struct {
266+
name string
267+
cvLister configlistersv1.ClusterVersionLister
268+
}
269+
270+
func (check *upgradeInProgressUpgradeable) Check() *configv1.ClusterOperatorStatusCondition {
271+
cond := &configv1.ClusterOperatorStatusCondition{
272+
Type: "UpgradeableUpgradeInProgress",
273+
Status: configv1.ConditionTrue,
274+
}
275+
276+
cv, err := check.cvLister.Get(check.name)
277+
if meta.IsNoMatchError(err) || apierrors.IsNotFound(err) {
278+
return nil
279+
} else if err != nil {
280+
klog.Error(err)
281+
return nil
282+
}
283+
284+
if progressingCondition := resourcemerge.FindOperatorStatusCondition(cv.Status.Conditions, configv1.OperatorProgressing); progressingCondition != nil &&
285+
progressingCondition.Status == configv1.ConditionTrue {
286+
cond.Reason = "UpdateInProgress"
287+
cond.Message = fmt.Sprintf("An update is already in progress and the details are in the %s condition", configv1.OperatorProgressing)
288+
return cond
289+
}
290+
return nil
291+
}
292+
265293
type clusterManifestDeleteInProgressUpgradeable struct {
266294
}
267295

@@ -420,6 +448,7 @@ func (optr *Operator) defaultUpgradeableChecks() []upgradeableCheck {
420448
},
421449
&clusterOperatorsUpgradeable{coLister: optr.coLister},
422450
&clusterManifestDeleteInProgressUpgradeable{},
451+
&upgradeInProgressUpgradeable{name: optr.name, cvLister: optr.cvLister},
423452
}
424453
}
425454

pkg/cvo/upgradeable_test.go

+88
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ import (
44
"testing"
55
"time"
66

7+
"github.com/google/go-cmp/cmp"
78
configv1 "github.com/openshift/api/config/v1"
9+
"github.com/openshift/client-go/config/clientset/versioned/fake"
810
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
911
)
1012

@@ -57,3 +59,89 @@ func TestUpgradeableCheckIntervalsThrottlePeriod(t *testing.T) {
5759
})
5860
}
5961
}
62+
63+
func TestUpgradeInProgressUpgradeable(t *testing.T) {
64+
testCases := []struct {
65+
name string
66+
cv *configv1.ClusterVersion
67+
expected *configv1.ClusterOperatorStatusCondition
68+
}{
69+
{
70+
name: "empty conditions",
71+
cv: &configv1.ClusterVersion{
72+
ObjectMeta: metav1.ObjectMeta{
73+
Name: "unit-test",
74+
},
75+
},
76+
},
77+
{
78+
name: "progressing is true",
79+
cv: &configv1.ClusterVersion{
80+
ObjectMeta: metav1.ObjectMeta{
81+
Name: "unit-test",
82+
},
83+
Status: configv1.ClusterVersionStatus{
84+
Conditions: []configv1.ClusterOperatorStatusCondition{
85+
{
86+
Type: configv1.OperatorProgressing,
87+
Status: configv1.ConditionTrue,
88+
Reason: "UpdateInProgress",
89+
Message: "An update is already in progress and the details are in the Progressing condition",
90+
},
91+
},
92+
},
93+
},
94+
expected: &configv1.ClusterOperatorStatusCondition{
95+
Type: "UpgradeableUpgradeInProgress",
96+
Status: configv1.ConditionTrue,
97+
Reason: "UpdateInProgress",
98+
Message: "An update is already in progress and the details are in the Progressing condition",
99+
},
100+
},
101+
{
102+
name: "progressing is false",
103+
cv: &configv1.ClusterVersion{
104+
ObjectMeta: metav1.ObjectMeta{
105+
Name: "unit-test",
106+
},
107+
Status: configv1.ClusterVersionStatus{
108+
Conditions: []configv1.ClusterOperatorStatusCondition{
109+
{
110+
Type: configv1.OperatorProgressing,
111+
Status: configv1.ConditionFalse,
112+
Reason: "a",
113+
Message: "b",
114+
},
115+
},
116+
},
117+
},
118+
},
119+
{
120+
name: "progressing has no status",
121+
cv: &configv1.ClusterVersion{
122+
ObjectMeta: metav1.ObjectMeta{
123+
Name: "unit-test",
124+
},
125+
Status: configv1.ClusterVersionStatus{
126+
Conditions: []configv1.ClusterOperatorStatusCondition{
127+
{
128+
Type: configv1.OperatorProgressing,
129+
Reason: "a",
130+
Message: "b",
131+
},
132+
},
133+
},
134+
},
135+
},
136+
}
137+
for _, tc := range testCases {
138+
t.Run(tc.name, func(t *testing.T) {
139+
client := fake.NewSimpleClientset(tc.cv)
140+
unit := upgradeInProgressUpgradeable{name: "unit-test", cvLister: &clientCVLister{client: client}}
141+
actual := unit.Check()
142+
if diff := cmp.Diff(tc.expected, actual); diff != "" {
143+
t.Errorf("%s differs from expected:\n%s", tc.name, diff)
144+
}
145+
})
146+
}
147+
}

0 commit comments

Comments
 (0)