@@ -23,62 +23,82 @@ import (
 func TestFallback(tt *testing.T) {
 	t := commontesthelpers.NewE(tt)
 	cs := getClients(t)
+	ctx, cancel := context.WithTimeout(context.Background(), 20*time.Minute)
+	defer cancel()

 	t.Log("Starting the fallback test")
 	clusterStateWaitPollTimeout, clusterMustBeReadyFor, waitForFallbackDegradedConditionTimeout := fallbackTimeoutsForCurrentPlatform(t, cs)

 	// before starting a new test make sure the current state of the cluster is good
-	ensureClusterInGoodState(t, cs, clusterStateWaitPollTimeout, clusterMustBeReadyFor)
+	ensureClusterInGoodState(ctx, t, cs, clusterStateWaitPollTimeout, clusterMustBeReadyFor)

 	// cause a disruption
 	cfg := getDefaultUnsupportedConfigForCurrentPlatform(t, cs)
 	cfg["apiServerArguments"] = map[string][]string{"non-existing-flag": {"true"}}
 	setUnsupportedConfig(t, cs, cfg)

 	// validate that the fallback condition is reported and the cluster is stable
-	waitForFallbackDegradedCondition(t, cs, waitForFallbackDegradedConditionTimeout)
+	waitForFallbackDegradedCondition(ctx, t, cs, waitForFallbackDegradedConditionTimeout)
 	nodeName, failedRevision := assertFallbackOnNodeStatus(t, cs)
 	assertKasPodAnnotatedOnNode(t, cs, failedRevision, nodeName)

-	// clean up
+	// clean up; extra time is needed for the KAS operator to become ready
 	setUnsupportedConfig(t, cs, getDefaultUnsupportedConfigForCurrentPlatform(t, cs))
-	err := waitForClusterInGoodState(t, cs, clusterStateWaitPollTimeout, clusterMustBeReadyFor)
+	err := waitForClusterInGoodState(ctx, t, cs, clusterStateWaitPollTimeout, 5*clusterMustBeReadyFor)
 	require.NoError(t, err)
 }

 // ensureClusterInGoodState makes sure the cluster is not progressing for the mustBeReadyFor period;
 // in addition, in an HA env it applies getDefaultUnsupportedConfigForCurrentPlatform so that the feature is enabled before the test starts
-func ensureClusterInGoodState(t testing.TB, cs clientSet, waitPollTimeout, mustBeReadyFor time.Duration) {
+func ensureClusterInGoodState(ctx context.Context, t testing.TB, cs clientSet, waitPollTimeout, mustBeReadyFor time.Duration) {
 	setUnsupportedConfig(t, cs, getDefaultUnsupportedConfigForCurrentPlatform(t, cs))
-	err := waitForClusterInGoodState(t, cs, waitPollTimeout, mustBeReadyFor)
+	err := waitForClusterInGoodState(ctx, t, cs, waitPollTimeout, mustBeReadyFor)
 	require.NoError(t, err)
 }

 // waitForClusterInGoodState checks that the cluster is not progressing
-func waitForClusterInGoodState(t testing.TB, cs clientSet, waitPollTimeout, mustBeReadyFor time.Duration) error {
+func waitForClusterInGoodState(ctx context.Context, t testing.TB, cs clientSet, waitPollTimeout, mustBeReadyFor time.Duration) error {
 	t.Helper()

 	startTs := time.Now()
-	t.Logf("Waiting %s for the cluster to be in a good condition, interval = 10s, timeout %v", mustBeReadyFor.String(), waitPollTimeout)
+	t.Logf("Waiting %s for the cluster to be in a good condition, interval = 20s, timeout %v", mustBeReadyFor.String(), waitPollTimeout)
+
+	return wait.PollUntilContextTimeout(ctx, 20*time.Second, waitPollTimeout, true, func(cxt context.Context) (bool, error) {
+		// Check if the context is already done to prevent unnecessary API calls
+		select {
+		case <-ctx.Done():
+			t.Logf("Context canceled before cluster reached good state: %v", ctx.Err())
+			return false, ctx.Err() // Return the context error to indicate cancellation
+		default:
+		}

-	return wait.Poll(10*time.Second, waitPollTimeout, func() (bool, error) {
-		ckaso, err := cs.Operator.Get(context.TODO(), "cluster", metav1.GetOptions{})
+		// Use ctx instead of context.TODO()
+		ckaso, err := cs.Operator.Get(ctx, "cluster", metav1.GetOptions{})
 		if err != nil {
 			t.Log(err)
 			return false, nil /*retry*/
 		}

+		// Check if any node is still progressing
 		for _, ns := range ckaso.Status.NodeStatuses {
 			if ckaso.Status.LatestAvailableRevision != ns.CurrentRevision || ns.TargetRevision > 0 {
-				t.Logf("Node %s is progressing, latestAvailableRevision: %v, currentRevision: %v, targetRevision: %v", ns.NodeName, ckaso.Status.LatestAvailableRevision, ns.CurrentRevision, ns.TargetRevision)
+				t.Logf("Node %s is progressing, latestAvailableRevision: %v, currentRevision: %v, targetRevision: %v",
+					ns.NodeName, ckaso.Status.LatestAvailableRevision, ns.CurrentRevision, ns.TargetRevision)
 				return false, nil /*retry*/
 			}
 		}

-		if time.Since(startTs) > mustBeReadyFor {
+		// Verify operator conditions
+		ckasoAvailable := v1helpers.IsOperatorConditionTrue(ckaso.Status.Conditions, "StaticPodsAvailable")
+		ckasoNotProgressing := v1helpers.IsOperatorConditionFalse(ckaso.Status.Conditions, "NodeInstallerProgressing")
+		ckasoNotDegraded := v1helpers.IsOperatorConditionFalse(ckaso.Status.Conditions, "NodeControllerDegraded")
+
+		// If the cluster has been stable for the required time, return success
+		if time.Since(startTs) > mustBeReadyFor && ckasoAvailable && ckasoNotProgressing && ckasoNotDegraded {
 			t.Logf("The cluster has been in good condition for %s", mustBeReadyFor.String())
 			return true, nil /*done*/
 		}
+
 		return false, nil /*wait a bit more*/
 	})
 }
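
The stability gate added in this hunk now requires three operator conditions on top of the elapsed-time check. For reviewers less familiar with library-go's condition helpers, here is a minimal standalone sketch of how those checks evaluate a conditions slice (it assumes only the github.com/openshift/library-go/pkg/operator/v1helpers and github.com/openshift/api/operator/v1 packages this test already depends on; it is not part of the PR):

package main

import (
	"fmt"

	operatorv1 "github.com/openshift/api/operator/v1"
	"github.com/openshift/library-go/pkg/operator/v1helpers"
)

func main() {
	// Hand-built conditions mimicking a healthy kube-apiserver operator status.
	conditions := []operatorv1.OperatorCondition{
		{Type: "StaticPodsAvailable", Status: operatorv1.ConditionTrue},
		{Type: "NodeInstallerProgressing", Status: operatorv1.ConditionFalse},
		{Type: "NodeControllerDegraded", Status: operatorv1.ConditionFalse},
	}

	// The same three checks the hunk adds to waitForClusterInGoodState.
	available := v1helpers.IsOperatorConditionTrue(conditions, "StaticPodsAvailable")
	notProgressing := v1helpers.IsOperatorConditionFalse(conditions, "NodeInstallerProgressing")
	notDegraded := v1helpers.IsOperatorConditionFalse(conditions, "NodeControllerDegraded")

	fmt.Println(available && notProgressing && notDegraded) // true
}

Requiring all three means a cluster that is momentarily quiet between revisions but still Degraded no longer satisfies the mustBeReadyFor timer.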
@@ -108,11 +128,11 @@ func setUnsupportedConfig(t testing.TB, cs clientSet, cfg map[string]interface{}
 }

 // waitForFallbackDegradedCondition waits until StaticPodFallbackRevisionDegraded condition is set to true
-func waitForFallbackDegradedCondition(t testing.TB, cs clientSet, waitPollTimeout time.Duration) {
+func waitForFallbackDegradedCondition(ctx context.Context, t testing.TB, cs clientSet, waitPollTimeout time.Duration) {
 	t.Helper()

 	t.Logf("Waiting for StaticPodFallbackRevisionDegraded condition, interval = 20s, timeout = %v", waitPollTimeout)
-	err := wait.Poll(20*time.Second, waitPollTimeout, func() (bool, error) {
+	err := wait.PollUntilContextTimeout(ctx, 20*time.Second, waitPollTimeout, true, func(cxt context.Context) (bool, error) {
 		ckaso, err := cs.Operator.Get(context.TODO(), "cluster", metav1.GetOptions{})
 		if err != nil {
 			t.Logf("unable to get kube-apiserver-operator resource: %v", err)
@@ -236,5 +256,5 @@ func fallbackTimeoutsForCurrentPlatform(t testing.TB, cs clientSet) (time.Durati
 	   including the time the server needs to become ready and be noticed by a Load Balancer
 	   a longer duration allows us to collect logs and the must-gather
 	*/
-	return 10 * time.Minute /*clusterStateWaitPollInterval*/, 1 * time.Minute /*clusterMustBeReadyFor*/, 10 * time.Minute /*waitForFallbackDegradedConditionTimeout*/
+	return 10 * time.Minute /*clusterStateWaitPollInterval*/, 1 * time.Minute /*clusterMustBeReadyFor*/, 18 * time.Minute /*waitForFallbackDegradedConditionTimeout*/
 }
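
A note on the bump from 10 to 18 minutes: the per-phase poll timeouts can now sum past the 20-minute context created at the top of TestFallback, so the shared ctx, not the individual timeouts, becomes the hard ceiling once phases stack up (wait.PollUntilContextTimeout stops at whichever deadline comes first). A quick check using only the numbers in this diff:

package main

import (
	"fmt"
	"time"
)

func main() {
	testCtxBudget := 20 * time.Minute // context.WithTimeout in TestFallback
	goodStateWait := 10 * time.Minute // clusterStateWaitPollTimeout
	fallbackWait := 18 * time.Minute  // waitForFallbackDegradedConditionTimeout, was 10

	// Phases share one parent context, so whatever exceeds the budget is cancelled.
	fmt.Println(goodStateWait+fallbackWait > testCtxBudget) // true
}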