Skip to content

Commit fcb640e

Browse files
author
Nikolas De Giorgis
authored
CLOUDP-64640: fix deploy operator (#55)
1 parent 643db47 commit fcb640e

File tree

3 files changed

+115
-42
lines changed

3 files changed

+115
-42
lines changed

scripts/dev/build_and_deploy_operator.py

+13-27
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
import io
22
import os
3-
import time
43
from typing import Dict, Optional
54

65
import yaml
76
from kubernetes import client, config
8-
from kubernetes.client.rest import ApiException
97

108
from dev_config import DevConfig, load_config
119
from dockerfile_generator import render
1210
from dockerutil import build_and_push_image
1311

12+
from k8sutil import wait_for_condition, ignore_if_already_exists, ignore_if_doesnt_exist
13+
1414

1515
def _load_operator_service_account() -> Optional[Dict]:
1616
return load_yaml_from_file("deploy/service_account.yaml")
@@ -51,6 +51,17 @@ def _ensure_crds():
5151
lambda: crdv1.delete_custom_resource_definition("mongodb.mongodb.com")
5252
)
5353

54+
# Make sure that the CRD has being deleted before trying to create it again
55+
if not wait_for_condition(
56+
lambda: crdv1.list_custom_resource_definition(
57+
field_selector="metadata.name==mongodb.mongodb.com"
58+
),
59+
lambda crd_list: len(crd_list.items) == 0,
60+
timeout=5,
61+
sleep_time=0.5,
62+
):
63+
raise Exception("Execution timed out while waiting for the CRD to be deleted")
64+
5465
# TODO: fix this, when calling create_custom_resource_definition, we get the error
5566
# ValueError("Invalid value for `conditions`, must not be `None`")
5667
# but the crd is still successfully created
@@ -70,31 +81,6 @@ def build_and_push_operator(repo_url: str, tag: str, path: str):
7081
return build_and_push_image(repo_url, tag, path, "operator")
7182

7283

73-
def _ignore_error_codes(fn, codes):
74-
try:
75-
fn()
76-
except ApiException as e:
77-
if e.status not in codes:
78-
raise
79-
80-
81-
def ignore_if_already_exists(fn):
82-
"""
83-
ignore_if_already_exists accepts a function and calls it,
84-
ignoring an Kubernetes API conflict errors
85-
"""
86-
87-
return _ignore_error_codes(fn, [409])
88-
89-
90-
def ignore_if_doesnt_exist(fn):
91-
"""
92-
ignore_if_doesnt_exist accepts a function and calls it,
93-
ignoring an Kubernetes API not found errors
94-
"""
95-
return _ignore_error_codes(fn, [404])
96-
97-
9884
def deploy_operator():
9985
"""
10086
deploy_operator ensures the CRDs are created, and als creates all the required ServiceAccounts, Roles

scripts/dev/e2e.py

+32-15
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,13 @@
44
from build_and_deploy_operator import (
55
build_and_push_operator,
66
deploy_operator,
7+
load_yaml_from_file,
8+
)
9+
from k8sutil import (
10+
wait_for_condition,
711
ignore_if_doesnt_exist,
812
ignore_if_already_exists,
9-
load_yaml_from_file,
10-
) # TODO: put these function somewhere else
13+
)
1114
from dockerutil import build_and_push_image
1215
from typing import Dict, Optional
1316
from dev_config import load_config
@@ -140,9 +143,35 @@ def create_test_runner_pod(test: str):
140143
dev_config = load_config()
141144
corev1 = client.CoreV1Api()
142145
pod_body = _get_testrunner_pod_body(test)
146+
147+
if not wait_for_condition(
148+
lambda: corev1.list_namespaced_pod(
149+
dev_config.namespace, field_selector=f"metadata.name=={TEST_RUNNER_NAME}"
150+
),
151+
lambda pod_list: len(pod_list.items) == 0,
152+
timeout=10,
153+
sleep_time=0.5,
154+
):
155+
156+
raise Exception(
157+
"Execution timed out while waiting for the existing pod to be deleted"
158+
)
159+
143160
return corev1.create_namespaced_pod(dev_config.namespace, body=pod_body)
144161

145162

163+
def wait_for_pod_to_be_running(corev1, name, namespace):
164+
print("Waiting for pod to be running")
165+
if not wait_for_condition(
166+
lambda: corev1.read_namespaced_pod(name, namespace),
167+
lambda pod: pod.status.phase == "Running",
168+
sleep_time=5,
169+
timeout=50,
170+
exceptions_to_ignore=ApiException,
171+
):
172+
raise Exception("Pod never got into Running state!")
173+
174+
146175
def _get_testrunner_pod_body(test: str) -> Dict:
147176
dev_config = load_config()
148177
return {
@@ -173,19 +202,6 @@ def _get_testrunner_pod_body(test: str) -> Dict:
173202
}
174203

175204

176-
def wait_for_pod_to_be_running(corev1, name, namespace):
177-
print("Waiting for pod to be running")
178-
for i in range(10):
179-
try:
180-
pod = corev1.read_namespaced_pod(name, namespace)
181-
if pod.status.phase == "Running":
182-
return True
183-
except ApiException as e:
184-
pass
185-
time.sleep(5)
186-
raise Exception("Pod never got into Running state!")
187-
188-
189205
def parse_args():
190206
parser = argparse.ArgumentParser()
191207
parser.add_argument("--test", help="Name of the test to run")
@@ -222,6 +238,7 @@ def main():
222238

223239
pod = create_test_runner_pod(args.test)
224240
corev1 = client.CoreV1Api()
241+
225242
wait_for_pod_to_be_running(corev1, TEST_RUNNER_NAME, dev_config.namespace)
226243

227244
# stream all of the pod output as the pod is running

scripts/dev/k8sutil.py

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import time
2+
3+
from kubernetes.client.rest import ApiException
4+
5+
# time to sleep between retries
6+
SLEEP_TIME = 2
7+
# no timeout (loop forever)
8+
INFINITY = -1
9+
10+
11+
def _current_milliseconds() -> int:
12+
return int(round(time.time() * 1000))
13+
14+
15+
def wait_for_condition(
16+
fn,
17+
condition,
18+
exceptions_to_ignore=None,
19+
codes_to_ignore=None,
20+
sleep_time=SLEEP_TIME,
21+
timeout=INFINITY,
22+
) -> bool:
23+
"""
24+
wait_for_condition accepts a function fn and a function condition,
25+
it periodically calls the function fn and then applies the condition function on the result
26+
until it returns True or we reach timeout
27+
28+
exceptions_to_ignore is a tuple of Exceptions to ignore is raised by the call to fn
29+
If ApiException is not ignored, if raised by the call to fn codes in codes_to_ignore are ignored
30+
"""
31+
start_time = _current_milliseconds()
32+
end = start_time + (timeout * 1000)
33+
34+
while _current_milliseconds() < end or timeout <= 0:
35+
res = None
36+
try:
37+
res = _ignore_error_codes(fn, codes_to_ignore)
38+
except exceptions_to_ignore:
39+
pass
40+
if res is not None and condition(res):
41+
return True
42+
43+
time.sleep(sleep_time)
44+
45+
return False
46+
47+
48+
def _ignore_error_codes(fn, codes):
49+
try:
50+
return fn()
51+
except ApiException as e:
52+
if e.status not in codes:
53+
raise
54+
55+
56+
def ignore_if_already_exists(fn):
57+
"""
58+
ignore_if_already_exists accepts a function and calls it,
59+
ignoring an Kubernetes API conflict errors
60+
"""
61+
62+
return _ignore_error_codes(fn, [409])
63+
64+
65+
def ignore_if_doesnt_exist(fn):
66+
"""
67+
ignore_if_doesnt_exist accepts a function and calls it,
68+
ignoring an Kubernetes API not found errors
69+
"""
70+
return _ignore_error_codes(fn, [404])

0 commit comments

Comments
 (0)