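"""Performance-test harness for the Kubescape operator.

Provisions an EKS cluster with Terraform, points kubectl at it, and installs
the Kubescape operator via Helm (with optional kube-prometheus-stack,
Pyroscope, and microservices-demo deployments for load and profiling).

Example invocations (account ID and access key below are placeholders):

    python performance.py -account <ACCOUNT_ID> -accessKey <ACCESS_KEY> -nodes 3 -kdr
    python performance.py -account <ACCOUNT_ID> -accessKey <ACCESS_KEY> -destroy
"""
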
import os
import sys
import time
import argparse
import subprocess
from concurrent.futures import ThreadPoolExecutor, as_completed

def run_command(command, cwd=None):
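    """Run a shell command, echoing its stdout; on failure print the exit
    code and stderr, then re-raise so callers can decide how to recover."""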
    try:
        result = subprocess.run(command, check=True, capture_output=True, text=True, shell=True, cwd=cwd)
        print(result.stdout)
    except subprocess.CalledProcessError as e:
        print(f"Command failed with exit code {e.returncode}")
        print(f"Error output:\n{e.stderr}")
        raise  # let callers decide whether to exit or recover

# Step 1: set up the EKS cluster with Terraform and AWS CLI commands
def setup_cluster(node_count):
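    """Provision the EKS cluster with Terraform (init/plan/apply) and return
    the node count that was actually applied (one more than requested)."""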
    terraform_dir = os.path.join("terraform-test-clusters", "EKS")
    try:
        print("Initializing Terraform...")
        run_command('terraform init', terraform_dir)
        print("Planning Terraform configuration...")
        run_command('terraform plan', terraform_dir)
        node_count = node_count + 1  # provision one extra node on top of the requested count
        print(f"Applying Terraform configuration with {node_count} nodes...")
        run_command(f'terraform apply -auto-approve -var=desired_size={node_count}', terraform_dir)
        return node_count
    except subprocess.CalledProcessError as e:
        print(f"Failed to set up EKS cluster with exit code {e.returncode}")
        print(f"Error output:\n{e.stderr}")
        sys.exit(1)

def connect_to_eks_cluster(region, cluster_name, terraform_dir):
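    """Point the local kubeconfig at the new EKS cluster via
    `aws eks update-kubeconfig`. (`terraform_dir` is accepted for symmetry
    with the other steps but is currently unused.)"""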
    try:
        # Print the command that will be executed
        command = f"aws eks --region {region} update-kubeconfig --name {cluster_name}"
        print(f"Executing: {command}")
        # Execute the command, capturing output so stderr is available on failure
        subprocess.run(command, check=True, shell=True, capture_output=True, text=True)
        print("Successfully connected to the EKS cluster.")
    except subprocess.CalledProcessError as e:
        print(f"Failed to connect to EKS cluster with exit code {e.returncode}")
        print(f"Error output:\n{e.stderr}")
        sys.exit(1)

def deploy_kube_prometheus_stack():
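    """Deploy the kube-prometheus-stack Helm chart into the `monitoring` namespace."""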
    try:
        # Define paths relative to the script's directory
        values_file = "./Monitoring/values/kube-prometheus-stack.yaml"
        chart_path = "./Monitoring/kube-prometheus-stack"
        # Construct the Helm command
        helm_command = (
            f"helm upgrade --install kube-prometheus-stack "
            f"-f {values_file} "
            f"{chart_path} "
            f"-n monitoring --create-namespace"
        )
        # Run the command
        print("Deploying kube-prometheus-stack using Helm...")
        run_command(helm_command)
        print("kube-prometheus-stack deployed successfully.")
    except subprocess.CalledProcessError as e:
        print(f"Failed to deploy kube-prometheus-stack with exit code {e.returncode}")
        print(f"Error output:\n{e.stderr}")
        sys.exit(1)

def deploy_pyroscope():
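    """Deploy the Pyroscope Helm chart (continuous profiling) into the `monitoring` namespace."""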
    try:
        # Define paths relative to the script's directory
        values_file = "./Monitoring/pyroscope/dev-env-values.yaml"
        chart_path = "./Monitoring/pyroscope"
        # Construct the Helm command
        helm_command = (
            f"helm upgrade --install pyroscope "
            f"-f {values_file} "
            f"{chart_path} "
            f"-n monitoring --create-namespace"
        )
        # Run the command
        print("Deploying Pyroscope using Helm...")
        run_command(helm_command)
        print("Pyroscope deployed successfully.")
    except subprocess.CalledProcessError as e:
        print(f"Failed to deploy Pyroscope with exit code {e.returncode}")
        print(f"Error output:\n{e.stderr}")
        sys.exit(1)

def create_namespace(namespace_name):
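    """Create a single Kubernetes namespace; return its name, or None on failure."""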
    try:
        # Capture output so stderr is available if the command fails
        subprocess.run(['kubectl', 'create', 'namespace', namespace_name],
                       check=True, capture_output=True, text=True)
        print(f"Created namespace: {namespace_name}")
        return namespace_name
    except subprocess.CalledProcessError as e:
        print(f"Failed to create namespace {namespace_name} with exit code {e.returncode}")
        print(f"Error output:\n{e.stderr}")
        return None

def create_parallel_namespaces(node_count, skip_cluster=False):
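    """Create (nodes - 2) * 2 namespaces in parallel, i.e. two namespaces for
    every node beyond the first two. With skip_cluster=True the node count is
    read from the live cluster instead of the `node_count` argument."""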
    try:
        if skip_cluster:
            # Derive the namespace count from the live cluster's node count
            result = subprocess.run(
                ['kubectl', 'get', 'nodes', '--no-headers'],
                check=True, capture_output=True, text=True
            )
            total_nodes = len(result.stdout.splitlines())
            num_namespaces = (total_nodes - 2) * 2
        else:
            # Calculate the number of namespaces to create
            num_namespaces = (node_count - 2) * 2
        print(f"Creating {num_namespaces} namespaces")
        # Create namespace names list
        namespace_list = [f"namespace-{i+1}" for i in range(num_namespaces)]
        # Use ThreadPoolExecutor to create namespaces in parallel
        with ThreadPoolExecutor(max_workers=10) as executor:
            future_to_namespace = {executor.submit(create_namespace, ns): ns for ns in namespace_list}
            created_namespaces = []
            for future in as_completed(future_to_namespace):
                result = future.result()
                if result:
                    created_namespaces.append(result)
        print(f"Created {len(created_namespaces)}/{num_namespaces} namespaces successfully.")
        return created_namespaces
    except subprocess.CalledProcessError as e:
        print(f"Failed to create namespaces with exit code {e.returncode}")
        print(f"Error output:\n{e.stderr}")
        sys.exit(1)

# Function to run kubectl apply for a single namespace
def apply_microservices_demo_to_namespace(namespace, microservices_demo_path):
print(f"Applying microservices-demo to namespace {namespace}...")
try:
result = subprocess.run(f'kubectl apply -f {microservices_demo_path} -n {namespace}',
check=True, capture_output=True, text=True, shell=True)
print(f"Successfully applied microservices-demo to namespace {namespace}.")
return True
except subprocess.CalledProcessError as e:
print(f"Failed to apply microservices-demo to namespace {namespace}: {e.stderr}")
return False
# Function to apply the microservices demo to all namespaces in parallel
def apply_microservices_demo(namespaces):
    microservices_demo_path = os.path.join("microservices-demo", "release", "kubernetes-manifests.yaml")
    # Use ThreadPoolExecutor for parallel execution, capped at 20 workers
    with ThreadPoolExecutor(max_workers=min(20, len(namespaces))) as executor:
        futures = {executor.submit(apply_microservices_demo_to_namespace, ns, microservices_demo_path): ns for ns in namespaces}
        for future in as_completed(futures):
            namespace = futures[future]
            try:
                result = future.result()  # re-raises any exception from the worker
                if result:
                    print(f"Namespace {namespace}: Applied successfully.")
                else:
                    print(f"Namespace {namespace}: Failed to apply.")
            except Exception as e:
                print(f"Namespace {namespace}: Exception occurred: {e}")

# Step 2: Deploy Kubescape using Helm
def deploy_kubescape(
    account: str,
    accessKey: str,
    version: str = None,
    enable_kdr: bool = False,
    additional_helm_command: str = None,
    storage_image_tag: str = None,
    node_agent_image_tag: str = None,
    private_node_agent: str = None
):
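    """Install or upgrade the Kubescape operator via Helm.

    The node agent is pointed at the in-cluster Pyroscope service for
    profiling. Chart and image versions can be pinned, and `enable_kdr`
    switches on the runtime-detection capabilities (which pull a private
    node-agent image from quay.io/armosec).
    """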
    try:
        print("Adding Kubescape Helm repository...")
        run_command('helm repo add kubescape https://kubescape.github.io/helm-charts/')
        run_command('helm repo update')
        print("Deploying Kubescape Operator...")
        cluster_context = subprocess.run(['kubectl', 'config', 'current-context'],
                                         check=True, capture_output=True, text=True).stdout.strip()
        helm_command = (
            f'helm upgrade --install kubescape kubescape/kubescape-operator '
            f'-n kubescape --create-namespace '
            f'--set clusterName={cluster_context} '
            f'--set account={account} '
            f'--set accessKey={accessKey} '
            f'--set server=api.armosec.io '
            f'--set nodeAgent.env[0].name=PYROSCOPE_SERVER_SVC '
            f'--set nodeAgent.env[0].value=http://pyroscope-distributor.monitoring.svc.cluster.local.:4040'
        )
        if version:
            helm_command += f' --version {version}'
        # Add storage image repository and tag if provided
        if storage_image_tag:
            helm_command += f' --set storage.image.tag={storage_image_tag} --set storage.image.repository=quay.io/kubescape/storage'
        # Add node agent image repository and tag if provided
        if node_agent_image_tag:
            helm_command += f' --set nodeAgent.image.tag={node_agent_image_tag} --set nodeAgent.image.repository=quay.io/kubescape/node-agent'
        # Add the additional Helm parameters if -kdr is enabled
        if enable_kdr:
            # The quay.io pull secret is read from the environment instead of
            # being hardcoded; the username falls back to the read-only robot
            # account used previously.
            quay_username = os.environ.get("QUAYIO_REGISTRY_USERNAME", "armosec+armosec_ro")
            quay_password = os.environ.get("QUAYIO_REGISTRY_PASSWORD")
            if not quay_password:
                raise ValueError("QUAYIO_REGISTRY_PASSWORD not set in environment.")
            additional_params = (
                ' --set alertCRD.installDefault=true'
                ' --set capabilities.manageWorkloads=enable'
                ' --set capabilities.nodeProfileService=enable'
                ' --set capabilities.runtimeDetection=enable'
                f' --set imagePullSecret.password={quay_password}'
                ' --set imagePullSecret.server=quay.io'
                f' --set imagePullSecret.username={quay_username}'
                ' --set imagePullSecrets=armosec-readonly'
                ' --set nodeAgent.image.repository=quay.io/armosec/node-agent'
            )
            # The repository is already set above, so only the tag varies here
            if private_node_agent:
                additional_params += f' --set nodeAgent.image.tag={private_node_agent}'
            else:
                additional_params += ' --set nodeAgent.image.tag=v0.0.25'
            helm_command += additional_params
        run_command(helm_command)
        print("Waiting 30 seconds for the operator to deploy...")
        time.sleep(30)
        print("Kubescape Operator deployed successfully.")
        if additional_helm_command:
            print("Deploying additional Helm chart...", additional_helm_command)
            run_command(additional_helm_command)
            print("Additional Helm chart deployed successfully.")
    except subprocess.CalledProcessError as e:
        print(f"Failed to deploy Kubescape with exit code {e.returncode}")
        print(f"Error output:\n{e.stderr}")
        sys.exit(1)

# Step 3: Wait for the cluster to be ready
def check_cluster_ready(timeout=300): # Timeout 5 min
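    """Poll `kubectl get pods -A` until every pod is Running with all of its
    containers ready, or until `timeout` seconds have elapsed."""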
    start_time = time.time()
    while True:
        elapsed_time = time.time() - start_time
        if elapsed_time > timeout:
            print(f"Timeout exceeded! Waited for {timeout / 60} minutes.")
            break
        try:
            result = subprocess.run(
                ['kubectl', 'get', 'pods', '-A'],
                check=True, capture_output=True, text=True
            )
            # Process each line of the output
            all_pods_ready = True
            total_pods = 0
            pods_ready = 0
            for line in result.stdout.splitlines()[1:]:  # Skip the header line
                total_pods += 1
                columns = line.split()
                # With -A the columns are: NAMESPACE NAME READY STATUS ...
                ready_ratio = columns[2]
                ready, total = map(int, ready_ratio.split('/'))
                # Check if the pod is in the "Running" state and all containers are ready
                if columns[3] == "Running" and ready == total:
                    pods_ready += 1
                else:
                    all_pods_ready = False
            if all_pods_ready and total_pods == pods_ready:
                print(f"All {total_pods} pods are running and ready.")
                break
            else:
                print(f"Waiting for all pods to be ready... ({pods_ready}/{total_pods})")
        except subprocess.CalledProcessError:
            print("Cluster not ready yet, retrying...")
        # Sleep for 10 seconds before checking again
        time.sleep(10)

# Step 4: Check for pods in CrashLoopBackOff state using kubectl
def check_crashloop_pods(namespace='default'):
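    """Return True if no pod in `namespace` is in CrashLoopBackOff; any
    offending pod is described for debugging."""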
    try:
        result = subprocess.run(
            ['kubectl', 'get', 'pods', '-n', namespace],
            check=True, capture_output=True, text=True
        )
        all_pods_stable = True
        total_pods = 0
        stable_pods = 0
        for line in result.stdout.splitlines()[1:]:
            total_pods += 1
            columns = line.split()
            pod_name = columns[0]
            # Without -A the columns are: NAME READY STATUS RESTARTS AGE,
            # so STATUS is the third column, not the fourth
            pod_status = columns[2]
            # Check if the pod is in the "CrashLoopBackOff" state
            if "CrashLoopBackOff" in pod_status:
                all_pods_stable = False
                print(f"Pod {pod_name} is in CrashLoopBackOff. Describing the pod...")
                # Describe the pod that is in CrashLoopBackOff
                describe_result = subprocess.run(
                    ['kubectl', 'describe', 'pod', pod_name, '-n', namespace],
                    check=True, capture_output=True, text=True
                )
                print(describe_result.stdout)
            else:
                stable_pods += 1
        if all_pods_stable and total_pods == stable_pods:
            print(f"All {total_pods} pods in namespace '{namespace}' are stable.")
            return True
        else:
            print(f"Pods not stable yet... ({stable_pods}/{total_pods})")
            return False
    except subprocess.CalledProcessError as e:
        print(f"Failed to check pods in namespace '{namespace}': {e}")
        return False

def destroy_cluster():
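    """Tear down the Terraform-managed EKS infrastructure."""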
    terraform_dir = os.path.join("terraform-test-clusters", "EKS")
    print("Destroying Terraform-managed infrastructure...")
    try:
        run_command('terraform destroy -auto-approve', terraform_dir)
    except subprocess.CalledProcessError:
        sys.exit(1)
    print("Infrastructure destroyed successfully.")

def main():
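    """Parse the CLI flags and run the requested provisioning and deployment steps."""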
    # Parse command-line arguments
    parser = argparse.ArgumentParser(description="Deploy Kubescape with optional Helm parameters")
    parser.add_argument('-kdr', action='store_true', help="Enable KDR capabilities")
    parser.add_argument('-nodes', type=int, default=3, help="Number of nodes (default is 3)")
    parser.add_argument('-account', type=str, required=True, help="Account ID")
    parser.add_argument('-accessKey', type=str, required=True, help="Access key")
    parser.add_argument('-duration', type=int, default=4, help="Duration time in hours (default is 4)")
    parser.add_argument('-destroy', action='store_true', help="Destroy the Terraform-managed infrastructure")
    parser.add_argument('-skip-cluster', action='store_true', help="Skip cluster creation and connection")
    parser.add_argument('-version', type=str, help="Specify the Helm chart version for Kubescape")
    parser.add_argument('-additional-helm-command', type=str, help="Additional helm command")
    parser.add_argument('-storage-version', type=str, help="Specify the storage image version")
    parser.add_argument('-node-agent-version', type=str, help="Specify the node agent image version")
    parser.add_argument('-private-node-agent', type=str, help="Specify the private node agent version")
    args = parser.parse_args()
    terraform_dir = os.path.join("terraform-test-clusters", "EKS")
    if args.destroy:
        destroy_cluster()
        return
    # Step 1: Create cluster and connect to it, unless -skip-cluster is used
    if not args.skip_cluster:
        node_count = setup_cluster(node_count=args.nodes)
        # Extract region and cluster name from Terraform outputs
        region = subprocess.run(['terraform', 'output', '-raw', 'region'], check=True, capture_output=True, text=True, cwd=terraform_dir).stdout.strip()
        cluster_name = subprocess.run(['terraform', 'output', '-raw', 'cluster_name'], check=True, capture_output=True, text=True, cwd=terraform_dir).stdout.strip()
        # Step 2: Connect to the EKS cluster
        connect_to_eks_cluster(region, cluster_name, terraform_dir)
    else:
        # Use the requested node count if skipping cluster creation
        print("Skipping cluster creation and connection.")
        node_count = args.nodes
    # Deploy Prometheus and the microservices demo (currently disabled)
    # deploy_kube_prometheus_stack()
    # deploy_pyroscope()
    # Step 3: Deploy Kubescape using Helm
    deploy_kubescape(
        account=args.account,
        accessKey=args.accessKey,
        version=args.version,
        enable_kdr=args.kdr,
        additional_helm_command=args.additional_helm_command,
        storage_image_tag=args.storage_version,
        node_agent_image_tag=args.node_agent_version,
        private_node_agent=args.private_node_agent
    )
    # time.sleep(40)  # Wait for the operator to deploy
    # namespaces = create_parallel_namespaces(node_count)
    # apply_microservices_demo(namespaces)
    # # Step 4: Check if the cluster is ready by polling pod readiness
    # check_cluster_ready()
    # # Step 5: Check if any pods are in CrashLoopBackOff state
    # print("Checking for pods in CrashLoopBackOff state...")
    # check_crashloop_pods(namespace="kubescape")

if __name__ == "__main__":
    main()