-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathorphan-mounts-stress.sh
executable file
·69 lines (56 loc) · 3.07 KB
/
orphan-mounts-stress.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/bin/bash
# Copyright 2021 NetApp, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0.
# Prior to the v1.2.1 release, on certain versions of Kubernetes, the driver's node service would infrequently leave
# behind some number of orphan mounts (mounted BeeGFS file systems that could not be cleaned up without manual
# intervention, even though the associated Persistent Volume had been deleted from the Kubernetes API server already).
# The fixes in v1.2.1 should prevent the issue moving forward, but due to its intermittent nature, testing for the issue
# is difficult.
# This script runs the end-to-end test suite against a cluster many times in a row. If the suite fails (and it WILL
# fail if orphaned mounts are detected), the script logs all relevant information to a temporary directory. If the suite
# succeeds on all attempts, the script logs everything the controller service reported about the orphan mount
# infrastructure while it was running.
# Set KUBECONFIG.
# Deploy the driver.
# Modify DRIVER_NAMESPACE if necessary.
# Run this script.
START_TIME=${SECONDS}
DRIVER_NAMESPACE=default
OUTPUT_DIR=$(mktemp -d /tmp/e2e.XXXXXX)
export KUBE_SSH_USER=root
exec > >(tee -a ${OUTPUT_DIR}/script.log) 2>&1 # Log all script output to OUTPUT_DIR.
fail() {
ELAPSED=$((${SECONDS} - ${START_TIME}))
kubectl logs -n ${DRIVER_NAMESPACE} -c beegfs --since=${ELAPSED}s csi-beegfs-controller-0 >${OUTPUT_DIR}/controller.log
kubectl get pod -n ${DRIVER_NAMESPACE} -l app=csi-beegfs-node --no-headers | awk '{print $1}' | xargs -I{} kubectl logs -n ${DRIVER_NAMESPACE} -c beegfs --since=${ELAPSED}s {} >>${OUTPUT_DIR}/nodes.log
echo "see ${OUTPUT_DIR} for details"
exit
}
echo "cleaning up previous test"
kubectl get ns --no-headers | awk '{print $1}' | grep -e provisioning- -e stress- -e beegfs- -e multivolume- -e ephemeral- -e volumemode- -e volumelimits- | xargs kubectl delete ns --cascade=foreground 2>/dev/null
echo
for ITERATION in {1..20}; do
echo iteration ${ITERATION}
date
echo "running nondisruptive ginkgo tests"
if ginkgo -p -nodes 8 -skip 'should be able to unmount after the subpath directory is deleted|\[Slow\]|\[Disruptive\]|\[Serial\]' -timeout 60m ./test/e2e/ >${OUTPUT_DIR}/nondisruptive.log; then
echo "nondisruptive ginkgo tests passed"
else
echo "nondisruptive ginkgo tests failed"
fail
fi
echo "running disruptive ginkgo tests"
if ginkgo -v -noColor -skip 'should be able to unmount after the subpath directory is deleted|\[Slow\]' -focus '\[Disruptive\]|\[Serial\]' -timeout 60m ./test/e2e/ >${OUTPUT_DIR}/disruptive.log; then
echo "disruptive ginkgo tests passed"
else
echo "disruptive ginkgo tests failed"
fail
fi
echo
done
ELAPSED=$((SECONDS - START_TIME))
# Print any controller logs that indicate either:
# -> The orphan mount prevention infrastructure worked, (waited for unstage) or
# -> The orphan mount infrastructure had difficulties, (failed to clean up due to a busy mount).
kubectl logs -n ${DRIVER_NAMESPACE} -c beegfs --since=${ELAPSED}s csi-beegfs-controller-0 | grep -e Waiting -e busy
rm -rf ${OUTPUT_DIR}