From cdaeccf9ffffe0dd7cfb5bbc6c7a5ac8ca4dc068 Mon Sep 17 00:00:00 2001 From: Tomislav Plavcic Date: Mon, 16 Oct 2023 13:10:42 +0200 Subject: [PATCH 01/15] Push failed test k8s logs to S3 bucket --- Jenkinsfile | 16 +++++++++++++++ e2e-tests/functions | 47 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index 973c1bbe21..871908e15f 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -97,6 +97,21 @@ void pushLogFile(String FILE_NAME) { } } +void pushK8SLogs(String TEST_NAME) { + def LOG_FILE_PATH="e2e-tests/logs/" + def FILE_NAMES="logs_${TEST_NAME}_*" + echo "Push k8s logs to S3!" + + withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', accessKeyVariable: 'AWS_ACCESS_KEY_ID', credentialsId: 'AMI/OVF', secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']]) { + sh """ + S3_PATH=s3://percona-jenkins-artifactory/\$JOB_NAME/\$(git rev-parse --short HEAD)/logs/ + aws s3 ls \$S3_PATH || : + aws s3 rm \$S3_PATH --recursive --exclude "*" --include "${FILE_NAMES}" || : + aws s3 cp --quiet ${LOG_FILE_PATH} \$S3_PATH --recursive --exclude "*" --include "$FILE_NAMES" || : + """ + } +} + void popArtifactFile(String FILE_NAME) { echo "Try to get $FILE_NAME file from S3!" @@ -211,6 +226,7 @@ void runTest(Integer TEST_ID) { return true } catch (exc) { + pushK8SLogs("$testName") if (retryCount >= 1 || currentBuild.nextBuild != null) { currentBuild.result = 'FAILURE' return true diff --git a/e2e-tests/functions b/e2e-tests/functions index 00469b3a0c..f547876c94 100755 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -150,6 +150,7 @@ wait_pod() { echo -n . let retry+=1 if [ $retry -ge 360 ]; then + collect_k8s_logs kubectl_bin describe pod/$pod kubectl_bin logs $pod kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ @@ -158,6 +159,7 @@ wait_pod() { | grep -v 'Getting tasks for pod' \ | grep -v 'Getting pods from source' \ | tail -100 + echo max retry count $retry reached. 
something went wrong with operator or kubernetes cluster exit 1 fi @@ -177,12 +179,14 @@ wait_cron() { echo -n . let retry+=1 if [ $retry -ge 360 ]; then + collect_k8s_logs kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ | grep -v 'level=info' \ | grep -v 'level=debug' \ | grep -v 'Getting tasks for pod' \ | grep -v 'Getting pods from source' \ | tail -100 + echo max retry count $retry reached. something went wrong with operator or kubernetes cluster exit 1 fi @@ -201,8 +205,10 @@ wait_backup_agent() { echo -n . let retry+=1 if [ $retry -ge 360 ]; then + collect_k8s_logs kubectl_bin logs $agent_pod -c backup-agent \ | tail -100 + echo max retry count $retry reached. something went wrong with operator or kubernetes cluster exit 1 fi @@ -224,12 +230,14 @@ wait_backup() { let retry+=1 current_status=$(kubectl_bin get psmdb-backup $backup_name -o jsonpath='{.status.state}') if [[ $retry -ge 360 || ${current_status} == 'error' ]]; then + collect_k8s_logs kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ | grep -v 'level=info' \ | grep -v 'level=debug' \ | grep -v 'Getting tasks for pod' \ | grep -v 'Getting pods from source' \ | tail -100 + echo "Backup object psmdb-backup/${backup_name} is in ${current_status} state." echo something went wrong with operator or kubernetes cluster exit 1 fi @@ -283,12 +291,14 @@ wait_deployment() { echo -n . let retry+=1 if [ $retry -ge 360 ]; then + collect_k8s_logs kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ | grep -v 'level=info' \ | grep -v 'level=debug' \ | grep -v 'Getting tasks for pod' \ | grep -v 'Getting pods from source' \ | tail -100 + echo max retry count $retry reached. 
something went wrong with operator or kubernetes cluster exit 1 fi @@ -329,6 +339,7 @@ wait_restore() { let retry+=1 current_state=$(kubectl_bin get psmdb-restore restore-$backup_name -o jsonpath='{.status.state}') if [[ $retry -ge $wait_time || ${current_state} == 'error' ]]; then + collect_k8s_logs kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ | grep -v 'level=info' \ | grep -v 'level=debug' \ @@ -542,6 +553,7 @@ retry() { until "$@"; do if [[ $n -ge $max ]]; then + collect_k8s_logs echo "The command '$@' has failed after $n attempts." exit 1 fi @@ -581,6 +593,7 @@ wait_for_running() { timeout=$((timeout + 1)) echo -n '.' if [[ ${timeout} -gt 1500 ]]; then + collect_k8s_logs echo echo "Waiting timeout has been reached. Exiting..." exit 1 @@ -603,12 +616,14 @@ wait_for_delete() { echo -n . let retry+=1 if [ $retry -ge $wait_time ]; then + collect_k8s_logs kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ | grep -v 'level=info' \ | grep -v 'level=debug' \ | grep -v 'Getting tasks for pod' \ | grep -v 'Getting pods from source' \ | tail -100 + echo max retry count $retry reached. something went wrong with operator or kubernetes cluster exit 1 fi @@ -624,6 +639,8 @@ compare_generation() { current_generation="$(kubectl_bin get ${resource_type} "${resource_name}" -o jsonpath='{.metadata.generation}')" if [[ ${generation} != "${current_generation}" ]]; then + collect_k8s_logs + echo "Generation for ${resource_type}/${resource_name} is: ${current_generation}, but should be: ${generation}" exit 1 fi @@ -984,6 +1001,7 @@ get_service_endpoint() { return fi + collect_k8s_logs exit 1 } @@ -1160,6 +1178,7 @@ wait_cluster_consistency() { until [[ "$(kubectl_bin get psmdb "${cluster_name}" -o jsonpath='{.status.state}')" == "ready" ]]; do let retry+=1 if [ $retry -ge $wait_time ]; then + collect_k8s_logs echo max retry count $retry reached. 
something went wrong with operator or kubernetes cluster exit 1 fi @@ -1186,6 +1205,7 @@ check_backup_deletion() { retry=0 until [[ $(curl -sw '%{http_code}' -o /dev/null $path) -eq 403 ]] || [[ $(curl -sw '%{http_code}' -o /dev/null $path) -eq 404 ]]; do if [ $retry -ge 10 ]; then + collect_k8s_logs echo max retry count $retry reached. something went wrong with operator or kubernetes cluster echo "Backup was not removed from bucket -- $storage_name" exit 1 @@ -1247,6 +1267,7 @@ function get_mongod_ver_from_image() { version_info=$(run_simple_cli_inside_image ${image} 'mongod --version' | $sed -r 's/^.*db version v(([0-9]+\.){2}[0-9]+-[0-9]+).*$/\1/g') if [[ ! ${version_info} =~ ^([0-9]+\.){2}[0-9]+-[0-9]+$ ]]; then + collect_k8s_logs printf "No mongod version obtained from %s. Exiting" ${image} exit 1 fi @@ -1259,6 +1280,7 @@ function get_pbm_version() { local version_info=$(run_simple_cli_inside_image ${image} 'pbm-agent version' | $sed -r 's/^Version:\ (([0-9]+\.){2}[0-9]+)\ .*/\1/g') if [[ ! ${version_info} =~ ^([0-9]+\.){2}[0-9]+$ ]]; then + collect_k8s_logs printf "No pbm version obtained from %s. Exiting" ${image} exit 1 fi @@ -1299,6 +1321,31 @@ function generate_vs_json() { echo ${version_service_source} | jq '.' 
>${target_path} } +collect_k8s_logs() { + if [[ ${ENABLE_LOGGING} == "true" ]]; then + rm -f ${logs_dir}/logs_${test_name}_* || : + + local check_namespaces="$namespace${OPERATOR_NS:+ $OPERATOR_NS}" + + for ns in $check_namespaces; do + local pods=$(kubectl_bin get pods -o name | awk -F "/" '{print $2}') + for p in $pods; do + local containers=$(kubectl_bin -n "$ns" get pod $p -o jsonpath='{.spec.containers[*].name}') + for c in $containers; do + kubectl_bin -n "$ns" logs $p -c $c >${logs_dir}/logs_${test_name}_$p_$c.txt + echo logs saved in: ${logs_dir}/logs_${test_name}_$p_$c.txt + done + done + done + for object in "psmdb psmdb-backup psmdb-restore pods deployments services events sts"; do + echo "##### START: $object #####" >>${logs_dir}/logs_${test_name}_simple.txt + kubectl_bin get $object --all-namespaces >>${logs_dir}/logs_${test_name}_simple.txt + echo "##### END: $object ####\n" >>${logs_dir}/logs_${test_name}_simple.txt + kubectl_bin get $object --all-namespaces >${logs_dir}/logs_${test_name}_$object.yaml + done + fi +} + check_passwords_leak() { local secrets local passwords From 807dd676e31af61014d742bea032518c0e176646 Mon Sep 17 00:00:00 2001 From: Tomislav Plavcic Date: Thu, 21 Sep 2023 11:03:44 +0200 Subject: [PATCH 02/15] Add collect_k8s_logs into tests and add pod describe --- e2e-tests/arbiter/run | 1 + e2e-tests/balancer/run | 1 + e2e-tests/cross-site-sharded/run | 1 + e2e-tests/data-at-rest-encryption/run | 2 ++ e2e-tests/data-sharded/run | 3 +++ e2e-tests/default-cr/run | 1 + e2e-tests/demand-backup-physical-sharded/run | 3 +++ e2e-tests/demand-backup-physical/run | 1 + e2e-tests/demand-backup-sharded/run | 1 + e2e-tests/demand-backup/run | 2 ++ e2e-tests/expose-sharded/run | 2 ++ e2e-tests/functions | 1 + e2e-tests/init-deploy/run | 2 ++ e2e-tests/mongod-major-upgrade-sharded/run | 1 + e2e-tests/mongod-major-upgrade/run | 1 + e2e-tests/monitoring-2-0/run | 1 + e2e-tests/multi-cluster-service/run | 3 +++ e2e-tests/rs-shard-migration/run | 4 
++++ e2e-tests/self-healing-chaos/run | 1 + e2e-tests/service-per-pod/run | 1 + e2e-tests/smart-update/run | 3 +++ e2e-tests/split-horizon/run | 2 ++ e2e-tests/tls-issue-cert-manager/run | 1 + e2e-tests/upgrade-sharded/run | 6 ++++++ e2e-tests/upgrade/run | 5 +++++ e2e-tests/version-service/run | 10 ++++++---- 26 files changed, 56 insertions(+), 4 deletions(-) diff --git a/e2e-tests/arbiter/run b/e2e-tests/arbiter/run index 2721feae74..900b011fb0 100755 --- a/e2e-tests/arbiter/run +++ b/e2e-tests/arbiter/run @@ -31,6 +31,7 @@ check_cr_config() { if [[ $(kubectl_bin get pod \ --selector=statefulset.kubernetes.io/pod-name="${cluster}-arbiter-0" \ -o jsonpath='{.items[*].status.containerStatuses[?(@.name == "mongod-arbiter")].restartCount}') -gt 0 ]]; then + collect_k8s_logs echo "Something went wrong with arbiter. Exiting..." exit 1 fi diff --git a/e2e-tests/balancer/run b/e2e-tests/balancer/run index 7272c411c8..7855b4d50b 100755 --- a/e2e-tests/balancer/run +++ b/e2e-tests/balancer/run @@ -15,6 +15,7 @@ check_balancer() { | grep -E -v "Percona Server for MongoDB|connecting to:|Implicit session:|versions do not match|Error saving history file:|bye") if [[ $balancer_running != "$expected" ]]; then + collect_k8s_logs echo "Unexpected output from \"db.adminCommand({balancerStatus: 1}).mode\": $balancer_running" echo "Expected $expected" exit 1 diff --git a/e2e-tests/cross-site-sharded/run b/e2e-tests/cross-site-sharded/run index 47c688f7f3..9f5e8dbd86 100755 --- a/e2e-tests/cross-site-sharded/run +++ b/e2e-tests/cross-site-sharded/run @@ -101,6 +101,7 @@ for i in "rs0" "rs1"; do done if [[ $shards -lt 2 ]]; then + collect_k8s_logs echo "data is only on some of the shards, maybe sharding is not working" exit 1 fi diff --git a/e2e-tests/data-at-rest-encryption/run b/e2e-tests/data-at-rest-encryption/run index ff2c08b65d..f6b60cc854 100755 --- a/e2e-tests/data-at-rest-encryption/run +++ b/e2e-tests/data-at-rest-encryption/run @@ -83,6 +83,7 @@ 
encrypted_cluster_log=$(kubectl_bin logs some-name-rs0-0 -c mongod -n $namespace echo "$encrypted_cluster_log" if [ -z "$encrypted_cluster_log" ]; then + collect_k8s_logs echo "Cluster is not encrypted" exit 1 fi @@ -99,6 +100,7 @@ until [ "$retry" -ge 10 ]; do echo "Cluster is not encrypted already" break elif [ $retry == 15 ]; then + collect_k8s_logs echo "Max retry count $retry reached. Cluster is still encrypted" exit 1 else diff --git a/e2e-tests/data-sharded/run b/e2e-tests/data-sharded/run index 5c77ed5a78..ecfd985cea 100755 --- a/e2e-tests/data-sharded/run +++ b/e2e-tests/data-sharded/run @@ -17,6 +17,7 @@ check_rs_proper_component_deletion() { until [[ $(kubectl_bin get sts -l app.kubernetes.io/instance=${cluster},app.kubernetes.io/replset=${rs_name} -ojson | jq '.items | length') -eq 0 ]]; do let retry+=1 if [ $retry -ge 70 ]; then + collect_k8s_logs sts_count=$(kubectl_bin get sts -l app.kubernetes.io/instance=${cluster},app.kubernetes.io/replset=${rs_name} -ojson | jq '.items | length') echo "Replset $rs_name not properly removed, expected sts count of 0 but got $sts_count. Exiting after $retry tries..." exit 1 @@ -115,6 +116,7 @@ main() { done if [[ $shards -lt 3 ]]; then + collect_k8s_logs echo "data is only on some of the shards, maybe sharding is not working" exit 1 fi @@ -125,6 +127,7 @@ main() { "clusterAdmin:clusterAdmin123456@$cluster-mongos.$namespace" "mongodb" ".svc.cluster.local" \ "--tlsCertificateKeyFile /tmp/tls.pem --tlsCAFile /etc/mongodb-ssl/ca.crt --tls") if ! echo $res | grep -q '"ok" : 1'; then + collect_k8s_logs echo "app database not dropped. Exiting.." 
exit 1 fi diff --git a/e2e-tests/default-cr/run b/e2e-tests/default-cr/run index 199b481f25..fbd64cd579 100755 --- a/e2e-tests/default-cr/run +++ b/e2e-tests/default-cr/run @@ -27,6 +27,7 @@ function stop_cluster() { let passed_time="${passed_time}+${sleep_time}" sleep ${sleep_time} if [[ ${passed_time} -gt ${max_wait_time} ]]; then + collect_k8s_logs echo "We've been waiting for cluster stop for too long. Exiting..." exit 1 fi diff --git a/e2e-tests/demand-backup-physical-sharded/run b/e2e-tests/demand-backup-physical-sharded/run index e08867d333..e0efefa122 100755 --- a/e2e-tests/demand-backup-physical-sharded/run +++ b/e2e-tests/demand-backup-physical-sharded/run @@ -38,6 +38,7 @@ run_recovery_check() { wait_restore "${backup_name}" "${cluster}" "ready" "0" "1800" kubectl_bin get psmdb ${cluster} -o yaml if [ $(kubectl_bin get psmdb ${cluster} -o yaml | yq '.metadata.annotations."percona.com/resync-pbm"') == null ]; then + collect_k8s_logs echo "psmdb/${cluster} should be annotated with percona.com/resync-pbm after a physical restore" exit 1 fi @@ -52,6 +53,7 @@ check_exported_mongos_service_endpoint() { local host=$1 if [ "$host" != "$(kubectl_bin get psmdb $cluster -o=jsonpath='{.status.host}')" ]; then + collect_k8s_logs echo "Exported host is not correct after the restore" exit 1 fi @@ -80,6 +82,7 @@ wait_cluster_consistency ${cluster} lbEndpoint=$(kubectl_bin get svc $cluster-mongos -o=jsonpath='{.status}' | jq -r 'select(.loadBalancer != null and .loadBalancer.ingress != null and .loadBalancer.ingress != []) | .loadBalancer.ingress[0][]') if [ -z $lbEndpoint ]; then + collect_k8s_logs echo "mongos service not exported correctly" exit 1 fi diff --git a/e2e-tests/demand-backup-physical/run b/e2e-tests/demand-backup-physical/run index 16d1042560..774f90a281 100755 --- a/e2e-tests/demand-backup-physical/run +++ b/e2e-tests/demand-backup-physical/run @@ -38,6 +38,7 @@ run_recovery_check() { wait_restore "${backup_name}" "${cluster}" "ready" "0" "1800" 
kubectl_bin get psmdb ${cluster} -o yaml if [ $(kubectl_bin get psmdb ${cluster} -o yaml | yq '.metadata.annotations."percona.com/resync-pbm"') == null ]; then + collect_k8s_logs echo "psmdb/${cluster} should be annotated with percona.com/resync-pbm after a physical restore" exit 1 fi diff --git a/e2e-tests/demand-backup-sharded/run b/e2e-tests/demand-backup-sharded/run index 94456ba08a..cc135e8201 100755 --- a/e2e-tests/demand-backup-sharded/run +++ b/e2e-tests/demand-backup-sharded/run @@ -166,6 +166,7 @@ backup_exists=$(kubectl_bin run -i --rm aws-cli --image=perconalab/awscli --rest /usr/bin/aws --endpoint-url http://minio-service:9000 s3 ls s3://operator-testing/ \ | grep -c ${backup_dest_minio}_ | cat) if [[ $backup_exists -eq 1 ]]; then + collect_k8s_logs echo "Backup was not removed from bucket -- minio" exit 1 fi diff --git a/e2e-tests/demand-backup/run b/e2e-tests/demand-backup/run index b0f2846b4f..a9e874b467 100755 --- a/e2e-tests/demand-backup/run +++ b/e2e-tests/demand-backup/run @@ -135,6 +135,7 @@ backup_exists=$(kubectl_bin run -i --rm aws-cli --image=perconalab/awscli --rest /usr/bin/aws --endpoint-url http://minio-service:9000 s3 ls s3://operator-testing/ \ | grep -c ${backup_dest_minio} | cat) if [[ $backup_exists -eq 1 ]]; then + collect_k8s_logs echo "Backup was not removed from bucket -- minio" exit 1 fi @@ -170,6 +171,7 @@ backup_exists=$(kubectl_bin run -i --rm aws-cli --image=perconalab/awscli --rest /usr/bin/aws --endpoint-url http://minio-service:9000 s3 ls s3://operator-testing/ \ | grep -c ${backup_dest_minio} | cat) if [[ $backup_exists -eq 1 ]]; then + collect_k8s_logs echo "Backup was not removed from bucket -- minio" exit 1 fi diff --git a/e2e-tests/expose-sharded/run b/e2e-tests/expose-sharded/run index 7e49876388..cc5237f418 100755 --- a/e2e-tests/expose-sharded/run +++ b/e2e-tests/expose-sharded/run @@ -23,6 +23,7 @@ function stop_cluster() { let passed_time="${passed_time}+${sleep_time}" sleep ${passed_time} if [[ 
${passed_time} -gt ${max_wait_time} ]]; then + collect_k8s_logs echo "We've been waiting for cluster stop for too long. Exiting..." exit 1 fi @@ -52,6 +53,7 @@ function compare_mongo_config() { rs0_0_endpoint_actual=$(run_mongo 'var host;var x=0;rs.conf().members.forEach(function(d){ if(d.tags.podName=="some-name-rs0-0"){ host=rs.conf().members[x].host;print(host)};x=x+1; })' "clusterAdmin:clusterAdmin123456@${cluster}-rs0.${namespace}" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|Error saving history file:|bye') if [[ $rs0_0_endpoint_actual != "$rs0_0_endpoint:27017" || $cfg_0_endpoint_actual != "$cfg_0_endpoint:27017" ]]; then + collect_k8s_logs desc "Actual values rs $rs0_0_endpoint_actual and cfg $cfg_0_endpoint_actual do not match expected rs $rs0_0_endpoint:27017 and cfg $cfg_0_endpoint:27017" exit 1 fi diff --git a/e2e-tests/functions b/e2e-tests/functions index f547876c94..80b1b95357 100755 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -1330,6 +1330,7 @@ collect_k8s_logs() { for ns in $check_namespaces; do local pods=$(kubectl_bin get pods -o name | awk -F "/" '{print $2}') for p in $pods; do + kubectl_bin -n "$ns" describe pod $p >${logs_dir}/logs_${test_name}_$p_.dsc local containers=$(kubectl_bin -n "$ns" get pod $p -o jsonpath='{.spec.containers[*].name}') for c in $containers; do kubectl_bin -n "$ns" logs $p -c $c >${logs_dir}/logs_${test_name}_$p_$c.txt diff --git a/e2e-tests/init-deploy/run b/e2e-tests/init-deploy/run index 7520d936c3..33184c1926 100755 --- a/e2e-tests/init-deploy/run +++ b/e2e-tests/init-deploy/run @@ -61,6 +61,8 @@ compare_mongo_cmd "find" "myApp:myPass@$cluster-2.$cluster.$namespace" desc 'check number of connections' conn_count=$(run_mongo 'db.serverStatus().connections.current' "clusterAdmin:clusterAdmin123456@$cluster.$namespace" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona 
Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|bye') if [ ${conn_count} -gt ${max_conn} ]; then + collect_k8s_logs + echo "Mongo connection count ${conn_count} is greater than maximum connection count limit: ${max_conn}" exit 1 fi diff --git a/e2e-tests/mongod-major-upgrade-sharded/run b/e2e-tests/mongod-major-upgrade-sharded/run index e4378d70c6..7c92f04d9a 100755 --- a/e2e-tests/mongod-major-upgrade-sharded/run +++ b/e2e-tests/mongod-major-upgrade-sharded/run @@ -94,6 +94,7 @@ function main() { | grep -E '^\{.*\}$' | jq -r '.featureCompatibilityVersion.version') if [[ ${currentFCV} != ${version} ]]; then + collect_k8s_logs echo "FCV at the moment is ${currentFCV} and is not set to ${version} as it should. Exiting..." exit 1 fi diff --git a/e2e-tests/mongod-major-upgrade/run b/e2e-tests/mongod-major-upgrade/run index 8cb58e23fc..8aed90e661 100755 --- a/e2e-tests/mongod-major-upgrade/run +++ b/e2e-tests/mongod-major-upgrade/run @@ -89,6 +89,7 @@ function main() { | grep -E '^\{.*\}$' | jq -r '.featureCompatibilityVersion.version') if [[ ${currentFCV} != ${version} ]]; then + collect_k8s_logs echo "FCV at the moment is ${currentFCV} and is not set to ${version} as it should. Exiting..." exit 1 fi diff --git a/e2e-tests/monitoring-2-0/run b/e2e-tests/monitoring-2-0/run index d535a2a446..533ecbf4fa 100755 --- a/e2e-tests/monitoring-2-0/run +++ b/e2e-tests/monitoring-2-0/run @@ -37,6 +37,7 @@ until kubectl_bin exec monitoring-0 -- bash -c "ls -l /proc/*/exe 2>/dev/null| g sleep 5 let retry+=1 if [ $retry -ge 20 ]; then + collect_k8s_logs echo "Max retry count $retry reached. 
Pmm-server can't start" exit 1 fi diff --git a/e2e-tests/multi-cluster-service/run b/e2e-tests/multi-cluster-service/run index 4dc5b7560d..10c2be006c 100755 --- a/e2e-tests/multi-cluster-service/run +++ b/e2e-tests/multi-cluster-service/run @@ -23,6 +23,7 @@ wait_mcs_api() { until [[ $(kubectl_bin api-resources | grep ServiceExport | wc -l) -eq 1 ]]; do let retry+=1 if [ $retry -ge 64 ]; then + collect_k8s_logs echo max retry count $retry reached. Something went wrong with MCS, probably a problem on GCP side. exit 1 fi @@ -40,6 +41,7 @@ wait_service_import() { until [[ "$(kubectl_bin get serviceimport --ignore-not-found | grep -v 'NAME' | wc -l)" -eq "9" ]]; do let retry+=1 if [ $retry -ge 64 ]; then + collect_k8s_logs echo max retry count $retry reached. Something went wrong with MCS, probably a problem in gke-mcs-importer. exit 1 fi @@ -58,6 +60,7 @@ wait_service_export() { until [[ "$(kubectl_bin get serviceexport --ignore-not-found | grep -v 'NAME' | wc -l)" -eq "9" ]]; do let retry+=1 if [ $retry -ge 64 ]; then + collect_k8s_logs echo max retry count $retry reached. Something went wrong with MCS, probably a problem in gke-mcs-exporter. exit 1 fi diff --git a/e2e-tests/rs-shard-migration/run b/e2e-tests/rs-shard-migration/run index 7020b091a6..465fe4fc91 100755 --- a/e2e-tests/rs-shard-migration/run +++ b/e2e-tests/rs-shard-migration/run @@ -38,10 +38,12 @@ function main() { wait_cluster_consistency "${cluster}" if [[ $(kubectl_bin get statefulset/${cluster}-mongos -o jsonpath='{.status.readyReplicas}') -lt 1 ]]; then + collect_k8s_logs echo "Mongos hasn't been properly started. Exiting..." exit 1 fi if [[ "$(kubectl_bin get sts/${cluster}-cfg -o jsonpath='{.status.replicas}')" != "$(kubectl_bin get sts/${cluster}-cfg -o jsonpath='{.status.readyReplicas}')" ]]; then + collect_k8s_logs echo "Cfg pods haven't been properly started. Exiting..." 
exit 1 fi @@ -54,6 +56,7 @@ function main() { if [[ -z "$(get_shard_parameter ${cluster} ${namespace} lastCommitedOpTime)" ]] \ && [[ -z "$(get_shard_parameter ${cluster} ${namespace} '$configServerState.opTime.ts')" ]]; then # for mongo 3.6 + collect_k8s_logs echo "Sharded cluster does not work properly" exit 1 fi @@ -70,6 +73,7 @@ function main() { || [[ -n "$(kubectl_bin get service -o jsonpath='{.items[?(@.metadata.name == "'"${cluster}-mongos"'")].metadata.name}')" ]] \ || [[ -n "$(kubectl_bin get service -o jsonpath='{.items[?(@.metadata.name == "'"${cluster}-cfg"'")].metadata.name}')" ]] \ || [[ -n "$(kubectl_bin get statefulset -o jsonpath='{.items[?(@.metadata.name == "'"${cluster}-cfg"'")].metadata.name}')" ]]; then + collect_k8s_logs echo "Transition to replicaset cluster has not been done well. Cluster does not work properly or some leftovers still exist" exit 1 fi diff --git a/e2e-tests/self-healing-chaos/run b/e2e-tests/self-healing-chaos/run index 1380150331..bddea5d5e0 100755 --- a/e2e-tests/self-healing-chaos/run +++ b/e2e-tests/self-healing-chaos/run @@ -15,6 +15,7 @@ check_pod_restarted() { local new_resourceVersion=$(kubectl get pod $pod -ojson | jq '.metadata.resourceVersion' | tr -d '"') if [[ $old_resourceVersion == "$new_resourceVersion" ]]; then + collect_k8s_logs echo "Chaos mesh didn't work for some reason. Please check!!!" 
echo "The resourceVersion was not changed: $new_resourceVersion" exit 1 diff --git a/e2e-tests/service-per-pod/run b/e2e-tests/service-per-pod/run index e3d2131d1b..cadfdf3b30 100755 --- a/e2e-tests/service-per-pod/run +++ b/e2e-tests/service-per-pod/run @@ -64,6 +64,7 @@ check_cr_config() { compare_kubectl service/node-port-rs0-0 "-updated" current_node_port=$(kubectl_bin get svc node-port-rs0-0 -o 'jsonpath={.spec.ports[0].nodePort}') if [[ $current_node_port != "$old_node_port" ]]; then + collect_k8s_logs echo "Node port changed from ${old_node_port} to ${current_node_port}" exit 1 fi diff --git a/e2e-tests/smart-update/run b/e2e-tests/smart-update/run index 015a4d0b19..ebb1178aa3 100755 --- a/e2e-tests/smart-update/run +++ b/e2e-tests/smart-update/run @@ -22,6 +22,7 @@ function check_pod_update() { echo "OK: Image ${img} was updated for pod ${pod_name}!" break elif [ ${retry} -ge 60 ]; then + collect_k8s_logs echo "Max retry count ${retry} reached." echo "ERROR: Image was not updated for pod ${pod_name}! Image is ${img}, but should be ${IMAGE_MONGOD_TO_UPDATE}." 
exit 1 @@ -67,6 +68,7 @@ done desc "check primary should have old image" img=$(kubectl get pod/$initial_primary -o jsonpath='{.spec.containers[0].image}') if [ "${img}" != "${IMAGE_MONGOD}" ]; then + collect_k8s_logs echo "image should be old on primary pod at that moment" exit 1 fi @@ -82,6 +84,7 @@ pods+=("${initial_primary}") for i in "${!pods[@]}"; do if [ "${pods[i]}" != "${restarted_pods[i]}" ]; then + collect_k8s_logs echo "pod ${pods[i]} is not equal to pod ${restarted_pods[i]}" exit 1 fi diff --git a/e2e-tests/split-horizon/run b/e2e-tests/split-horizon/run index 420202dee3..66df795d8b 100755 --- a/e2e-tests/split-horizon/run +++ b/e2e-tests/split-horizon/run @@ -50,6 +50,7 @@ diff $test_dir/compare/horizons-3.json $tmp_dir/horizons-3.json isMaster=$(run_mongo_tls "db.isMaster().ismaster" "clusterAdmin:clusterAdmin123456@some-name-rs0-0.clouddemo.xyz,some-name-rs0-1.clouddemo.xyz,some-name-rs0-2.clouddemo.xyz" mongodb "" "--quiet" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|Error saving history file:' | grep -v certificateNames) if [ "${isMaster}" != "true" ]; then + collect_k8s_logs echo "mongo client should've redirect the connection to primary" exit 1 fi @@ -63,6 +64,7 @@ sleep 10 # give some time for re-election isMaster=$(run_mongo_tls "db.isMaster().ismaster" "clusterAdmin:clusterAdmin123456@some-name-rs0-0.clouddemo.xyz,some-name-rs0-1.clouddemo.xyz,some-name-rs0-2.clouddemo.xyz" mongodb "" "--quiet" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|Error saving history file:' | grep -v certificateNames) if [ "${isMaster}" != "true" ]; then + collect_k8s_logs echo "mongo client should've redirect the connection to primary" exit 1 fi diff --git a/e2e-tests/tls-issue-cert-manager/run 
b/e2e-tests/tls-issue-cert-manager/run index e3c95720ce..ecdd860a34 100755 --- a/e2e-tests/tls-issue-cert-manager/run +++ b/e2e-tests/tls-issue-cert-manager/run @@ -20,6 +20,7 @@ check_secret_data_key() { secret_data=$(kubectl_bin get "secrets/${secret_name}" -o json | jq ".data[\"${data_key}\"]") if [ -z "$secret_data" ]; then + collect_k8s_logs exit 1 fi } diff --git a/e2e-tests/upgrade-sharded/run b/e2e-tests/upgrade-sharded/run index 08db6b2323..9b19e7984b 100755 --- a/e2e-tests/upgrade-sharded/run +++ b/e2e-tests/upgrade-sharded/run @@ -49,6 +49,7 @@ IMAGE_PMM_CLIENT=$(echo "${INIT_OPERATOR_IMAGES}" | jq -r '.versions[].matrix.pm IMAGE_BACKUP=$(echo "${INIT_OPERATOR_IMAGES}" | jq -r '.versions[].matrix.backup[].imagePath') if [[ ${TARGET_OPERATOR_VER} == "${INIT_OPERATOR_VER}" ]]; then + collect_k8s_logs echo "OPERATOR VERSION and INIT OPERATOR VERSION variables are the same: ${TARGET_OPERATOR_VER} ${INIT_OPERATOR_VER}! Something is wrong!" exit 1 fi @@ -61,6 +62,7 @@ function compare_generation() { current_generation=$(kubectl_bin get "${resource}" "${name}" -o jsonpath='{.metadata.generation}') if [[ ${generation} != "${current_generation}" ]]; then + collect_k8s_logs echo "Generation for resource type ${resource} with name ${name} is: ${current_generation}, but should be: ${generation}!" exit 1 fi @@ -73,6 +75,7 @@ function wait_cluster_consistency() { "$(kubectl_bin get psmdb "${cluster}" -o jsonpath='{.status.replsets.cfg.ready}')" == "${CLUSTER_SIZE}" ]]; do let retry+=1 if [ $retry -ge 32 ]; then + collect_k8s_logs echo max retry count $retry reached. 
something went wrong with operator or kubernetes cluster exit 1 fi @@ -92,6 +95,7 @@ function check_applied_images() { ${IMAGE_MONGOD} == $(kubectl_bin get psmdb "${cluster}" -o jsonpath='{.spec.image}') ]]; then : Operator image has been updated correctly else + collect_k8s_logs echo 'Operator image has not been updated' exit 1 fi @@ -103,6 +107,7 @@ function check_applied_images() { ${TARGET_IMAGE_MONGOD} == $(kubectl_bin get psmdb "${cluster}" -o jsonpath='{.spec.image}') ]]; then : Cluster images have been updated correctly else + collect_k8s_logs echo 'Cluster images have not been updated' exit 1 fi @@ -148,6 +153,7 @@ function check_upgrade_order() { local nr=$(kubectl_bin get pod --sort-by=.status.startTime | grep -vE '^NAME|client|operator|minio-service' | sed -n "${start},${end}p" | grep -c "\-${pod_type}\-") if [[ ${nr} -ne ${cluster_size} ]]; then + collect_k8s_logs echo "${pod_type} was not upgraded ${upgrade_order}!" kubectl_bin get pod --sort-by=.status.startTime | grep -vE 'client|operator|minio-service' exit 1 diff --git a/e2e-tests/upgrade/run b/e2e-tests/upgrade/run index 78df239e26..dea31faf4f 100755 --- a/e2e-tests/upgrade/run +++ b/e2e-tests/upgrade/run @@ -52,6 +52,7 @@ IMAGE_PMM_CLIENT=$(echo "${INIT_OPERATOR_IMAGES}" | jq -r '.versions[].matrix.pm IMAGE_BACKUP=$(echo "${INIT_OPERATOR_IMAGES}" | jq -r '.versions[].matrix.backup[].imagePath') if [[ ${TARGET_OPERATOR_VER} == "${INIT_OPERATOR_VER}" ]]; then + collect_k8s_logs echo "OPERATOR VERSION and INIT OPERATOR VERSION variables are the same: ${TARGET_OPERATOR_VER} ${INIT_OPERATOR_VER}! Something is wrong!" exit 1 fi @@ -64,6 +65,7 @@ function compare_generation() { current_generation=$(kubectl_bin get "${resource}" "${name}" -o jsonpath='{.metadata.generation}') if [[ ${generation} != "${current_generation}" ]]; then + collect_k8s_logs echo "Generation for resource type ${resource} with name ${name} is: ${current_generation}, but should be: ${generation}!" 
exit 1 fi @@ -75,6 +77,7 @@ function wait_cluster_consistency() { "$(kubectl_bin get psmdb "${cluster}" -o jsonpath='{.status.replsets.rs0.ready}')" == "${CLUSTER_SIZE}" ]]; do let retry+=1 if [ $retry -ge 32 ]; then + collect_k8s_logs echo max retry count $retry reached. something went wrong with operator or kubernetes cluster exit 1 fi @@ -94,6 +97,7 @@ function check_applied_images() { ${IMAGE_MONGOD} == $(kubectl_bin get psmdb "${cluster}" -o jsonpath='{.spec.image}') ]]; then : Operator image has been updated correctly else + collect_k8s_logs echo 'Operator image has not been updated' exit 1 fi @@ -105,6 +109,7 @@ function check_applied_images() { ${TARGET_IMAGE_MONGOD} == $(kubectl_bin get psmdb "${cluster}" -o jsonpath='{.spec.image}') ]]; then : Cluster images have been updated correctly else + collect_k8s_logs echo 'Cluster images have not been updated' exit 1 fi diff --git a/e2e-tests/version-service/run b/e2e-tests/version-service/run index 5122ee8a2f..99db6819dc 100755 --- a/e2e-tests/version-service/run +++ b/e2e-tests/version-service/run @@ -67,7 +67,7 @@ function check_telemetry_transfer() { # operator fallback VS should have telemetry diff ${test_dir}/compare/${telemetry_log_file} <(grep -f ${tmp_dir}/${telemetry_state}_telemetry.version-service.log.json ${test_dir}/compare/${telemetry_log_file}) # CR VS should not have telemetry - [[ -s "${tmp_dir}/enabled_telemetry.version-service-cr.log.json" ]] && exit 1 + [[ -s "${tmp_dir}/enabled_telemetry.version-service-cr.log.json" ]] && collect_k8s_logs && exit 1 fi local telemetry_cr_log_file="${telemetry_state}_telemetry.version-service-cr.log${OPERATOR_NS:+-cw}.json" @@ -77,15 +77,15 @@ function check_telemetry_transfer() { # cr VS should have telemetry diff ${test_dir}/compare/${telemetry_cr_log_file} <(grep -f ${tmp_dir}/${telemetry_state}_telemetry.version-service-cr.log.json ${test_dir}/compare/${telemetry_cr_log_file}) # operator VS should not have telemetry - [[ -s 
${tmp_dir}/disabled_telemetry.version-service.log.json ]] && exit 1 + [[ -s ${tmp_dir}/disabled_telemetry.version-service.log.json ]] && collect_k8s_logs && exit 1 fi desc 'telemetry was disabled in CR as well as in operator' if [ "${cr_vs_channel}" == 'disabled' -a "${telemetry_state}" == 'disabled' ]; then # CR VS should not have telemetry - [[ -s ${tmp_dir}/disabled_telemetry.version-service-cr.log.json ]] && exit 1 + [[ -s ${tmp_dir}/disabled_telemetry.version-service-cr.log.json ]] && collect_k8s_logs && exit 1 # operator VS should not have telemetry - [[ -s ${tmp_dir}/disabled_telemetry.version-service.log.json ]] && exit 1 + [[ -s ${tmp_dir}/disabled_telemetry.version-service.log.json ]] && collect_k8s_logs && exit 1 fi kubectl_bin delete pod ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) @@ -183,6 +183,7 @@ for i in "${!cases[@]}"; do pods=($(kubectl get pods -l app.kubernetes.io/name=percona-server-mongodb -o=name)) if [ ${#pods[@]} -eq 0 ]; then + collect_k8s_logs echo "pods not found" exit 1 fi @@ -190,6 +191,7 @@ for i in "${!cases[@]}"; do for pod in "${pods[@]}"; do img=$(kubectl get $pod -o jsonpath='{.spec.containers[0].image}') if [ "$img" != "$expected_image" ]; then + collect_k8s_logs echo "image was not updated" exit 1 fi From 46309a290362e377223d213df1aefcae28c48a2a Mon Sep 17 00:00:00 2001 From: Tomislav Plavcic Date: Thu, 21 Sep 2023 13:27:24 +0200 Subject: [PATCH 03/15] Minor fixes in collect_k8s_logs --- e2e-tests/functions | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/e2e-tests/functions b/e2e-tests/functions index 80b1b95357..8c38a4d2df 100755 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -1325,24 +1325,24 @@ collect_k8s_logs() { if [[ ${ENABLE_LOGGING} == "true" ]]; then rm -f ${logs_dir}/logs_${test_name}_* || : - local check_namespaces="$namespace${OPERATOR_NS:+ $OPERATOR_NS}" - - for ns in $check_namespaces; do - local pods=$(kubectl_bin get pods -o name | awk -F "/" 
'{print $2}') - for p in $pods; do - kubectl_bin -n "$ns" describe pod $p >${logs_dir}/logs_${test_name}_$p_.dsc - local containers=$(kubectl_bin -n "$ns" get pod $p -o jsonpath='{.spec.containers[*].name}') - for c in $containers; do - kubectl_bin -n "$ns" logs $p -c $c >${logs_dir}/logs_${test_name}_$p_$c.txt - echo logs saved in: ${logs_dir}/logs_${test_name}_$p_$c.txt + local check_namespaces="${namespace}${OPERATOR_NS:+ $OPERATOR_NS}" + + for ns in ${check_namespaces}; do + local pods=$(kubectl_bin get pods -n "${ns}" -o name | awk -F "/" '{print $2}') + for p in ${pods}; do + kubectl_bin -n "${ns}" describe pod ${p} >${logs_dir}/logs_${test_name}_${ns}_${p}.dsc || : + local containers=$(kubectl_bin -n "${ns}" get pod ${p} -o jsonpath='{.spec.containers[*].name}') + for c in ${containers}; do + kubectl_bin -n "${ns}" logs ${p} -c ${c} >${logs_dir}/logs_${test_name}_${ns}_${p}_${c}.txt || : + echo logs saved in: ${logs_dir}/logs_${test_name}_${p}_${c}.txt done done done - for object in "psmdb psmdb-backup psmdb-restore pods deployments services events sts"; do - echo "##### START: $object #####" >>${logs_dir}/logs_${test_name}_simple.txt - kubectl_bin get $object --all-namespaces >>${logs_dir}/logs_${test_name}_simple.txt - echo "##### END: $object ####\n" >>${logs_dir}/logs_${test_name}_simple.txt - kubectl_bin get $object --all-namespaces >${logs_dir}/logs_${test_name}_$object.yaml + for object in psmdb psmdb-backup psmdb-restore pods deployments services events sts; do + echo "##### START: ${ns}: ${object} #####" >>${logs_dir}/logs_${test_name}_${ns}_simple.txt + kubectl_bin get ${object} -n "${ns}" >>${logs_dir}/logs_${test_name}_${ns}_simple.txt || : + echo "##### END: ${ns}: ${object} ####\n" >>${logs_dir}/logs_${test_name}_${ns}_simple.txt + kubectl_bin get ${object} -n "${ns}" >${logs_dir}/logs_${test_name}_${ns}_${object}.yaml || : done fi } From 53afb5758139472dcb3a38a1f086f09880aa114b Mon Sep 17 00:00:00 2001 From: Tomislav Plavcic Date: Thu, 21 Sep 
2023 16:05:50 +0200 Subject: [PATCH 04/15] Add collecting yaml of objects and add describe logs --- e2e-tests/functions | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/e2e-tests/functions b/e2e-tests/functions index 8c38a4d2df..b3a4c5a29f 100755 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -1342,7 +1342,8 @@ collect_k8s_logs() { echo "##### START: ${ns}: ${object} #####" >>${logs_dir}/logs_${test_name}_${ns}_simple.txt kubectl_bin get ${object} -n "${ns}" >>${logs_dir}/logs_${test_name}_${ns}_simple.txt || : echo "##### END: ${ns}: ${object} ####\n" >>${logs_dir}/logs_${test_name}_${ns}_simple.txt - kubectl_bin get ${object} -n "${ns}" >${logs_dir}/logs_${test_name}_${ns}_${object}.yaml || : + kubectl_bin get ${object} -n "${ns}" -oyaml >${logs_dir}/logs_${test_name}_${ns}_${object}.yaml || : + kubectl_bin describe ${object} -n "${ns}" >${logs_dir}/logs_${test_name}_${ns}_${object}.dsc || : done fi } From f184d3a5d86a6072d0b8c6fe46dcb76d7a00ce52 Mon Sep 17 00:00:00 2001 From: Tomislav Plavcic Date: Fri, 22 Sep 2023 13:14:54 +0200 Subject: [PATCH 05/15] Collect test logs into directory and zip it --- Jenkinsfile | 18 ++++++++++++------ e2e-tests/functions | 23 ++++++++++++----------- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 871908e15f..2ee7bf9438 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -98,16 +98,21 @@ void pushLogFile(String FILE_NAME) { } void pushK8SLogs(String TEST_NAME) { - def LOG_FILE_PATH="e2e-tests/logs/" - def FILE_NAMES="logs_${TEST_NAME}_*" + def LOG_FILE_PATH="e2e-tests/logs" echo "Push k8s logs to S3!" 
withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', accessKeyVariable: 'AWS_ACCESS_KEY_ID', credentialsId: 'AMI/OVF', secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']]) { sh """ - S3_PATH=s3://percona-jenkins-artifactory/\$JOB_NAME/\$(git rev-parse --short HEAD)/logs/ - aws s3 ls \$S3_PATH || : - aws s3 rm \$S3_PATH --recursive --exclude "*" --include "${FILE_NAMES}" || : - aws s3 cp --quiet ${LOG_FILE_PATH} \$S3_PATH --recursive --exclude "*" --include "$FILE_NAMES" || : + if [ -d "${LOG_FILE_PATH}/${TEST_NAME}" ]; then + zip -r ${TEST_NAME}.zip ${LOG_FILE_PATH}/${TEST_NAME} || : + rm -rf ${LOG_FILE_PATH}/${TEST_NAME} + + S3_PATH=s3://percona-jenkins-artifactory/\$JOB_NAME/\$(git rev-parse --short HEAD)/logs + aws s3 ls \$S3_PATH/ || : + aws s3 rm \$S3_PATH/${TEST_NAME}.zip || : + aws s3 cp --quiet ${TEST_NAME}.zip \$S3_PATH/ || : + rm -f ${TEST_NAME}.zip + fi """ } } @@ -259,6 +264,7 @@ pipeline { CLUSTER_NAME = sh(script: "echo jen-psmdb-${env.CHANGE_ID}-${GIT_SHORT_COMMIT}-${env.BUILD_NUMBER} | tr '[:upper:]' '[:lower:]'", , returnStdout: true).trim() AUTHOR_NAME = sh(script: "echo ${CHANGE_AUTHOR_EMAIL} | awk -F'@' '{print \$1}'", , returnStdout: true).trim() ENABLE_LOGGING = "true" + ENABLE_LOG_COLLECT = "true" } agent { label 'docker' diff --git a/e2e-tests/functions b/e2e-tests/functions index b3a4c5a29f..cef4a593d9 100755 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -1322,28 +1322,29 @@ function generate_vs_json() { } collect_k8s_logs() { - if [[ ${ENABLE_LOGGING} == "true" ]]; then - rm -f ${logs_dir}/logs_${test_name}_* || : - + if [[ ${ENABLE_LOG_COLLECT} == "true" ]]; then local check_namespaces="${namespace}${OPERATOR_NS:+ $OPERATOR_NS}" + local logs_path="${logs_dir}/${test_name}" + rm -rf ${logs_path} || : + mkdir -p $logs_path for ns in ${check_namespaces}; do local pods=$(kubectl_bin get pods -n "${ns}" -o name | awk -F "/" '{print $2}') for p in ${pods}; do - kubectl_bin -n "${ns}" describe pod ${p} 
>${logs_dir}/logs_${test_name}_${ns}_${p}.dsc || : + kubectl_bin -n "${ns}" describe pod ${p} >${logs_path}/pod_${ns}_${p}.dsc || : local containers=$(kubectl_bin -n "${ns}" get pod ${p} -o jsonpath='{.spec.containers[*].name}') for c in ${containers}; do - kubectl_bin -n "${ns}" logs ${p} -c ${c} >${logs_dir}/logs_${test_name}_${ns}_${p}_${c}.txt || : - echo logs saved in: ${logs_dir}/logs_${test_name}_${p}_${c}.txt + kubectl_bin -n "${ns}" logs ${p} -c ${c} >${logs_path}/${ns}_${p}_${c}.log || : + echo "logs saved in: ${logs_path}/${ns}_${p}_${c}.log" done done done for object in psmdb psmdb-backup psmdb-restore pods deployments services events sts; do - echo "##### START: ${ns}: ${object} #####" >>${logs_dir}/logs_${test_name}_${ns}_simple.txt - kubectl_bin get ${object} -n "${ns}" >>${logs_dir}/logs_${test_name}_${ns}_simple.txt || : - echo "##### END: ${ns}: ${object} ####\n" >>${logs_dir}/logs_${test_name}_${ns}_simple.txt - kubectl_bin get ${object} -n "${ns}" -oyaml >${logs_dir}/logs_${test_name}_${ns}_${object}.yaml || : - kubectl_bin describe ${object} -n "${ns}" >${logs_dir}/logs_${test_name}_${ns}_${object}.dsc || : + echo "##### START: ${ns}: ${object} #####" >>${logs_path}/_overview_${ns}.txt + kubectl_bin get ${object} -n "${ns}" >>${logs_path}/_overview_${ns}.txt || : + echo -e "##### END: ${ns}: ${object} ####\n" >>${logs_path}/_overview_${ns}.txt + kubectl_bin get ${object} -n "${ns}" -oyaml >${logs_path}/${object}_${ns}.yaml || : + kubectl_bin describe ${object} -n "${ns}" >${logs_path}/${object}_${ns}.dsc || : done fi } From a3a09ec7a6ca52dd6fcd7b5bcdf9ae35a8a03965 Mon Sep 17 00:00:00 2001 From: Tomislav Plavcic Date: Fri, 22 Sep 2023 14:00:19 +0200 Subject: [PATCH 06/15] Change compression method --- Jenkinsfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 2ee7bf9438..2524206cf0 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -104,14 +104,14 @@ void pushK8SLogs(String TEST_NAME) { 
withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', accessKeyVariable: 'AWS_ACCESS_KEY_ID', credentialsId: 'AMI/OVF', secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']]) { sh """ if [ -d "${LOG_FILE_PATH}/${TEST_NAME}" ]; then - zip -r ${TEST_NAME}.zip ${LOG_FILE_PATH}/${TEST_NAME} || : + env GZIP=-9 tar -zcvf ${TEST_NAME}.tar.gz -C ${LOG_FILE_PATH} ${TEST_NAME} rm -rf ${LOG_FILE_PATH}/${TEST_NAME} S3_PATH=s3://percona-jenkins-artifactory/\$JOB_NAME/\$(git rev-parse --short HEAD)/logs aws s3 ls \$S3_PATH/ || : - aws s3 rm \$S3_PATH/${TEST_NAME}.zip || : - aws s3 cp --quiet ${TEST_NAME}.zip \$S3_PATH/ || : - rm -f ${TEST_NAME}.zip + aws s3 rm \$S3_PATH/${TEST_NAME}.tar.gz || : + aws s3 cp --quiet ${TEST_NAME}.tar.gz \$S3_PATH/ || : + rm -f ${TEST_NAME}.tar.gz fi """ } From 7aba7aee39f8822c584b40c9865357aca357ccfe Mon Sep 17 00:00:00 2001 From: Tomislav Plavcic Date: Mon, 16 Oct 2023 13:13:07 +0200 Subject: [PATCH 07/15] Run shfmt on split-horizon test --- e2e-tests/split-horizon/run | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/e2e-tests/split-horizon/run b/e2e-tests/split-horizon/run index 66df795d8b..fb7cf4ae8c 100755 --- a/e2e-tests/split-horizon/run +++ b/e2e-tests/split-horizon/run @@ -16,7 +16,7 @@ configure_client_hostAliases() { hostAliasesJson=$(echo $hostAliasesJson | jq --argjson newAlias "$hostAlias" '. 
+= [$newAlias]') done - kubectl_bin patch deployment psmdb-client --type='json' -p="[{'op': 'replace', 'path': '/spec/template/spec/hostAliases', 'value': $hostAliasesJson}]" + kubectl_bin patch deployment psmdb-client --type='json' -p="[{'op': 'replace', 'path': '/spec/template/spec/hostAliases', 'value': $hostAliasesJson}]" wait_pod $(kubectl_bin get pods --selector=name=psmdb-client -o 'jsonpath={.items[].metadata.name}') } @@ -51,22 +51,22 @@ diff $test_dir/compare/horizons-3.json $tmp_dir/horizons-3.json isMaster=$(run_mongo_tls "db.isMaster().ismaster" "clusterAdmin:clusterAdmin123456@some-name-rs0-0.clouddemo.xyz,some-name-rs0-1.clouddemo.xyz,some-name-rs0-2.clouddemo.xyz" mongodb "" "--quiet" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|Error saving history file:' | grep -v certificateNames) if [ "${isMaster}" != "true" ]; then collect_k8s_logs - echo "mongo client should've redirect the connection to primary" - exit 1 + echo "mongo client should've redirect the connection to primary" + exit 1 fi # stepping down to ensure we haven't redirected to primary just because primary is pod-0 run_mongo_tls "rs.stepDown()" \ - "clusterAdmin:clusterAdmin123456@some-name-rs0-0.clouddemo.xyz,some-name-rs0-1.clouddemo.xyz,some-name-rs0-2.clouddemo.xyz" \ - mongodb "" "--quiet" + "clusterAdmin:clusterAdmin123456@some-name-rs0-0.clouddemo.xyz,some-name-rs0-1.clouddemo.xyz,some-name-rs0-2.clouddemo.xyz" \ + mongodb "" "--quiet" sleep 10 # give some time for re-election isMaster=$(run_mongo_tls "db.isMaster().ismaster" "clusterAdmin:clusterAdmin123456@some-name-rs0-0.clouddemo.xyz,some-name-rs0-1.clouddemo.xyz,some-name-rs0-2.clouddemo.xyz" mongodb "" "--quiet" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|Error saving history 
file:' | grep -v certificateNames) if [ "${isMaster}" != "true" ]; then collect_k8s_logs - echo "mongo client should've redirect the connection to primary" - exit 1 + echo "mongo client should've redirect the connection to primary" + exit 1 fi apply_cluster ${test_dir}/conf/${cluster}-5horizons.yml From d34bb06913c1f242d26904b2e3b91d052fd64dea Mon Sep 17 00:00:00 2001 From: Tomislav Plavcic Date: Tue, 17 Oct 2023 10:52:16 +0200 Subject: [PATCH 08/15] Fix namespace in collect_k8s_logs function --- e2e-tests/functions | 13 +++++++------ e2e-tests/monitoring-2-0/run | 1 + 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/e2e-tests/functions b/e2e-tests/functions index cef4a593d9..12af936e30 100755 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -1325,8 +1325,9 @@ collect_k8s_logs() { if [[ ${ENABLE_LOG_COLLECT} == "true" ]]; then local check_namespaces="${namespace}${OPERATOR_NS:+ $OPERATOR_NS}" local logs_path="${logs_dir}/${test_name}" + rm -rf ${logs_path} || : - mkdir -p $logs_path + mkdir -p ${logs_path} for ns in ${check_namespaces}; do local pods=$(kubectl_bin get pods -n "${ns}" -o name | awk -F "/" '{print $2}') @@ -1340,11 +1341,11 @@ collect_k8s_logs() { done done for object in psmdb psmdb-backup psmdb-restore pods deployments services events sts; do - echo "##### START: ${ns}: ${object} #####" >>${logs_path}/_overview_${ns}.txt - kubectl_bin get ${object} -n "${ns}" >>${logs_path}/_overview_${ns}.txt || : - echo -e "##### END: ${ns}: ${object} ####\n" >>${logs_path}/_overview_${ns}.txt - kubectl_bin get ${object} -n "${ns}" -oyaml >${logs_path}/${object}_${ns}.yaml || : - kubectl_bin describe ${object} -n "${ns}" >${logs_path}/${object}_${ns}.dsc || : + echo "##### START: NS: ${namespace} - OBJ: ${object} #####" >>${logs_path}/_overview_${namespace}.txt + kubectl_bin get ${object} -n "${namespace}" >>${logs_path}/_overview_${namespace}.txt || : + echo -e "##### END: NS: ${namespace} - OBJ: ${object} ####\n" 
>>${logs_path}/_overview_${namespace}.txt + kubectl_bin get ${object} -n "${namespace}" -oyaml >${logs_path}/${object}_${namespace}.yaml || : + kubectl_bin describe ${object} -n "${namespace}" >${logs_path}/${object}_${namespace}.dsc || : done fi } diff --git a/e2e-tests/monitoring-2-0/run b/e2e-tests/monitoring-2-0/run index 533ecbf4fa..b8ab9c015f 100755 --- a/e2e-tests/monitoring-2-0/run +++ b/e2e-tests/monitoring-2-0/run @@ -151,6 +151,7 @@ if [[ -n ${OPENSHIFT} ]]; then fi if [[ $(kubectl_bin logs monitoring-rs0-0 pmm-client | grep -c 'cannot auto discover databases and collections') != 0 ]]; then + collect_k8s_logs echo "error: cannot auto discover databases and collections" exit 1 fi From 24ca96cae7ef71563844b338295af50aa65fd10c Mon Sep 17 00:00:00 2001 From: Tomislav Plavcic Date: Tue, 17 Oct 2023 11:05:21 +0200 Subject: [PATCH 09/15] Fix collect_k8s_logs for collecting events --- e2e-tests/functions | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/e2e-tests/functions b/e2e-tests/functions index 12af936e30..c45991cfd7 100755 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -1340,13 +1340,14 @@ collect_k8s_logs() { done done done - for object in psmdb psmdb-backup psmdb-restore pods deployments services events sts; do + for object in psmdb psmdb-backup psmdb-restore pods deployments replicasets services sts configmaps persistentvolumeclaims persistentvolumes secrets jobs cronjobs clusterroles roles; do echo "##### START: NS: ${namespace} - OBJ: ${object} #####" >>${logs_path}/_overview_${namespace}.txt kubectl_bin get ${object} -n "${namespace}" >>${logs_path}/_overview_${namespace}.txt || : echo -e "##### END: NS: ${namespace} - OBJ: ${object} ####\n" >>${logs_path}/_overview_${namespace}.txt kubectl_bin get ${object} -n "${namespace}" -oyaml >${logs_path}/${object}_${namespace}.yaml || : kubectl_bin describe ${object} -n "${namespace}" >${logs_path}/${object}_${namespace}.dsc || : done + kubectl_bin get events --all-namespaces 
>${logs_path}/events.yaml || : fi } From 20850d9404ac4020aa4a41f271a562fac89d9308 Mon Sep 17 00:00:00 2001 From: Tomislav Plavcic Date: Thu, 19 Oct 2023 09:26:18 +0200 Subject: [PATCH 10/15] Add collecting of mongo logs in collect_k8s_logs --- Jenkinsfile | 2 +- e2e-tests/functions | 41 +++++++++++++++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 2524206cf0..40852558c0 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -264,7 +264,7 @@ pipeline { CLUSTER_NAME = sh(script: "echo jen-psmdb-${env.CHANGE_ID}-${GIT_SHORT_COMMIT}-${env.BUILD_NUMBER} | tr '[:upper:]' '[:lower:]'", , returnStdout: true).trim() AUTHOR_NAME = sh(script: "echo ${CHANGE_AUTHOR_EMAIL} | awk -F'@' '{print \$1}'", , returnStdout: true).trim() ENABLE_LOGGING = "true" - ENABLE_LOG_COLLECT = "true" + ENABLE_K8S_LOGGING = "true" } agent { label 'docker' diff --git a/e2e-tests/functions b/e2e-tests/functions index c45991cfd7..e0f76aa699 100755 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -720,7 +720,8 @@ run_mongo() { local suffix=${4:-.svc.cluster.local} local client_container=$(kubectl_bin get pods --selector=name=psmdb-client -o 'jsonpath={.items[].metadata.name}') local mongo_flag="$5" - [[ $uri == *cfg* ]] && replica_set='cfg' || replica_set='rs0' + local replica_set=$(echo "$uri" | sed -r 's/.*\-(rs[0-9]|cfg)\..*/\1/') + kubectl_bin exec ${client_container} -- \ bash -c "printf '$command\n' | mongo $driver://$uri$suffix/admin?ssl=false\&replicaSet=$replica_set $mongo_flag" @@ -733,7 +734,8 @@ run_mongo_tls() { local suffix=${4:-.svc.cluster.local} local client_container=$(kubectl_bin get pods --selector=name=psmdb-client -o 'jsonpath={.items[].metadata.name}') local mongo_flag="$5" - [[ $uri == *cfg* ]] && replica_set='cfg' || replica_set='rs0' + local replica_set=$(echo "$uri" | sed -r 's/.*\-(rs[0-9]|cfg)\..*/\1/') + kubectl_bin exec ${client_container} -- \ bash -c "printf '$command\n' | mongo 
$driver://$uri$suffix/admin?replicaSet=$replica_set --tls --tlsCAFile /etc/mongodb-ssl/ca.crt --tlsCertificateKeyFile /tmp/tls.pem --tlsAllowInvalidHostnames $mongo_flag" @@ -1322,7 +1324,7 @@ function generate_vs_json() { } collect_k8s_logs() { - if [[ ${ENABLE_LOG_COLLECT} == "true" ]]; then + if [[ ${ENABLE_K8S_LOGGING} == "true" ]]; then local check_namespaces="${namespace}${OPERATOR_NS:+ $OPERATOR_NS}" local logs_path="${logs_dir}/${test_name}" @@ -1335,19 +1337,42 @@ collect_k8s_logs() { kubectl_bin -n "${ns}" describe pod ${p} >${logs_path}/pod_${ns}_${p}.dsc || : local containers=$(kubectl_bin -n "${ns}" get pod ${p} -o jsonpath='{.spec.containers[*].name}') for c in ${containers}; do - kubectl_bin -n "${ns}" logs ${p} -c ${c} >${logs_path}/${ns}_${p}_${c}.log || : + kubectl_bin -n "${ns}" logs ${p} -c ${c} >${logs_path}/container_${p}_${c}.log || : echo "logs saved in: ${logs_path}/${ns}_${p}_${c}.log" done done done - for object in psmdb psmdb-backup psmdb-restore pods deployments replicasets services sts configmaps persistentvolumeclaims persistentvolumes secrets jobs cronjobs clusterroles roles; do - echo "##### START: NS: ${namespace} - OBJ: ${object} #####" >>${logs_path}/_overview_${namespace}.txt + for object in psmdb psmdb-backup psmdb-restore pods deployments replicasets services sts configmaps persistentvolumeclaims persistentvolumes secrets roles issuer certificate; do + echo "##### START: ${object} NS: ${namespace} #####" >>${logs_path}/_overview_${namespace}.txt kubectl_bin get ${object} -n "${namespace}" >>${logs_path}/_overview_${namespace}.txt || : - echo -e "##### END: NS: ${namespace} - OBJ: ${object} ####\n" >>${logs_path}/_overview_${namespace}.txt + echo -e "##### END: ${object} NS: ${namespace} #####\n" >>${logs_path}/_overview_${namespace}.txt kubectl_bin get ${object} -n "${namespace}" -oyaml >${logs_path}/${object}_${namespace}.yaml || : kubectl_bin describe ${object} -n "${namespace}" >${logs_path}/${object}_${namespace}.dsc || : 
done - kubectl_bin get events --all-namespaces >${logs_path}/events.yaml || : + kubectl_bin get events --all-namespaces >${logs_path}/_events.log || : + kubectl_bin get nodes >${logs_path}/_nodes.log || : + kubectl_bin get clusterroles >${logs_path}/_clusterroles.log || : + + local secret psmdb_secret psmdb_user psmdb_pass + for psmdb_name in "$(kubectl_bin get psmdb -n ${namespace} -o custom-columns=NAME:.metadata.name --no-headers=true)"; do + psmdb_secret="$(kubectl_bin get psmdb ${psmdb_name} -n ${namespace} -ojsonpath='{.spec.secrets.users}')" + if [[ ${psmdb_secret} ]]; then secret="${psmdb_secret}"; else secret="${psmdb_name}-secrets"; fi + psmdb_user="$(kubectl_bin get secrets ${psmdb_secret} -ojsonpath='{.data.MONGODB_BACKUP_USER}' | base64 --decode)" + psmdb_pass="$(kubectl_bin get secrets ${psmdb_secret} -ojsonpath='{.data.MONGODB_BACKUP_PASSWORD}' | base64 --decode)" + if [[ "$(kubectl_bin get psmdb ${psmdb_name} -n ${namespace} -ojsonpath='{.spec.sharding.enabled}')" == "true" ]]; then + local cfg_replica="cfg" + echo "##### sh.status() #####" >${logs_path}/mongos_${psmdb_name}.mongo + run_mongos 'sh.status()' "${psmdb_user}:${psmdb_pass}@${psmdb_name}-mongos.${namespace}" >>${logs_path}/mongos_${psmdb_name}.mongo + fi + for psmdb_replset in $(kubectl_bin get psmdb ${psmdb_name} -n ${namespace} -ojsonpath='{.spec.replsets[*].name}' | awk '{print $0" '${cfg_replica}'"}'); do + local command=("rs.status()" "rs.config()" "db.printSlaveReplicationInfo()" "db.serverCmdLineOpts()" "db.getRoles()" "db.getUsers()") + for com in "${command[@]}"; do + echo "##### START: ${com} #####" >>${logs_path}/mongodb_${psmdb_name}_${psmdb_replset}.mongo + run_mongo "${com}" "${psmdb_user}:${psmdb_pass}@${psmdb_name}-${psmdb_replset}.${namespace}" >>${logs_path}/mongodb_${psmdb_name}_${psmdb_replset}.mongo + echo -e "##### END: ${com} #####\n" >>${logs_path}/mongodb_${psmdb_name}_${psmdb_replset}.mongo + done + done + done fi } From 384c6995e8650d3643665af933c488cc0c7c0b4d 
Mon Sep 17 00:00:00 2001 From: Tomislav Plavcic Date: Thu, 19 Oct 2023 13:18:19 +0200 Subject: [PATCH 11/15] functions - fix setting replica_set in run_mongo --- e2e-tests/functions | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/e2e-tests/functions b/e2e-tests/functions index e0f76aa699..7aef33c0f6 100755 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -720,7 +720,7 @@ run_mongo() { local suffix=${4:-.svc.cluster.local} local client_container=$(kubectl_bin get pods --selector=name=psmdb-client -o 'jsonpath={.items[].metadata.name}') local mongo_flag="$5" - local replica_set=$(echo "$uri" | sed -r 's/.*\-(rs[0-9]|cfg)\..*/\1/') + local replica_set=$(echo "$uri" | sed -r 's/.*\-(rs[0-9]|cfg)(\.|-).*/\1/') kubectl_bin exec ${client_container} -- \ bash -c "printf '$command\n' | mongo $driver://$uri$suffix/admin?ssl=false\&replicaSet=$replica_set $mongo_flag" @@ -734,7 +734,7 @@ run_mongo_tls() { local suffix=${4:-.svc.cluster.local} local client_container=$(kubectl_bin get pods --selector=name=psmdb-client -o 'jsonpath={.items[].metadata.name}') local mongo_flag="$5" - local replica_set=$(echo "$uri" | sed -r 's/.*\-(rs[0-9]|cfg)\..*/\1/') + local replica_set=$(echo "$uri" | sed -r 's/.*\-(rs[0-9]|cfg)(\.|-).*/\1/') kubectl_bin exec ${client_container} -- \ bash -c "printf '$command\n' | mongo $driver://$uri$suffix/admin?replicaSet=$replica_set --tls --tlsCAFile /etc/mongodb-ssl/ca.crt --tlsCertificateKeyFile /tmp/tls.pem --tlsAllowInvalidHostnames $mongo_flag" @@ -1142,6 +1142,9 @@ kubectl_bin() { cat "$LAST_OUT" cat "$LAST_ERR" >&2 rm "$LAST_OUT" "$LAST_ERR" + if [ ${exit_status} != 0 ]; then + collect_k8s_logs + fi return ${exit_status} } From c2cce7820a840d49fac0a573bef660c711a7181d Mon Sep 17 00:00:00 2001 From: Tomislav Plavcic Date: Thu, 19 Oct 2023 14:38:45 +0200 Subject: [PATCH 12/15] Remove kubectl_bin from collect_k8s_logs and delete_crd --- e2e-tests/functions | 48 ++++++++++++++++++++++----------------------- 1 file 
changed, 24 insertions(+), 24 deletions(-) diff --git a/e2e-tests/functions b/e2e-tests/functions index 7aef33c0f6..3bb7bc468b 100755 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -617,7 +617,7 @@ wait_for_delete() { let retry+=1 if [ $retry -ge $wait_time ]; then collect_k8s_logs - kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ + kubectl logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ | grep -v 'level=info' \ | grep -v 'level=debug' \ | grep -v 'Getting tasks for pod' \ @@ -906,13 +906,13 @@ deploy_cert_manager() { delete_crd() { desc 'get and delete old CRDs and RBAC' - kubectl_bin delete -f "${src_dir}/deploy/crd.yaml" --ignore-not-found --wait=false || : + kubectl delete -f "${src_dir}/deploy/crd.yaml" --ignore-not-found --wait=false || : for crd_name in $(yq eval '.metadata.name' "${src_dir}/deploy/crd.yaml" | grep -v '\-\-\-'); do kubectl get ${crd_name} --all-namespaces -o wide \ | grep -v 'NAMESPACE' \ | xargs -L 1 sh -xc 'kubectl patch '${crd_name}' -n $0 $1 --type=merge -p "{\"metadata\":{\"finalizers\":[]}}"' \ || : - kubectl_bin wait --for=delete crd ${crd_name} || : + kubectl wait --for=delete crd ${crd_name} || : done local rbac_yaml='rbac.yaml' @@ -920,7 +920,7 @@ delete_crd() { rbac_yaml='cw-rbac.yaml' fi - kubectl_bin delete -f "${src_dir}/deploy/$rbac_yaml" --ignore-not-found || true + kubectl delete -f "${src_dir}/deploy/$rbac_yaml" --ignore-not-found || true } destroy() { @@ -929,7 +929,7 @@ destroy() { desc 'destroy cluster/operator and all other resources' if [ ${ignore_logs} == "false" ] && [ "${DEBUG_TESTS}" == 1 ]; then - kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ + kubectl logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ | grep -v 'level=info' \ | grep -v 'level=debug' \ | grep -v 'Getting tasks for pod' \ @@ -952,9 +952,9 @@ destroy() { oc delete --grace-period=0 --force=true project "$OPERATOR_NS" & fi else - kubectl_bin delete --grace-period=0 --force=true 
namespace "$namespace" & + kubectl delete --grace-period=0 --force=true namespace "$namespace" & if [ -n "$OPERATOR_NS" ]; then - kubectl_bin delete --grace-period=0 --force=true namespace "$OPERATOR_NS" & + kubectl delete --grace-period=0 --force=true namespace "$OPERATOR_NS" & fi fi rm -rf ${tmp_dir} @@ -1257,7 +1257,7 @@ check_crd_for_deletion() { local git_tag="$1" for crd_name in $(curl -s https://raw.githubusercontent.com/percona/percona-server-mongodb-operator/${git_tag}/deploy/crd.yaml | yq eval '.metadata.name' | $sed 's/---//g' | $sed ':a;N;$!ba;s/\n/ /g'); do - if [[ $(kubectl_bin get crd/${crd_name} -o jsonpath='{.status.conditions[-1].type}') == "Terminating" ]]; then + if [[ $(kubectl get crd/${crd_name} -o jsonpath='{.status.conditions[-1].type}') == "Terminating" ]]; then kubectl get ${crd_name} --all-namespaces -o wide \ | grep -v 'NAMESPACE' \ | xargs -L 1 sh -xc 'kubectl patch '${crd_name}' -n $0 $1 --type=merge -p "{\"metadata\":{\"finalizers\":[]}}"' \ @@ -1335,39 +1335,39 @@ collect_k8s_logs() { mkdir -p ${logs_path} for ns in ${check_namespaces}; do - local pods=$(kubectl_bin get pods -n "${ns}" -o name | awk -F "/" '{print $2}') + local pods=$(kubectl get pods -n "${ns}" -o name | awk -F "/" '{print $2}') for p in ${pods}; do - kubectl_bin -n "${ns}" describe pod ${p} >${logs_path}/pod_${ns}_${p}.dsc || : - local containers=$(kubectl_bin -n "${ns}" get pod ${p} -o jsonpath='{.spec.containers[*].name}') + kubectl -n "${ns}" describe pod ${p} >${logs_path}/pod_${ns}_${p}.dsc || : + local containers=$(kubectl -n "${ns}" get pod ${p} -o jsonpath='{.spec.containers[*].name}') for c in ${containers}; do - kubectl_bin -n "${ns}" logs ${p} -c ${c} >${logs_path}/container_${p}_${c}.log || : + kubectl -n "${ns}" logs ${p} -c ${c} >${logs_path}/container_${p}_${c}.log || : echo "logs saved in: ${logs_path}/${ns}_${p}_${c}.log" done done done for object in psmdb psmdb-backup psmdb-restore pods deployments replicasets services sts configmaps 
persistentvolumeclaims persistentvolumes secrets roles issuer certificate; do echo "##### START: ${object} NS: ${namespace} #####" >>${logs_path}/_overview_${namespace}.txt - kubectl_bin get ${object} -n "${namespace}" >>${logs_path}/_overview_${namespace}.txt || : + kubectl get ${object} -n "${namespace}" >>${logs_path}/_overview_${namespace}.txt || : echo -e "##### END: ${object} NS: ${namespace} #####\n" >>${logs_path}/_overview_${namespace}.txt - kubectl_bin get ${object} -n "${namespace}" -oyaml >${logs_path}/${object}_${namespace}.yaml || : - kubectl_bin describe ${object} -n "${namespace}" >${logs_path}/${object}_${namespace}.dsc || : + kubectl get ${object} -n "${namespace}" -oyaml >${logs_path}/${object}_${namespace}.yaml || : + kubectl describe ${object} -n "${namespace}" >${logs_path}/${object}_${namespace}.dsc || : done - kubectl_bin get events --all-namespaces >${logs_path}/_events.log || : - kubectl_bin get nodes >${logs_path}/_nodes.log || : - kubectl_bin get clusterroles >${logs_path}/_clusterroles.log || : + kubectl get events --all-namespaces >${logs_path}/_events.log || : + kubectl get nodes >${logs_path}/_nodes.log || : + kubectl get clusterroles >${logs_path}/_clusterroles.log || : local secret psmdb_secret psmdb_user psmdb_pass - for psmdb_name in "$(kubectl_bin get psmdb -n ${namespace} -o custom-columns=NAME:.metadata.name --no-headers=true)"; do - psmdb_secret="$(kubectl_bin get psmdb ${psmdb_name} -n ${namespace} -ojsonpath='{.spec.secrets.users}')" + for psmdb_name in "$(kubectl get psmdb -n ${namespace} -o custom-columns=NAME:.metadata.name --no-headers=true)"; do + psmdb_secret="$(kubectl get psmdb ${psmdb_name} -n ${namespace} -ojsonpath='{.spec.secrets.users}')" if [[ ${psmdb_secret} ]]; then secret="${psmdb_secret}"; else secret="${psmdb_name}-secrets"; fi - psmdb_user="$(kubectl_bin get secrets ${psmdb_secret} -ojsonpath='{.data.MONGODB_BACKUP_USER}' | base64 --decode)" - psmdb_pass="$(kubectl_bin get secrets ${psmdb_secret} 
-ojsonpath='{.data.MONGODB_BACKUP_PASSWORD}' | base64 --decode)" - if [[ "$(kubectl_bin get psmdb ${psmdb_name} -n ${namespace} -ojsonpath='{.spec.sharding.enabled}')" == "true" ]]; then + psmdb_user="$(kubectl get secrets ${psmdb_secret} -ojsonpath='{.data.MONGODB_BACKUP_USER}' | base64 --decode)" + psmdb_pass="$(kubectl get secrets ${psmdb_secret} -ojsonpath='{.data.MONGODB_BACKUP_PASSWORD}' | base64 --decode)" + if [[ "$(kubectl get psmdb ${psmdb_name} -n ${namespace} -ojsonpath='{.spec.sharding.enabled}')" == "true" ]]; then local cfg_replica="cfg" echo "##### sh.status() #####" >${logs_path}/mongos_${psmdb_name}.mongo run_mongos 'sh.status()' "${psmdb_user}:${psmdb_pass}@${psmdb_name}-mongos.${namespace}" >>${logs_path}/mongos_${psmdb_name}.mongo fi - for psmdb_replset in $(kubectl_bin get psmdb ${psmdb_name} -n ${namespace} -ojsonpath='{.spec.replsets[*].name}' | awk '{print $0" '${cfg_replica}'"}'); do + for psmdb_replset in $(kubectl get psmdb ${psmdb_name} -n ${namespace} -ojsonpath='{.spec.replsets[*].name}' | awk '{print $0" '${cfg_replica}'"}'); do local command=("rs.status()" "rs.config()" "db.printSlaveReplicationInfo()" "db.serverCmdLineOpts()" "db.getRoles()" "db.getUsers()") for com in "${command[@]}"; do echo "##### START: ${com} #####" >>${logs_path}/mongodb_${psmdb_name}_${psmdb_replset}.mongo From 98f85dcefc7f2cae84fca421f0ec21471429b889 Mon Sep 17 00:00:00 2001 From: Tomislav Plavcic Date: Fri, 20 Oct 2023 09:46:26 +0200 Subject: [PATCH 13/15] functions - fix run_mongo and run_mongos for collecting k8s logs --- e2e-tests/functions | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/e2e-tests/functions b/e2e-tests/functions index 3bb7bc468b..f97f9264d3 100755 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -720,11 +720,15 @@ run_mongo() { local suffix=${4:-.svc.cluster.local} local client_container=$(kubectl_bin get pods --selector=name=psmdb-client -o 'jsonpath={.items[].metadata.name}') local 
mongo_flag="$5" - local replica_set=$(echo "$uri" | sed -r 's/.*\-(rs[0-9]|cfg)(\.|-).*/\1/') - - kubectl_bin exec ${client_container} -- \ - bash -c "printf '$command\n' | mongo $driver://$uri$suffix/admin?ssl=false\&replicaSet=$replica_set $mongo_flag" + local replica_set=$(echo "$uri" | grep -oE '\-(rs[0-9]|cfg)(\.|-)' | sed 's/^.//;s/.$//' | head -n1) + if [[ ${FUNCNAME[1]} == "collect_k8s_logs" ]]; then + kubectl exec ${client_container} -- \ + bash -c "printf '$command\n' | mongo $driver://$uri$suffix/admin?ssl=false\&replicaSet=$replica_set $mongo_flag" + else + kubectl_bin exec ${client_container} -- \ + bash -c "printf '$command\n' | mongo $driver://$uri$suffix/admin?ssl=false\&replicaSet=$replica_set $mongo_flag" + fi } run_mongo_tls() { @@ -734,11 +738,15 @@ run_mongo_tls() { local suffix=${4:-.svc.cluster.local} local client_container=$(kubectl_bin get pods --selector=name=psmdb-client -o 'jsonpath={.items[].metadata.name}') local mongo_flag="$5" - local replica_set=$(echo "$uri" | sed -r 's/.*\-(rs[0-9]|cfg)(\.|-).*/\1/') - - kubectl_bin exec ${client_container} -- \ - bash -c "printf '$command\n' | mongo $driver://$uri$suffix/admin?replicaSet=$replica_set --tls --tlsCAFile /etc/mongodb-ssl/ca.crt --tlsCertificateKeyFile /tmp/tls.pem --tlsAllowInvalidHostnames $mongo_flag" + local replica_set=$(echo "$uri" | grep -oE '\-(rs[0-9]|cfg)(\.|-)' | sed 's/^.//;s/.$//' | head -n1) + if [[ ${FUNCNAME[1]} == "collect_k8s_logs" ]]; then + kubectl exec ${client_container} -- \ + bash -c "printf '$command\n' | mongo $driver://$uri$suffix/admin?replicaSet=$replica_set --tls --tlsCAFile /etc/mongodb-ssl/ca.crt --tlsCertificateKeyFile /tmp/tls.pem --tlsAllowInvalidHostnames $mongo_flag" + else + kubectl_bin exec ${client_container} -- \ + bash -c "printf '$command\n' | mongo $driver://$uri$suffix/admin?replicaSet=$replica_set --tls --tlsCAFile /etc/mongodb-ssl/ca.crt --tlsCertificateKeyFile /tmp/tls.pem --tlsAllowInvalidHostnames $mongo_flag" + fi } run_mongos() { @@ 
-1372,7 +1380,7 @@ collect_k8s_logs() { for com in "${command[@]}"; do echo "##### START: ${com} #####" >>${logs_path}/mongodb_${psmdb_name}_${psmdb_replset}.mongo run_mongo "${com}" "${psmdb_user}:${psmdb_pass}@${psmdb_name}-${psmdb_replset}.${namespace}" >>${logs_path}/mongodb_${psmdb_name}_${psmdb_replset}.mongo - echo -e "##### END: ${com} #####\n" >>${logs_path}/mongodb_${psmdb_name}_${psmdb_replset}.mongo + echo -e "##### END: ${com} #####\n" >>${logs_path}/mongodb_${psmdb_name}_${psmdb_replset}.mongo done done done From 31091a0176281b998085c474db56fae012883a40 Mon Sep 17 00:00:00 2001 From: Tomislav Plavcic Date: Fri, 20 Oct 2023 13:23:33 +0200 Subject: [PATCH 14/15] Remove sleep from arbiter test --- e2e-tests/arbiter/run | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/e2e-tests/arbiter/run b/e2e-tests/arbiter/run index 900b011fb0..47cd389ef4 100755 --- a/e2e-tests/arbiter/run +++ b/e2e-tests/arbiter/run @@ -25,7 +25,7 @@ check_cr_config() { desc 'wait for convergence' local arbiter_ip=$(get_service_ip $cluster-arbiter-0) local URI="$(get_service_ip $cluster-0),$(get_service_ip $cluster-1),$(get_service_ip $cluster-arbiter-0)" - sleep 240 + wait_cluster_consistency "${cluster/-rs0/}" # check arbiter liveness if [[ $(kubectl_bin get pod \ From 1338970f3d7ed487d676f996f7ced93ea0d33aed Mon Sep 17 00:00:00 2001 From: Tomislav Plavcic Date: Thu, 27 Jun 2024 13:06:13 +0200 Subject: [PATCH 15/15] K8SPSMDB-1080 - Use trap to catch exit status --- Jenkinsfile | 2 +- e2e-tests/arbiter/run | 1 - e2e-tests/balancer/run | 1 - e2e-tests/cross-site-sharded/run | 1 - e2e-tests/data-at-rest-encryption/run | 2 -- e2e-tests/data-sharded/run | 3 -- e2e-tests/default-cr/run | 1 - e2e-tests/demand-backup-physical-sharded/run | 3 -- e2e-tests/demand-backup-physical/run | 1 - e2e-tests/demand-backup-sharded/run | 1 - e2e-tests/demand-backup/run | 2 -- e2e-tests/expose-sharded/run | 2 -- e2e-tests/functions | 34 ++++++-------------- e2e-tests/init-deploy/run | 1 - 
e2e-tests/mongod-major-upgrade-sharded/run | 1 - e2e-tests/mongod-major-upgrade/run | 1 - e2e-tests/monitoring-2-0/run | 2 -- e2e-tests/multi-cluster-service/run | 3 -- e2e-tests/rs-shard-migration/run | 4 --- e2e-tests/self-healing-chaos/run | 1 - e2e-tests/service-per-pod/run | 1 - e2e-tests/smart-update/run | 3 -- e2e-tests/split-horizon/run | 2 -- e2e-tests/tls-issue-cert-manager/run | 1 - e2e-tests/upgrade-sharded/run | 6 ---- e2e-tests/upgrade/run | 5 --- e2e-tests/version-service/run | 10 +++--- 27 files changed, 14 insertions(+), 81 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 40852558c0..6d6adf3fb9 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -505,7 +505,7 @@ EOF unstash 'IMAGE' def IMAGE = sh(returnStdout: true, script: "cat results/docker/TAG").trim() - TestsReport = TestsReport + "\r\n\r\ncommit: ${env.CHANGE_URL}/commits/${env.GIT_COMMIT}\r\nimage: `${IMAGE}`\r\n" + TestsReport = TestsReport + "\r\n\r\ncommit: ${env.CHANGE_URL}/commits/${env.GIT_COMMIT}\r\nimage: `${IMAGE}`\r\nlogs: `s3://percona-jenkins-artifactory/cloud-psmdb-operator/PR-${env.CHANGE_ID}/${GIT_SHORT_COMMIT}/logs/`" pullRequest.comment(TestsReport) } } diff --git a/e2e-tests/arbiter/run b/e2e-tests/arbiter/run index 47cd389ef4..fa853cdb56 100755 --- a/e2e-tests/arbiter/run +++ b/e2e-tests/arbiter/run @@ -31,7 +31,6 @@ check_cr_config() { if [[ $(kubectl_bin get pod \ --selector=statefulset.kubernetes.io/pod-name="${cluster}-arbiter-0" \ -o jsonpath='{.items[*].status.containerStatuses[?(@.name == "mongod-arbiter")].restartCount}') -gt 0 ]]; then - collect_k8s_logs echo "Something went wrong with arbiter. Exiting..." 
exit 1 fi diff --git a/e2e-tests/balancer/run b/e2e-tests/balancer/run index 7855b4d50b..7272c411c8 100755 --- a/e2e-tests/balancer/run +++ b/e2e-tests/balancer/run @@ -15,7 +15,6 @@ check_balancer() { | grep -E -v "Percona Server for MongoDB|connecting to:|Implicit session:|versions do not match|Error saving history file:|bye") if [[ $balancer_running != "$expected" ]]; then - collect_k8s_logs echo "Unexpected output from \"db.adminCommand({balancerStatus: 1}).mode\": $balancer_running" echo "Expected $expected" exit 1 diff --git a/e2e-tests/cross-site-sharded/run b/e2e-tests/cross-site-sharded/run index 9f5e8dbd86..47c688f7f3 100755 --- a/e2e-tests/cross-site-sharded/run +++ b/e2e-tests/cross-site-sharded/run @@ -101,7 +101,6 @@ for i in "rs0" "rs1"; do done if [[ $shards -lt 2 ]]; then - collect_k8s_logs echo "data is only on some of the shards, maybe sharding is not working" exit 1 fi diff --git a/e2e-tests/data-at-rest-encryption/run b/e2e-tests/data-at-rest-encryption/run index f6b60cc854..ff2c08b65d 100755 --- a/e2e-tests/data-at-rest-encryption/run +++ b/e2e-tests/data-at-rest-encryption/run @@ -83,7 +83,6 @@ encrypted_cluster_log=$(kubectl_bin logs some-name-rs0-0 -c mongod -n $namespace echo "$encrypted_cluster_log" if [ -z "$encrypted_cluster_log" ]; then - collect_k8s_logs echo "Cluster is not encrypted" exit 1 fi @@ -100,7 +99,6 @@ until [ "$retry" -ge 10 ]; do echo "Cluster is not encrypted already" break elif [ $retry == 15 ]; then - collect_k8s_logs echo "Max retry count $retry reached. 
Cluster is still encrypted" exit 1 else diff --git a/e2e-tests/data-sharded/run b/e2e-tests/data-sharded/run index ecfd985cea..5c77ed5a78 100755 --- a/e2e-tests/data-sharded/run +++ b/e2e-tests/data-sharded/run @@ -17,7 +17,6 @@ check_rs_proper_component_deletion() { until [[ $(kubectl_bin get sts -l app.kubernetes.io/instance=${cluster},app.kubernetes.io/replset=${rs_name} -ojson | jq '.items | length') -eq 0 ]]; do let retry+=1 if [ $retry -ge 70 ]; then - collect_k8s_logs sts_count=$(kubectl_bin get sts -l app.kubernetes.io/instance=${cluster},app.kubernetes.io/replset=${rs_name} -ojson | jq '.items | length') echo "Replset $rs_name not properly removed, expected sts count of 0 but got $sts_count. Exiting after $retry tries..." exit 1 @@ -116,7 +115,6 @@ main() { done if [[ $shards -lt 3 ]]; then - collect_k8s_logs echo "data is only on some of the shards, maybe sharding is not working" exit 1 fi @@ -127,7 +125,6 @@ main() { "clusterAdmin:clusterAdmin123456@$cluster-mongos.$namespace" "mongodb" ".svc.cluster.local" \ "--tlsCertificateKeyFile /tmp/tls.pem --tlsCAFile /etc/mongodb-ssl/ca.crt --tls") if ! echo $res | grep -q '"ok" : 1'; then - collect_k8s_logs echo "app database not dropped. Exiting.." exit 1 fi diff --git a/e2e-tests/default-cr/run b/e2e-tests/default-cr/run index fbd64cd579..199b481f25 100755 --- a/e2e-tests/default-cr/run +++ b/e2e-tests/default-cr/run @@ -27,7 +27,6 @@ function stop_cluster() { let passed_time="${passed_time}+${sleep_time}" sleep ${sleep_time} if [[ ${passed_time} -gt ${max_wait_time} ]]; then - collect_k8s_logs echo "We've been waiting for cluster stop for too long. Exiting..." 
exit 1 fi diff --git a/e2e-tests/demand-backup-physical-sharded/run b/e2e-tests/demand-backup-physical-sharded/run index e0efefa122..e08867d333 100755 --- a/e2e-tests/demand-backup-physical-sharded/run +++ b/e2e-tests/demand-backup-physical-sharded/run @@ -38,7 +38,6 @@ run_recovery_check() { wait_restore "${backup_name}" "${cluster}" "ready" "0" "1800" kubectl_bin get psmdb ${cluster} -o yaml if [ $(kubectl_bin get psmdb ${cluster} -o yaml | yq '.metadata.annotations."percona.com/resync-pbm"') == null ]; then - collect_k8s_logs echo "psmdb/${cluster} should be annotated with percona.com/resync-pbm after a physical restore" exit 1 fi @@ -53,7 +52,6 @@ check_exported_mongos_service_endpoint() { local host=$1 if [ "$host" != "$(kubectl_bin get psmdb $cluster -o=jsonpath='{.status.host}')" ]; then - collect_k8s_logs echo "Exported host is not correct after the restore" exit 1 fi @@ -82,7 +80,6 @@ wait_cluster_consistency ${cluster} lbEndpoint=$(kubectl_bin get svc $cluster-mongos -o=jsonpath='{.status}' | jq -r 'select(.loadBalancer != null and .loadBalancer.ingress != null and .loadBalancer.ingress != []) | .loadBalancer.ingress[0][]') if [ -z $lbEndpoint ]; then - collect_k8s_logs echo "mongos service not exported correctly" exit 1 fi diff --git a/e2e-tests/demand-backup-physical/run b/e2e-tests/demand-backup-physical/run index 774f90a281..16d1042560 100755 --- a/e2e-tests/demand-backup-physical/run +++ b/e2e-tests/demand-backup-physical/run @@ -38,7 +38,6 @@ run_recovery_check() { wait_restore "${backup_name}" "${cluster}" "ready" "0" "1800" kubectl_bin get psmdb ${cluster} -o yaml if [ $(kubectl_bin get psmdb ${cluster} -o yaml | yq '.metadata.annotations."percona.com/resync-pbm"') == null ]; then - collect_k8s_logs echo "psmdb/${cluster} should be annotated with percona.com/resync-pbm after a physical restore" exit 1 fi diff --git a/e2e-tests/demand-backup-sharded/run b/e2e-tests/demand-backup-sharded/run index cc135e8201..94456ba08a 100755 --- 
a/e2e-tests/demand-backup-sharded/run +++ b/e2e-tests/demand-backup-sharded/run @@ -166,7 +166,6 @@ backup_exists=$(kubectl_bin run -i --rm aws-cli --image=perconalab/awscli --rest /usr/bin/aws --endpoint-url http://minio-service:9000 s3 ls s3://operator-testing/ \ | grep -c ${backup_dest_minio}_ | cat) if [[ $backup_exists -eq 1 ]]; then - collect_k8s_logs echo "Backup was not removed from bucket -- minio" exit 1 fi diff --git a/e2e-tests/demand-backup/run b/e2e-tests/demand-backup/run index a9e874b467..b0f2846b4f 100755 --- a/e2e-tests/demand-backup/run +++ b/e2e-tests/demand-backup/run @@ -135,7 +135,6 @@ backup_exists=$(kubectl_bin run -i --rm aws-cli --image=perconalab/awscli --rest /usr/bin/aws --endpoint-url http://minio-service:9000 s3 ls s3://operator-testing/ \ | grep -c ${backup_dest_minio} | cat) if [[ $backup_exists -eq 1 ]]; then - collect_k8s_logs echo "Backup was not removed from bucket -- minio" exit 1 fi @@ -171,7 +170,6 @@ backup_exists=$(kubectl_bin run -i --rm aws-cli --image=perconalab/awscli --rest /usr/bin/aws --endpoint-url http://minio-service:9000 s3 ls s3://operator-testing/ \ | grep -c ${backup_dest_minio} | cat) if [[ $backup_exists -eq 1 ]]; then - collect_k8s_logs echo "Backup was not removed from bucket -- minio" exit 1 fi diff --git a/e2e-tests/expose-sharded/run b/e2e-tests/expose-sharded/run index cc5237f418..7e49876388 100755 --- a/e2e-tests/expose-sharded/run +++ b/e2e-tests/expose-sharded/run @@ -23,7 +23,6 @@ function stop_cluster() { let passed_time="${passed_time}+${sleep_time}" sleep ${passed_time} if [[ ${passed_time} -gt ${max_wait_time} ]]; then - collect_k8s_logs echo "We've been waiting for cluster stop for too long. Exiting..." 
exit 1 fi @@ -53,7 +52,6 @@ function compare_mongo_config() { rs0_0_endpoint_actual=$(run_mongo 'var host;var x=0;rs.conf().members.forEach(function(d){ if(d.tags.podName=="some-name-rs0-0"){ host=rs.conf().members[x].host;print(host)};x=x+1; })' "clusterAdmin:clusterAdmin123456@${cluster}-rs0.${namespace}" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|Error saving history file:|bye') if [[ $rs0_0_endpoint_actual != "$rs0_0_endpoint:27017" || $cfg_0_endpoint_actual != "$cfg_0_endpoint:27017" ]]; then - collect_k8s_logs desc "Actual values rs $rs0_0_endpoint_actual and cfg $cfg_0_endpoint_actual do not match expected rs $rs0_0_endpoint:27017 and cfg $cfg_0_endpoint:27017" exit 1 fi diff --git a/e2e-tests/functions b/e2e-tests/functions index f97f9264d3..52cb34414f 100755 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -28,6 +28,15 @@ conf_dir=$(realpath $test_dir/../conf || :) src_dir=$(realpath $test_dir/../..) logs_dir=$(realpath $test_dir/../logs) +trap cleanup EXIT HUP INT QUIT TERM +cleanup() { + exit_code=$? + if [[ ${exit_code} -ne 0 ]]; then + collect_k8s_logs + fi + exit ${exit_code} +} + if [[ ${ENABLE_LOGGING} == "true" ]]; then if [ ! -d "${logs_dir}" ]; then mkdir "${logs_dir}" @@ -150,7 +159,6 @@ wait_pod() { echo -n . let retry+=1 if [ $retry -ge 360 ]; then - collect_k8s_logs kubectl_bin describe pod/$pod kubectl_bin logs $pod kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ @@ -159,7 +167,6 @@ wait_pod() { | grep -v 'Getting tasks for pod' \ | grep -v 'Getting pods from source' \ | tail -100 - echo max retry count $retry reached. something went wrong with operator or kubernetes cluster exit 1 fi @@ -179,14 +186,12 @@ wait_cron() { echo -n . 
let retry+=1 if [ $retry -ge 360 ]; then - collect_k8s_logs kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ | grep -v 'level=info' \ | grep -v 'level=debug' \ | grep -v 'Getting tasks for pod' \ | grep -v 'Getting pods from source' \ | tail -100 - echo max retry count $retry reached. something went wrong with operator or kubernetes cluster exit 1 fi @@ -205,10 +210,8 @@ wait_backup_agent() { echo -n . let retry+=1 if [ $retry -ge 360 ]; then - collect_k8s_logs kubectl_bin logs $agent_pod -c backup-agent \ | tail -100 - echo max retry count $retry reached. something went wrong with operator or kubernetes cluster exit 1 fi @@ -230,14 +233,12 @@ wait_backup() { let retry+=1 current_status=$(kubectl_bin get psmdb-backup $backup_name -o jsonpath='{.status.state}') if [[ $retry -ge 360 || ${current_status} == 'error' ]]; then - collect_k8s_logs kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ | grep -v 'level=info' \ | grep -v 'level=debug' \ | grep -v 'Getting tasks for pod' \ | grep -v 'Getting pods from source' \ | tail -100 - echo "Backup object psmdb-backup/${backup_name} is in ${current_state} state." echo something went wrong with operator or kubernetes cluster exit 1 @@ -291,14 +292,12 @@ wait_deployment() { echo -n . let retry+=1 if [ $retry -ge 360 ]; then - collect_k8s_logs kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ | grep -v 'level=info' \ | grep -v 'level=debug' \ | grep -v 'Getting tasks for pod' \ | grep -v 'Getting pods from source' \ | tail -100 - echo max retry count $retry reached. 
something went wrong with operator or kubernetes cluster exit 1 fi @@ -339,7 +338,6 @@ wait_restore() { let retry+=1 current_state=$(kubectl_bin get psmdb-restore restore-$backup_name -o jsonpath='{.status.state}') if [[ $retry -ge $wait_time || ${current_state} == 'error' ]]; then - collect_k8s_logs kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ | grep -v 'level=info' \ | grep -v 'level=debug' \ @@ -553,7 +551,6 @@ retry() { until "$@"; do if [[ $n -ge $max ]]; then - collect_k8s_logs echo "The command '$@' has failed after $n attempts." exit 1 fi @@ -593,7 +590,6 @@ wait_for_running() { timeout=$((timeout + 1)) echo -n '.' if [[ ${timeout} -gt 1500 ]]; then - collect_k8s_logs echo echo "Waiting timeout has been reached. Exiting..." exit 1 @@ -616,14 +612,12 @@ wait_for_delete() { echo -n . let retry+=1 if [ $retry -ge $wait_time ]; then - collect_k8s_logs kubectl logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ | grep -v 'level=info' \ | grep -v 'level=debug' \ | grep -v 'Getting tasks for pod' \ | grep -v 'Getting pods from source' \ | tail -100 - echo max retry count $retry reached. 
something went wrong with operator or kubernetes cluster exit 1 fi @@ -639,8 +633,6 @@ compare_generation() { current_generation="$(kubectl_bin get ${resource_type} "${resource_name}" -o jsonpath='{.metadata.generation}')" if [[ ${generation} != "${current_generation}" ]]; then - collect_k8s_logs - echo "Generation for ${resource_type}/${resource_name} is: ${current_generation}, but should be: ${generation}" exit 1 fi @@ -1011,7 +1003,6 @@ get_service_endpoint() { return fi - collect_k8s_logs exit 1 } @@ -1150,9 +1141,6 @@ kubectl_bin() { cat "$LAST_OUT" cat "$LAST_ERR" >&2 rm "$LAST_OUT" "$LAST_ERR" - if [ ${exit_status} != 0 ]; then - collect_k8s_logs - fi return ${exit_status} } @@ -1191,7 +1179,6 @@ wait_cluster_consistency() { until [[ "$(kubectl_bin get psmdb "${cluster_name}" -o jsonpath='{.status.state}')" == "ready" ]]; do let retry+=1 if [ $retry -ge $wait_time ]; then - collect_k8s_logs echo max retry count $retry reached. something went wrong with operator or kubernetes cluster exit 1 fi @@ -1218,7 +1205,6 @@ check_backup_deletion() { retry=0 until [[ $(curl -sw '%{http_code}' -o /dev/null $path) -eq 403 ]] || [[ $(curl -sw '%{http_code}' -o /dev/null $path) -eq 404 ]]; do if [ $retry -ge 10 ]; then - collect_k8s_logs echo max retry count $retry reached. something went wrong with operator or kubernetes cluster echo "Backup was not removed from bucket -- $storage_name" exit 1 @@ -1280,7 +1266,6 @@ function get_mongod_ver_from_image() { version_info=$(run_simple_cli_inside_image ${image} 'mongod --version' | $sed -r 's/^.*db version v(([0-9]+\.){2}[0-9]+-[0-9]+).*$/\1/g') if [[ ! ${version_info} =~ ^([0-9]+\.){2}[0-9]+-[0-9]+$ ]]; then - collect_k8s_logs printf "No mongod version obtained from %s. Exiting" ${image} exit 1 fi @@ -1293,7 +1278,6 @@ function get_pbm_version() { local version_info=$(run_simple_cli_inside_image ${image} 'pbm-agent version' | $sed -r 's/^Version:\ (([0-9]+\.){2}[0-9]+)\ .*/\1/g') if [[ ! 
${version_info} =~ ^([0-9]+\.){2}[0-9]+$ ]]; then - collect_k8s_logs printf "No pbm version obtained from %s. Exiting" ${image} exit 1 fi diff --git a/e2e-tests/init-deploy/run b/e2e-tests/init-deploy/run index 33184c1926..cf6e6cea7f 100755 --- a/e2e-tests/init-deploy/run +++ b/e2e-tests/init-deploy/run @@ -61,7 +61,6 @@ compare_mongo_cmd "find" "myApp:myPass@$cluster-2.$cluster.$namespace" desc 'check number of connections' conn_count=$(run_mongo 'db.serverStatus().connections.current' "clusterAdmin:clusterAdmin123456@$cluster.$namespace" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|bye') if [ ${conn_count} -gt ${max_conn} ]; then - collect_k8s_logs echo "Mongo connection count ${conn_count} is greater than maximum connection count limit: ${max_conn}" exit 1 fi diff --git a/e2e-tests/mongod-major-upgrade-sharded/run b/e2e-tests/mongod-major-upgrade-sharded/run index 7c92f04d9a..e4378d70c6 100755 --- a/e2e-tests/mongod-major-upgrade-sharded/run +++ b/e2e-tests/mongod-major-upgrade-sharded/run @@ -94,7 +94,6 @@ function main() { | grep -E '^\{.*\}$' | jq -r '.featureCompatibilityVersion.version') if [[ ${currentFCV} != ${version} ]]; then - collect_k8s_logs echo "FCV at the moment is ${currentFCV} and is not set to ${version} as it should. Exiting..." exit 1 fi diff --git a/e2e-tests/mongod-major-upgrade/run b/e2e-tests/mongod-major-upgrade/run index 8aed90e661..8cb58e23fc 100755 --- a/e2e-tests/mongod-major-upgrade/run +++ b/e2e-tests/mongod-major-upgrade/run @@ -89,7 +89,6 @@ function main() { | grep -E '^\{.*\}$' | jq -r '.featureCompatibilityVersion.version') if [[ ${currentFCV} != ${version} ]]; then - collect_k8s_logs echo "FCV at the moment is ${currentFCV} and is not set to ${version} as it should. Exiting..." 
exit 1 fi diff --git a/e2e-tests/monitoring-2-0/run b/e2e-tests/monitoring-2-0/run index b8ab9c015f..d535a2a446 100755 --- a/e2e-tests/monitoring-2-0/run +++ b/e2e-tests/monitoring-2-0/run @@ -37,7 +37,6 @@ until kubectl_bin exec monitoring-0 -- bash -c "ls -l /proc/*/exe 2>/dev/null| g sleep 5 let retry+=1 if [ $retry -ge 20 ]; then - collect_k8s_logs echo "Max retry count $retry reached. Pmm-server can't start" exit 1 fi @@ -151,7 +150,6 @@ if [[ -n ${OPENSHIFT} ]]; then fi if [[ $(kubectl_bin logs monitoring-rs0-0 pmm-client | grep -c 'cannot auto discover databases and collections') != 0 ]]; then - collect_k8s_logs echo "error: cannot auto discover databases and collections" exit 1 fi diff --git a/e2e-tests/multi-cluster-service/run b/e2e-tests/multi-cluster-service/run index 10c2be006c..4dc5b7560d 100755 --- a/e2e-tests/multi-cluster-service/run +++ b/e2e-tests/multi-cluster-service/run @@ -23,7 +23,6 @@ wait_mcs_api() { until [[ $(kubectl_bin api-resources | grep ServiceExport | wc -l) -eq 1 ]]; do let retry+=1 if [ $retry -ge 64 ]; then - collect_k8s_logs echo max retry count $retry reached. Something went wrong with MCS, probably a problem on GCP side. exit 1 fi @@ -41,7 +40,6 @@ wait_service_import() { until [[ "$(kubectl_bin get serviceimport --ignore-not-found | grep -v 'NAME' | wc -l)" -eq "9" ]]; do let retry+=1 if [ $retry -ge 64 ]; then - collect_k8s_logs echo max retry count $retry reached. Something went wrong with MCS, probably a problem in gke-mcs-importer. exit 1 fi @@ -60,7 +58,6 @@ wait_service_export() { until [[ "$(kubectl_bin get serviceexport --ignore-not-found | grep -v 'NAME' | wc -l)" -eq "9" ]]; do let retry+=1 if [ $retry -ge 64 ]; then - collect_k8s_logs echo max retry count $retry reached. Something went wrong with MCS, probably a problem in gke-mcs-exporter. 
exit 1 fi diff --git a/e2e-tests/rs-shard-migration/run b/e2e-tests/rs-shard-migration/run index 465fe4fc91..7020b091a6 100755 --- a/e2e-tests/rs-shard-migration/run +++ b/e2e-tests/rs-shard-migration/run @@ -38,12 +38,10 @@ function main() { wait_cluster_consistency "${cluster}" if [[ $(kubectl_bin get statefulset/${cluster}-mongos -o jsonpath='{.status.readyReplicas}') -lt 1 ]]; then - collect_k8s_logs echo "Mongos hasn't been properly started. Exiting..." exit 1 fi if [[ "$(kubectl_bin get sts/${cluster}-cfg -o jsonpath='{.status.replicas}')" != "$(kubectl_bin get sts/${cluster}-cfg -o jsonpath='{.status.readyReplicas}')" ]]; then - collect_k8s_logs echo "Cfg pods haven't been properly started. Exiting..." exit 1 fi @@ -56,7 +54,6 @@ function main() { if [[ -z "$(get_shard_parameter ${cluster} ${namespace} lastCommitedOpTime)" ]] \ && [[ -z "$(get_shard_parameter ${cluster} ${namespace} '$configServerState.opTime.ts')" ]]; then # for mongo 3.6 - collect_k8s_logs echo "Sharded cluster does not work properly" exit 1 fi @@ -73,7 +70,6 @@ function main() { || [[ -n "$(kubectl_bin get service -o jsonpath='{.items[?(@.metadata.name == "'"${cluster}-mongos"'")].metadata.name}')" ]] \ || [[ -n "$(kubectl_bin get service -o jsonpath='{.items[?(@.metadata.name == "'"${cluster}-cfg"'")].metadata.name}')" ]] \ || [[ -n "$(kubectl_bin get statefulset -o jsonpath='{.items[?(@.metadata.name == "'"${cluster}-cfg"'")].metadata.name}')" ]]; then - collect_k8s_logs echo "Transition to replicaset cluster has not been done well. 
Cluster does not work properly or some leftovers still exist" exit 1 fi diff --git a/e2e-tests/self-healing-chaos/run b/e2e-tests/self-healing-chaos/run index bddea5d5e0..1380150331 100755 --- a/e2e-tests/self-healing-chaos/run +++ b/e2e-tests/self-healing-chaos/run @@ -15,7 +15,6 @@ check_pod_restarted() { local new_resourceVersion=$(kubectl get pod $pod -ojson | jq '.metadata.resourceVersion' | tr -d '"') if [[ $old_resourceVersion == "$new_resourceVersion" ]]; then - collect_k8s_logs echo "Chaos mesh didn't work for some reason. Please check!!!" echo "The resourceVersion was not changed: $new_resourceVersion" exit 1 diff --git a/e2e-tests/service-per-pod/run b/e2e-tests/service-per-pod/run index cadfdf3b30..e3d2131d1b 100755 --- a/e2e-tests/service-per-pod/run +++ b/e2e-tests/service-per-pod/run @@ -64,7 +64,6 @@ check_cr_config() { compare_kubectl service/node-port-rs0-0 "-updated" current_node_port=$(kubectl_bin get svc node-port-rs0-0 -o 'jsonpath={.spec.ports[0].nodePort}') if [[ $current_node_port != "$old_node_port" ]]; then - collect_k8s_logs echo "Node port changed from ${old_node_port} to ${current_node_port}" exit 1 fi diff --git a/e2e-tests/smart-update/run b/e2e-tests/smart-update/run index ebb1178aa3..015a4d0b19 100755 --- a/e2e-tests/smart-update/run +++ b/e2e-tests/smart-update/run @@ -22,7 +22,6 @@ function check_pod_update() { echo "OK: Image ${img} was updated for pod ${pod_name}!" break elif [ ${retry} -ge 60 ]; then - collect_k8s_logs echo "Max retry count ${retry} reached." echo "ERROR: Image was not updated for pod ${pod_name}! Image is ${img}, but should be ${IMAGE_MONGOD_TO_UPDATE}." 
exit 1 @@ -68,7 +67,6 @@ done desc "check primary should have old image" img=$(kubectl get pod/$initial_primary -o jsonpath='{.spec.containers[0].image}') if [ "${img}" != "${IMAGE_MONGOD}" ]; then - collect_k8s_logs echo "image should be old on primary pod at that moment" exit 1 fi @@ -84,7 +82,6 @@ pods+=("${initial_primary}") for i in "${!pods[@]}"; do if [ "${pods[i]}" != "${restarted_pods[i]}" ]; then - collect_k8s_logs echo "pod ${pods[i]} is not equal to pod ${restarted_pods[i]}" exit 1 fi diff --git a/e2e-tests/split-horizon/run b/e2e-tests/split-horizon/run index fb7cf4ae8c..1519d67c14 100755 --- a/e2e-tests/split-horizon/run +++ b/e2e-tests/split-horizon/run @@ -50,7 +50,6 @@ diff $test_dir/compare/horizons-3.json $tmp_dir/horizons-3.json isMaster=$(run_mongo_tls "db.isMaster().ismaster" "clusterAdmin:clusterAdmin123456@some-name-rs0-0.clouddemo.xyz,some-name-rs0-1.clouddemo.xyz,some-name-rs0-2.clouddemo.xyz" mongodb "" "--quiet" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|Error saving history file:' | grep -v certificateNames) if [ "${isMaster}" != "true" ]; then - collect_k8s_logs echo "mongo client should've redirect the connection to primary" exit 1 fi @@ -64,7 +63,6 @@ sleep 10 # give some time for re-election isMaster=$(run_mongo_tls "db.isMaster().ismaster" "clusterAdmin:clusterAdmin123456@some-name-rs0-0.clouddemo.xyz,some-name-rs0-1.clouddemo.xyz,some-name-rs0-2.clouddemo.xyz" mongodb "" "--quiet" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|Error saving history file:' | grep -v certificateNames) if [ "${isMaster}" != "true" ]; then - collect_k8s_logs echo "mongo client should've redirect the connection to primary" exit 1 fi diff --git a/e2e-tests/tls-issue-cert-manager/run 
b/e2e-tests/tls-issue-cert-manager/run index ecdd860a34..e3c95720ce 100755 --- a/e2e-tests/tls-issue-cert-manager/run +++ b/e2e-tests/tls-issue-cert-manager/run @@ -20,7 +20,6 @@ check_secret_data_key() { secret_data=$(kubectl_bin get "secrets/${secret_name}" -o json | jq ".data[\"${data_key}\"]") if [ -z "$secret_data" ]; then - collect_k8s_logs exit 1 fi } diff --git a/e2e-tests/upgrade-sharded/run b/e2e-tests/upgrade-sharded/run index 9b19e7984b..08db6b2323 100755 --- a/e2e-tests/upgrade-sharded/run +++ b/e2e-tests/upgrade-sharded/run @@ -49,7 +49,6 @@ IMAGE_PMM_CLIENT=$(echo "${INIT_OPERATOR_IMAGES}" | jq -r '.versions[].matrix.pm IMAGE_BACKUP=$(echo "${INIT_OPERATOR_IMAGES}" | jq -r '.versions[].matrix.backup[].imagePath') if [[ ${TARGET_OPERATOR_VER} == "${INIT_OPERATOR_VER}" ]]; then - collect_k8s_logs echo "OPERATOR VERSION and INIT OPERATOR VERSION variables are the same: ${TARGET_OPERATOR_VER} ${INIT_OPERATOR_VER}! Something is wrong!" exit 1 fi @@ -62,7 +61,6 @@ function compare_generation() { current_generation=$(kubectl_bin get "${resource}" "${name}" -o jsonpath='{.metadata.generation}') if [[ ${generation} != "${current_generation}" ]]; then - collect_k8s_logs echo "Generation for resource type ${resource} with name ${name} is: ${current_generation}, but should be: ${generation}!" exit 1 fi @@ -75,7 +73,6 @@ function wait_cluster_consistency() { "$(kubectl_bin get psmdb "${cluster}" -o jsonpath='{.status.replsets.cfg.ready}')" == "${CLUSTER_SIZE}" ]]; do let retry+=1 if [ $retry -ge 32 ]; then - collect_k8s_logs echo max retry count $retry reached. 
something went wrong with operator or kubernetes cluster exit 1 fi @@ -95,7 +92,6 @@ function check_applied_images() { ${IMAGE_MONGOD} == $(kubectl_bin get psmdb "${cluster}" -o jsonpath='{.spec.image}') ]]; then : Operator image has been updated correctly else - collect_k8s_logs echo 'Operator image has not been updated' exit 1 fi @@ -107,7 +103,6 @@ function check_applied_images() { ${TARGET_IMAGE_MONGOD} == $(kubectl_bin get psmdb "${cluster}" -o jsonpath='{.spec.image}') ]]; then : Cluster images have been updated correctly else - collect_k8s_logs echo 'Cluster images have not been updated' exit 1 fi @@ -153,7 +148,6 @@ function check_upgrade_order() { local nr=$(kubectl_bin get pod --sort-by=.status.startTime | grep -vE '^NAME|client|operator|minio-service' | sed -n "${start},${end}p" | grep -c "\-${pod_type}\-") if [[ ${nr} -ne ${cluster_size} ]]; then - collect_k8s_logs echo "${pod_type} was not upgraded ${upgrade_order}!" kubectl_bin get pod --sort-by=.status.startTime | grep -vE 'client|operator|minio-service' exit 1 diff --git a/e2e-tests/upgrade/run b/e2e-tests/upgrade/run index dea31faf4f..78df239e26 100755 --- a/e2e-tests/upgrade/run +++ b/e2e-tests/upgrade/run @@ -52,7 +52,6 @@ IMAGE_PMM_CLIENT=$(echo "${INIT_OPERATOR_IMAGES}" | jq -r '.versions[].matrix.pm IMAGE_BACKUP=$(echo "${INIT_OPERATOR_IMAGES}" | jq -r '.versions[].matrix.backup[].imagePath') if [[ ${TARGET_OPERATOR_VER} == "${INIT_OPERATOR_VER}" ]]; then - collect_k8s_logs echo "OPERATOR VERSION and INIT OPERATOR VERSION variables are the same: ${TARGET_OPERATOR_VER} ${INIT_OPERATOR_VER}! Something is wrong!" exit 1 fi @@ -65,7 +64,6 @@ function compare_generation() { current_generation=$(kubectl_bin get "${resource}" "${name}" -o jsonpath='{.metadata.generation}') if [[ ${generation} != "${current_generation}" ]]; then - collect_k8s_logs echo "Generation for resource type ${resource} with name ${name} is: ${current_generation}, but should be: ${generation}!" 
exit 1 fi @@ -77,7 +75,6 @@ function wait_cluster_consistency() { "$(kubectl_bin get psmdb "${cluster}" -o jsonpath='{.status.replsets.rs0.ready}')" == "${CLUSTER_SIZE}" ]]; do let retry+=1 if [ $retry -ge 32 ]; then - collect_k8s_logs echo max retry count $retry reached. something went wrong with operator or kubernetes cluster exit 1 fi @@ -97,7 +94,6 @@ function check_applied_images() { ${IMAGE_MONGOD} == $(kubectl_bin get psmdb "${cluster}" -o jsonpath='{.spec.image}') ]]; then : Operator image has been updated correctly else - collect_k8s_logs echo 'Operator image has not been updated' exit 1 fi @@ -109,7 +105,6 @@ function check_applied_images() { ${TARGET_IMAGE_MONGOD} == $(kubectl_bin get psmdb "${cluster}" -o jsonpath='{.spec.image}') ]]; then : Cluster images have been updated correctly else - collect_k8s_logs echo 'Cluster images have not been updated' exit 1 fi diff --git a/e2e-tests/version-service/run b/e2e-tests/version-service/run index 99db6819dc..5122ee8a2f 100755 --- a/e2e-tests/version-service/run +++ b/e2e-tests/version-service/run @@ -67,7 +67,7 @@ function check_telemetry_transfer() { # operator fallback VS should have telemetry diff ${test_dir}/compare/${telemetry_log_file} <(grep -f ${tmp_dir}/${telemetry_state}_telemetry.version-service.log.json ${test_dir}/compare/${telemetry_log_file}) # CR VS should not have telemetry - [[ -s "${tmp_dir}/enabled_telemetry.version-service-cr.log.json" ]] && collect_k8s_logs && exit 1 + [[ -s "${tmp_dir}/enabled_telemetry.version-service-cr.log.json" ]] && exit 1 fi local telemetry_cr_log_file="${telemetry_state}_telemetry.version-service-cr.log${OPERATOR_NS:+-cw}.json" @@ -77,15 +77,15 @@ function check_telemetry_transfer() { # cr VS should have telemetry diff ${test_dir}/compare/${telemetry_cr_log_file} <(grep -f ${tmp_dir}/${telemetry_state}_telemetry.version-service-cr.log.json ${test_dir}/compare/${telemetry_cr_log_file}) # operator VS should not have telemetry - [[ -s 
${tmp_dir}/disabled_telemetry.version-service.log.json ]] && collect_k8s_logs && exit 1 + [[ -s ${tmp_dir}/disabled_telemetry.version-service.log.json ]] && exit 1 fi desc 'telemetry was disabled in CR as well as in operator' if [ "${cr_vs_channel}" == 'disabled' -a "${telemetry_state}" == 'disabled' ]; then # CR VS should not have telemetry - [[ -s ${tmp_dir}/disabled_telemetry.version-service-cr.log.json ]] && collect_k8s_logs && exit 1 + [[ -s ${tmp_dir}/disabled_telemetry.version-service-cr.log.json ]] && exit 1 # operator VS should not have telemetry - [[ -s ${tmp_dir}/disabled_telemetry.version-service.log.json ]] && collect_k8s_logs && exit 1 + [[ -s ${tmp_dir}/disabled_telemetry.version-service.log.json ]] && exit 1 fi kubectl_bin delete pod ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) @@ -183,7 +183,6 @@ for i in "${!cases[@]}"; do pods=($(kubectl get pods -l app.kubernetes.io/name=percona-server-mongodb -o=name)) if [ ${#pods[@]} -eq 0 ]; then - collect_k8s_logs echo "pods not found" exit 1 fi @@ -191,7 +190,6 @@ for i in "${!cases[@]}"; do for pod in "${pods[@]}"; do img=$(kubectl get $pod -o jsonpath='{.spec.containers[0].image}') if [ "$img" != "$expected_image" ]; then - collect_k8s_logs echo "image was not updated" exit 1 fi