From 218944abc8434a97023cf1e850ecbc90baad558e Mon Sep 17 00:00:00 2001 From: Tomislav Plavcic Date: Wed, 20 Sep 2023 18:55:54 +0200 Subject: [PATCH] Push failed test k8s logs to S3 bucket --- Jenkinsfile | 16 +++++++++++++ e2e-tests/functions | 55 ++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 68 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 036ab6093a..20599c4b77 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -103,6 +103,21 @@ void pushLogFile(String FILE_NAME) { } } +void pushK8SLogs(String TEST_NAME) { + def LOG_FILE_PATH="e2e-tests/logs/" + def FILE_NAMES="logs_${TEST_NAME}_*" + echo "Push k8s logs to S3!" + + withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', accessKeyVariable: 'AWS_ACCESS_KEY_ID', credentialsId: 'AMI/OVF', secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']]) { + sh """ + S3_PATH=s3://percona-jenkins-artifactory/\$JOB_NAME/\$(git rev-parse --short HEAD)/logs/ + aws s3 ls \$S3_PATH || : + aws s3 rm \$S3_PATH --recursive --exclude "*" --include "${FILE_NAMES}" || : + aws s3 cp --quiet ${LOG_FILE_PATH} \$S3_PATH --recursive --exclude "*" --include "$FILE_NAMES" || : + """ + } +} + void popArtifactFile(String FILE_NAME) { echo "Try to get $FILE_NAME file from S3!" @@ -218,6 +233,7 @@ void runTest(Integer TEST_ID) { return true } catch (exc) { + pushK8SLogs("$testName") if (retryCount >= 1 || currentBuild.nextBuild != null) { currentBuild.result = 'FAILURE' return true diff --git a/e2e-tests/functions b/e2e-tests/functions index baf53273f0..c20369d160 100755 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -150,6 +150,7 @@ wait_pod() { echo -n . let retry+=1 if [ $retry -ge 360 ]; then + collect_k8s_logs kubectl_bin describe pod/$pod kubectl_bin logs $pod kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ @@ -158,6 +159,7 @@ wait_pod() { | grep -v 'Getting tasks for pod' \ | grep -v 'Getting pods from source' \ | tail -100 + echo max retry count $retry reached. 
something went wrong with operator or kubernetes cluster exit 1 fi @@ -177,12 +179,14 @@ wait_cron() { echo -n . let retry+=1 if [ $retry -ge 360 ]; then + collect_k8s_logs kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ | grep -v 'level=info' \ | grep -v 'level=debug' \ | grep -v 'Getting tasks for pod' \ | grep -v 'Getting pods from source' \ | tail -100 + echo max retry count $retry reached. something went wrong with operator or kubernetes cluster exit 1 fi @@ -201,8 +205,10 @@ wait_backup_agent() { echo -n . let retry+=1 if [ $retry -ge 360 ]; then + collect_k8s_logs kubectl_bin logs $agent_pod -c backup-agent \ | tail -100 + echo max retry count $retry reached. something went wrong with operator or kubernetes cluster exit 1 fi @@ -224,12 +230,14 @@ wait_backup() { let retry+=1 current_status=$(kubectl_bin get psmdb-backup $backup_name -o jsonpath='{.status.state}') if [[ $retry -ge 360 || ${current_status} == 'error' ]]; then + collect_k8s_logs kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ | grep -v 'level=info' \ | grep -v 'level=debug' \ | grep -v 'Getting tasks for pod' \ | grep -v 'Getting pods from source' \ | tail -100 + echo "Backup object psmdb-backup/${backup_name} is in ${current_status} state." echo something went wrong with operator or kubernetes cluster exit 1 @@ -283,12 +291,14 @@ wait_deployment() { echo -n . let retry+=1 if [ $retry -ge 360 ]; then + collect_k8s_logs kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ | grep -v 'level=info' \ | grep -v 'level=debug' \ | grep -v 'Getting tasks for pod' \ | grep -v 'Getting pods from source' \ | tail -100 + echo max retry count $retry reached. 
something went wrong with operator or kubernetes cluster exit 1 fi @@ -328,6 +338,7 @@ wait_restore() { let retry+=1 current_state=$(kubectl_bin get psmdb-restore restore-$backup_name -o jsonpath='{.status.state}') if [[ $retry -ge 720 || ${current_state} == 'error' ]]; then + collect_k8s_logs kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ | grep -v 'level=info' \ | grep -v 'level=debug' \ @@ -528,6 +539,7 @@ retry() { until "$@"; do if [[ $n -ge $max ]]; then + collect_k8s_logs echo "The command '$@' has failed after $n attempts." exit 1 fi @@ -567,6 +579,7 @@ wait_for_running() { timeout=$((timeout + 1)) echo -n '.' if [[ ${timeout} -gt 1500 ]]; then + collect_k8s_logs echo echo "Waiting timeout has been reached. Exiting..." exit 1 @@ -588,12 +601,14 @@ wait_for_delete() { echo -n . let retry+=1 if [ $retry -ge 60 ]; then + collect_k8s_logs kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ | grep -v 'level=info' \ | grep -v 'level=debug' \ | grep -v 'Getting tasks for pod' \ | grep -v 'Getting pods from source' \ | tail -100 + echo max retry count $retry reached. something went wrong with operator or kubernetes cluster exit 1 fi @@ -609,6 +624,8 @@ compare_generation() { current_generation="$(kubectl_bin get ${resource_type} "${resource_name}" -o jsonpath='{.metadata.generation}')" if [[ ${generation} != "${current_generation}" ]]; then + collect_k8s_logs + echo "Generation for ${resource_type}/${resource_name} is: ${current_generation}, but should be: ${generation}" exit 1 fi @@ -667,9 +684,9 @@ compare_kubectl() { del(.spec.ipFamilyPolicy) | (.. | select(. == "extensions/v1beta1")) = "apps/v1" | (.. | select(. 
== "batch/v1beta1")) = "batch/v1" ' - >${new_result} - + yq -i eval 'del(.spec.persistentVolumeClaimRetentionPolicy)' ${new_result} - + if version_gt "1.22"; then yq -i eval 'del(.spec.internalTrafficPolicy)' ${new_result} yq -i eval 'del(.spec.allocateLoadBalancerNodePorts)' ${new_result} @@ -961,6 +978,7 @@ get_service_endpoint() { return fi + collect_k8s_logs exit 1 } @@ -1135,6 +1153,7 @@ wait_cluster_consistency() { until [[ "$(kubectl_bin get psmdb "${cluster_name}" -o jsonpath='{.status.state}')" == "ready" ]]; do let retry+=1 if [ $retry -ge 32 ]; then + collect_k8s_logs echo max retry count $retry reached. something went wrong with operator or kubernetes cluster exit 1 fi @@ -1161,6 +1180,7 @@ check_backup_deletion() { retry=0 until [[ $(curl -sw '%{http_code}' -o /dev/null $path) -eq 403 ]] || [[ $(curl -sw '%{http_code}' -o /dev/null $path) -eq 404 ]]; do if [ $retry -ge 10 ]; then + collect_k8s_logs echo max retry count $retry reached. something went wrong with operator or kubernetes cluster echo "Backup was not removed from bucket -- $storage_name" exit 1 @@ -1222,6 +1242,7 @@ function get_mongod_ver_from_image() { version_info=$(run_simple_cli_inside_image ${image} 'mongod --version' | $sed -r 's/^.*db version v(([0-9]+\.){2}[0-9]+-[0-9]+).*$/\1/g') if [[ ! ${version_info} =~ ^([0-9]+\.){2}[0-9]+-[0-9]+$ ]]; then + collect_k8s_logs printf "No mongod version obtained from %s. Exiting" ${image} exit 1 fi @@ -1234,6 +1255,7 @@ function get_pbm_version() { local version_info=$(run_simple_cli_inside_image ${image} 'pbm-agent version' | $sed -r 's/^Version:\ (([0-9]+\.){2}[0-9]+)\ .*/\1/g') if [[ ! ${version_info} =~ ^([0-9]+\.){2}[0-9]+$ ]]; then + collect_k8s_logs printf "No pbm version obtained from %s. Exiting" ${image} exit 1 fi @@ -1274,6 +1296,33 @@ function generate_vs_json() { echo ${version_service_source} | jq '.' 
>${target_path} } +collect_k8s_logs() { + if [[ ${ENABLE_LOGGING} == "true" ]]; then + rm -f ${logs_dir}/logs_${test_name}_* || : + + local check_namespaces="$namespace${OPERATOR_NS:+ $OPERATOR_NS}" + + for ns in $check_namespaces; do + local pods=$(kubectl_bin -n "$ns" get pods -o name | awk -F "/" '{print $2}') + for p in $pods; do + local containers=$(kubectl_bin -n "$ns" get pod $p -o jsonpath='{.spec.containers[*].name}') + for c in $containers; do + kubectl_bin -n "$ns" logs $p -c $c >${logs_dir}/logs_${test_name}_${p}_${c}.txt + echo logs saved in: ${logs_dir}/logs_${test_name}_${p}_${c}.txt + done + done + done + for object in psmdb psmdb-backup psmdb-restore pods deployments services events sts; do + echo "##### START: $object #####" >>${logs_dir}/logs_${test_name}_simple.txt + kubectl_bin get $object --all-namespaces >>${logs_dir}/logs_${test_name}_simple.txt + echo -e "##### END: $object ####\n" >>${logs_dir}/logs_${test_name}_simple.txt + done + for object in psmdb psmdb-backup psmdb-restore pods deployments services events sts; do + kubectl_bin get $object --all-namespaces >${logs_dir}/logs_${test_name}_${object}.yaml + done + fi +} + check_passwords_leak() { secrets=$(kubectl_bin get secrets -o json | jq -r '.items[].data | to_entries | .[] | select(.key | (contains("_PASSWORD"))) | .value') echo secrets=$secrets @@ -1295,7 +1344,7 @@ check_passwords_leak() { if [[ ${c} =~ "pmm" ]]; then continue fi - kubectl_bin -n "$NS" logs $p -c $c > ${TEMP_DIR}/logs_output-$p-$c.txt + kubectl_bin -n "$NS" logs $p -c $c >${TEMP_DIR}/logs_output-$p-$c.txt echo logs saved in: ${TEMP_DIR}/logs_output-$p-$c.txt for pass in $passwords; do count=$(grep -c --fixed-strings -- "$pass" ${TEMP_DIR}/logs_output-$p-$c.txt || :)