diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index ae86e33c66..8ed4927c6b 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -1,7 +1,8 @@ def Machine = 'none' def machine = 'none' def CUSTOM_WORKSPACE = 'none' -def caseList = '' +def cases = '' +def GH = 'none' // Location of the custom workspaces for each machine in the CI system. They are persitent for each iteration of the PR. def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea'] def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/stmp/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI'] @@ -78,6 +79,7 @@ pipeline { echo "Getting Common Workspace for ${Machine}" ws("${custom_workspace[machine]}/${env.CHANGE_ID}") { properties([parameters([[$class: 'NodeParameterDefinition', allowedSlaves: ['built-in', 'Hercules-EMC', 'Hera-EMC', 'Orion-EMC'], defaultSlaves: ['built-in'], name: '', nodeEligibility: [$class: 'AllNodeEligibility'], triggerIfResult: 'allCases']])]) + GH = sh(script: "which gh || echo '~/bin/gh'", returnStdout: true).trim() CUSTOM_WORKSPACE = "${WORKSPACE}" sh(script: "mkdir -p ${CUSTOM_WORKSPACE}/RUNTESTS;rm -Rf ${CUSTOM_WORKSPACE}/RUNTESTS/*") sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --add-label "CI-${Machine}-Building" --remove-label "CI-${Machine}-Ready" """) @@ -97,7 +99,7 @@ pipeline { } } stages { - stage('build system') { + stage('Building') { steps { catchError(buildResult: 'UNSTABLE', stageResult: 'FAILURE') { script { @@ -116,7 +118,7 @@ pipeline { checkout scm } catch (Exception e) { if (env.CHANGE_ID) { - sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Checkout **Failed** on ${Machine}: ${e.getMessage()}" """) + sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Checkout **Failed** on ${Machine} in Build# ${env.BUILD_NUMBER}: ${e.getMessage()}" """) } STATUS = 'Failed' error("Failed to checkout: ${e.getMessage()}") @@ -149,7 +151,7 @@ pipeline { try { sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --repo PR_BUILD_${env.CHANGE_ID}") gist_url=sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --gist PR_BUILD_${env.CHANGE_ID}", returnStdout: true).trim() - sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Build **FAILED** on **${Machine}** with error logs:\n\\`\\`\\`\n${error_logs_message}\\`\\`\\`\n\nFollow link here to view the contents of the above file(s): [(link)](${gist_url})" """) + sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Build **FAILED** on **${Machine}** in Build# ${env.BUILD_NUMBER} with error logs:\n\\`\\`\\`\n${error_logs_message}\\`\\`\\`\n\nFollow link here to view the contents of the above file(s): [(link)](${gist_url})" """) } catch (Exception error_comment) { echo "Failed to comment on PR: ${error_comment.getMessage()}" } @@ -169,7 +171,7 @@ pipeline { } } if (system == 'gfs') { - caseList = sh(script: "${HOMEgfs}/ci/scripts/utils/get_host_case_list.py ${machine}", returnStdout: true).trim().split() + cases = sh(script: "${HOMEgfs}/ci/scripts/utils/get_host_case_list.py ${machine}", returnStdout: true).trim().split() } } } @@ -184,98 +186,88 @@ pipeline { when { expression { STATUS != 'Failed' } } - matrix { - agent { label NodeName[machine].toLowerCase() } - axes { - axis { - name 'Case' - // TODO add dynamic list of cases from env vars (needs addtional plugins) - values 'C48C48_ufs_hybatmDA', 'C48_ATM', 'C48_S2SW', 'C48_S2SWA_gefs', 'C48mx500_3DVarAOWCDA', 'C96C48_hybatmDA', 'C96_atm3DVar', 'C96_atmaerosnowDA' - } - } - stages { - - stage('Create Experiments') { - when { - expression { return caseList.contains(Case) } - } - steps { - catchError(buildResult: 'UNSTABLE', stageResult: 'FAILURE') { - script { - sh(script: "sed -n '/{.*}/!p' ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${Case}.yaml > ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${Case}.yaml.tmp") - def yaml_case = readYaml file: "${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${Case}.yaml.tmp" - system = yaml_case.experiment.system - def HOMEgfs = "${CUSTOM_WORKSPACE}/${system}" // local HOMEgfs is used to populate the XML on per system basis - env.RUNTESTS = "${CUSTOM_WORKSPACE}/RUNTESTS" - try { - error_output = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${Case}.yaml", returnStdout: true).trim() - } catch (Exception error_create) { - sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "${Case} **FAILED** to create experment on ${Machine}\n with the error:\n\\`\\`\\`\n${error_output}\\`\\`\\`" """) - error("Case ${Case} failed to create experment directory") - } + agent { label NodeName[machine].toLowerCase() } + steps { + script { + def parallelStages = cases.collectEntries { caseName -> + ["${caseName}": { + stage("Create ${caseName}") { + catchError(buildResult: 'UNSTABLE', stageResult: 'FAILURE') { + script { + sh(script: "sed -n '/{.*}/!p' ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml > ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml.tmp") + def yaml_case = readYaml file: "${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml.tmp" + system = yaml_case.experiment.system + def HOMEgfs = "${CUSTOM_WORKSPACE}/${system}" // local HOMEgfs is used to populate the XML on per system basis + env.RUNTESTS = "${CUSTOM_WORKSPACE}/RUNTESTS" + try { + error_output = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${caseName}.yaml", returnStdout: true).trim() + } catch (Exception error_create) { + sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "${Case} **FAILED** to create experment on ${Machine} in BUILD# ${env.BUILD_NUMBER}\n with the error:\n\\`\\`\\`\n${error_output}\\`\\`\\`" """) + error("Case ${caseName} failed to create experment directory") + } + } } } - } - } - stage('Run Experiments') { - when { - expression { return caseList.contains(Case) } - } - steps { - script { - HOMEgfs = "${CUSTOM_WORKSPACE}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments - def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${Case}", returnStdout: true).trim() - def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs" - sh(script: " rm -f ${error_file}") - try { - sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} ${system}") - } catch (Exception error_experment) { - sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cancel_batch_jobs ${pslot}") - ws(CUSTOM_WORKSPACE) { - def error_logs = "" - def error_logs_message = "" - if (fileExists(error_file)) { - def fileContent = readFile error_file - def lines = fileContent.readLines() - for (line in lines) { - echo "archiving: ${line}" - if (fileExists("${CUSTOM_WORKSPACE}/${line}") && readFile("${CUSTOM_WORKSPACE}/${line}").length() > 0) { - try { - archiveArtifacts artifacts: "${line}", fingerprint: true - error_logs = error_logs + "${CUSTOM_WORKSPACE}/${line} " - error_logs_message = error_logs_message + "${CUSTOM_WORKSPACE}/${line}\n" - } catch (Exception error_arch) { - echo "Failed to archive error log ${line}: ${error_arch.getMessage()}" + stage("Running ${caseName}") { + catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') { + script { + HOMEgfs = "${CUSTOM_WORKSPACE}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments + def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${caseName}", returnStdout: true).trim() + def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs" + sh(script: " rm -f ${error_file}") + try { + sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} ${system}") + } catch (Exception error_experment) { + sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cancel_batch_jobs ${pslot}") + ws(CUSTOM_WORKSPACE) { + def error_logs = "" + def error_logs_message = "" + if (fileExists(error_file)) { + def fileContent = readFile error_file + def lines = fileContent.readLines() + for (line in lines) { + echo "archiving: ${line}" + if (fileExists("${CUSTOM_WORKSPACE}/${line}") && readFile("${CUSTOM_WORKSPACE}/${line}").length() > 0) { + try { + archiveArtifacts artifacts: "${line}", fingerprint: true + error_logs = error_logs + "${CUSTOM_WORKSPACE}/${line} " + error_logs_message = error_logs_message + "${CUSTOM_WORKSPACE}/${line}\n" + } catch (Exception error_arch) { + echo "Failed to archive error log ${line}: ${error_arch.getMessage()}" + } + } + } + try { + gist_url = sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --gist PR_${env.CHANGE_ID}", returnStdout: true).trim() + sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${caseName} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} with error logs:\n\\`\\`\\`\n${error_logs_message}\\`\\`\\`\n\nFollow link here to view the contents of the above file(s): [(link)](${gist_url})" """) + sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --repo PR_${env.CHANGE_ID}") + } catch (Exception error_comment) { + echo "Failed to comment on PR: ${error_comment.getMessage()}" + } + } else { + echo "No error logs found for failed cases in $CUSTOM_WORKSPACE/RUNTESTS/${pslot}_error.logs" + } + STATUS = 'Failed' + try { + sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --remove-label "CI-${Machine}-Running" --add-label "CI-${Machine}-${STATUS}" """, returnStatus: true) + sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${caseName} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} in\n\\`${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}\\`" """) + } catch (Exception e) { + echo "Failed to update label from Running to ${STATUS}: ${e.getMessage()}" + } + error("Failed to run experiments ${caseName} on ${Machine}") } } - } - try { - gist_url = sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --gist PR_${env.CHANGE_ID}", returnStdout: true).trim() - sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${Case} **FAILED** on ${Machine} with error logs:\n\\`\\`\\`\n${error_logs_message}\\`\\`\\`\n\nFollow link here to view the contents of the above file(s): [(link)](${gist_url})" """) - sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --repo PR_${env.CHANGE_ID}") - } catch (Exception error_comment) { - echo "Failed to comment on PR: ${error_comment.getMessage()}" - } - } else { - echo "No error logs found for failed cases in $CUSTOM_WORKSPACE/RUNTESTS/${pslot}_error.logs" } - STATUS = 'Failed' - try { - sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --remove-label "CI-${Machine}-Running" --add-label "CI-${Machine}-${STATUS}" """, returnStatus: true) - sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${Case} **FAILED** on ${Machine} in\n\\`${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}\\`" """) - } catch (Exception e) { - echo "Failed to update label from Running to ${STATUS}: ${e.getMessage()}" - } - echo "Failed to run experiments ${Case} on ${Machine}" } - } } - } + }] } + parallel parallelStages + [failFast: true] } } } + stage( '5. FINALIZE' ) { agent { label NodeName[machine].toLowerCase() } @@ -291,7 +283,7 @@ pipeline { """, returnStatus: true) sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --add-label "CI-${Machine}-${STATUS}" """, returnStatus: true) if (fileExists("${CUSTOM_WORKSPACE}/RUNTESTS/ci-run_check.log")) { - sh(script: """echo "**CI ${STATUS}** ${Machine} at
Built and ran in directory \\`${CUSTOM_WORKSPACE}\\`\n\\`\\`\\`\n" | cat - ${CUSTOM_WORKSPACE}/RUNTESTS/ci-run_check.log > temp && mv temp ${CUSTOM_WORKSPACE}/RUNTESTS/ci-run_check.log""", returnStatus: true) + sh(script: """echo "**CI ${STATUS}** on ${Machine} in Build# ${env.BUILD_NUMBER}
Built and ran in directory \\`${CUSTOM_WORKSPACE}\\`\n\\`\\`\\`\n" | cat - ${CUSTOM_WORKSPACE}/RUNTESTS/ci-run_check.log > temp && mv temp ${CUSTOM_WORKSPACE}/RUNTESTS/ci-run_check.log""", returnStatus: true) sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body-file ${CUSTOM_WORKSPACE}/RUNTESTS/ci-run_check.log """, returnStatus: true) } if (STATUS == 'Passed') {