Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jenkins node.env #309

Closed
wants to merge 15 commits into from
Closed
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 22 additions & 66 deletions ci/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ def GH = 'none'
// Location of the custom workspaces for each machine in the CI system. They are persistent for each iteration of the PR.
def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea']
def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/global/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI']
def repo_url = 'git@github.com:NOAA-EMC/global-workflow.git'
//def repo_url = 'git@github.com:NOAA-EMC/global-workflow.git'
def repo_url = 'git@github.com:TerrenceMcGuinness-NOAA/global-workflow.git'
def STATUS = 'Passed'

pipeline {
Expand All @@ -22,7 +23,6 @@ pipeline {
stages { // This initial stage is used to get the Machine name from the GitHub labels on the PR
// which is used to designate the Nodes in the Jenkins Controller by the agent label
// Each Jenkins Node is connected to said machine via an JAVA agent via an ssh tunnel
// no op 2

stage('1. Get Machine') {
agent { label 'built-in' }
Expand Down Expand Up @@ -175,75 +175,31 @@ pipeline {
agent { label NodeName[machine].toLowerCase() }
steps {
script {
def parallelStages = CI_CASES.collectEntries { caseName ->
["${caseName}": {
stage("Create ${caseName}") {
script {
env.RUNTESTS = "${CUSTOM_WORKSPACE}/RUNTESTS"
try {
error_output = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${caseName}.yaml", returnStdout: true).trim()
} catch (Exception error_create) {
sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "${Case} **FAILED** to create experiment on ${Machine} in BUILD# ${env.BUILD_NUMBER}\n with the error:\n\\`\\`\\`\n${error_output}\\`\\`\\`" """)
error("Case ${caseName} failed to create experiment directory")
}
}
}

stage("Running ${caseName}") {
env.HOMEGFS = HOMEgfs
env.RUNTESTS = "${CUSTOM_WORKSPACE}/RUNTESTS"
withEnv(["BASH_ENV=${CUSTOM_WORKSPACE}/workflow/gw_setup.sh"]) {
def parallelStages = CI_CASES.collectEntries { caseName ->
["${caseName}": {
stage("Create ${caseName}") {
script {
sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${caseName}.yaml")
}
}
stage("Running ${caseName}") {
catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
script {
def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${caseName}", returnStdout: true).trim()
def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs"
sh(script: " rm -f ${error_file}")
try {
sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} 'global-workflow'")
sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cleanup_experiment ${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}")
} catch (Exception error_experment) {
sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cancel_batch_jobs ${pslot}")
ws(CUSTOM_WORKSPACE) {
def error_logs = ""
def error_logs_message = ""
if (fileExists(error_file)) {
def fileContent = readFile error_file
def lines = fileContent.readLines()
for (line in lines) {
echo "archiving: ${line}"
if (fileExists("${CUSTOM_WORKSPACE}/${line}") && readFile("${CUSTOM_WORKSPACE}/${line}").length() > 0) {
try {
archiveArtifacts artifacts: "${line}", fingerprint: true
error_logs = error_logs + "${CUSTOM_WORKSPACE}/${line} "
error_logs_message = error_logs_message + "${CUSTOM_WORKSPACE}/${line}\n"
} catch (Exception error_arch) {
echo "Failed to archive error log ${line}: ${error_arch.getMessage()}"
}
}
}
try {
gist_url = sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --gist PR_${env.CHANGE_ID}", returnStdout: true).trim()
sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${caseName} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} with error logs:\n\\`\\`\\`\n${error_logs_message}\\`\\`\\`\n\nFollow link here to view the contents of the above file(s): [(link)](${gist_url})" """)
sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --repo PR_${env.CHANGE_ID}")
} catch (Exception error_comment) {
echo "Failed to comment on PR: ${error_comment.getMessage()}"
}
} else {
echo "No error logs found for failed cases in ${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs"
}
STATUS = 'Failed'
try {
sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --remove-label "CI-${Machine}-Running" --add-label "CI-${Machine}-${STATUS}" """, returnStatus: true)
sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${caseName} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} in\n\\`${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}\\`" """)
} catch (Exception e) {
echo "Failed to update label from Running to ${STATUS}: ${e.getMessage()}"
}
error("Failed to run experiments ${caseName} on ${Machine}")
}
}
def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${caseName}", returnStdout: true).trim()
def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs"
sh(script: "rm -f ${error_file}")
sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} 'global-workflow'")
sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cleanup_experiment ${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}")
}
}
}
}]
}
}]
}
parallel parallelStages + [failFast: true]
}
parallel parallelStages + [failFast: true]
}
}
}
Expand Down
11 changes: 6 additions & 5 deletions ci/scripts/utils/launch_java_agent.sh
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,15 @@
controller_user=${controller_user:-"terry.mcguinness"}
controller_user_auth_token="jenkins_token"

HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." >/dev/null 2>&1 && pwd )"
declare -r HOMEGFS
HOMEGFS_="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." >/dev/null 2>&1 && pwd )"
host=$(hostname)

#########################################################################
# Set up runtime environment varibles for accounts on supproted machines
#########################################################################

source "${HOMEgfs}/ush/detect_machine.sh"
source "${HOMEGFS_}/ush/detect_machine.sh"
case ${MACHINE_ID} in
hera | orion | hercules | wcoss2 | gaea)
echo "Launch Jenkins Java Controler on ${MACHINE_ID}";;
Expand All @@ -84,10 +85,10 @@
LOG=lanuched_agent-$(date +%Y%m%d%M).log
rm -f "${LOG}"

source "${HOMEgfs}/ush/module-setup.sh"
module use "${HOMEgfs}/modulefiles"
source "${HOMEGFS_}/ush/module-setup.sh"
module use "${HOMEGFS_}/modulefiles"
module load "module_gwsetup.${MACHINE_ID}"
source "${HOMEgfs}/ci/platforms/config.${MACHINE_ID}"
source "${HOMEGFS_}/ci/platforms/config.${MACHINE_ID}"

JAVA_HOME="${JENKINS_AGENT_LANUCH_DIR}/JAVA/jdk-17.0.10"
if [[ ! -d "${JAVA_HOME}" ]]; then
Expand Down