From 95ee4928e657d4ed202cf8e7526fbc2918ca7dbd Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Mon, 27 Jan 2020 14:46:05 -0500 Subject: [PATCH] [CI] Retry flaky tests (#53961) --- Jenkinsfile | 5 ++- vars/githubPr.groovy | 58 +++++++++++++++++++++++++++-- vars/kibanaPipeline.groovy | 8 +++- vars/retryable.groovy | 75 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 140 insertions(+), 6 deletions(-) create mode 100644 vars/retryable.groovy diff --git a/Jenkinsfile b/Jenkinsfile index 5d770eb460816..e0b90050bfa4e 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -4,11 +4,12 @@ library 'kibana-pipeline-library' kibanaLibrary.load() stage("Kibana Pipeline") { // This stage is just here to help the BlueOcean UI a little bit - timeout(time: 120, unit: 'MINUTES') { + timeout(time: 135, unit: 'MINUTES') { timestamps { ansiColor('xterm') { githubPr.withDefaultPrComments { catchError { + retryable.enable() parallel([ 'kibana-intake-agent': kibanaPipeline.legacyJobRunner('kibana-intake'), 'x-pack-intake-agent': kibanaPipeline.legacyJobRunner('x-pack-intake'), @@ -37,6 +38,8 @@ stage("Kibana Pipeline") { // This stage is just here to help the BlueOcean UI a ]) } } + + retryable.printFlakyFailures() kibanaPipeline.sendMail() } } diff --git a/vars/githubPr.groovy b/vars/githubPr.groovy index ce164ab98ab1e..4c19511bb8953 100644 --- a/vars/githubPr.groovy +++ b/vars/githubPr.groovy @@ -88,6 +88,8 @@ def getHistoryText(builds) { .collect { build -> if (build.status == "SUCCESS") { return "* :green_heart: [Build #${build.number}](${build.url}) succeeded ${build.commit}" + } else if(build.status == "UNSTABLE") { + return "* :yellow_heart: [Build #${build.number}](${build.url}) was flaky ${build.commit}" } else { return "* :broken_heart: [Build #${build.number}](${build.url}) failed ${build.commit}" } @@ -97,18 +99,66 @@ def getHistoryText(builds) { return "### History\n${list}" } +def getTestFailuresMessage() { + def failures = testUtils.getFailures() + if (!failures) { + return 
"" + } + + def messages = [] + + failures.take(3).each { failure -> + messages << """ +--- + +### [Test Failures](${env.BUILD_URL}testReport) +
${failure.fullDisplayName} + +[Link to Jenkins](${failure.url}) + +``` +${failure.stdOut} +``` +
+ +--- + """ + } + + if (failures.size() > 3) { + messages << "and ${failures.size() - 3} more failures, only showing the first 3." + } + + return messages.join("\n") +} + def getNextCommentMessage(previousCommentInfo = [:]) { - info = previousCommentInfo ?: [:] + def info = previousCommentInfo ?: [:] info.builds = previousCommentInfo.builds ?: [] def messages = [] + def status = buildUtils.getBuildStatus() - if (buildUtils.getBuildStatus() == 'SUCCESS') { + if (status == 'SUCCESS') { messages << """ ## :green_heart: Build Succeeded * [continuous-integration/kibana-ci/pull-request](${env.BUILD_URL}) * Commit: ${getCommitHash()} """ + } else if(status == 'UNSTABLE') { + def message = """ + ## :yellow_heart: Build succeeded, but was flaky + * [continuous-integration/kibana-ci/pull-request](${env.BUILD_URL}) + * Commit: ${getCommitHash()} + """.stripIndent() + + def failures = retryable.getFlakyFailures() + if (failures && failures.size() > 0) { + def list = failures.collect { " * ${it.label}" }.join("\n") + message += "* Flaky suites:\n${list}" + } + + messages << message } else { messages << """ ## :broken_heart: Build Failed @@ -117,6 +167,8 @@ def getNextCommentMessage(previousCommentInfo = [:]) { """ } + messages << getTestFailuresMessage() + if (info.builds && info.builds.size() > 0) { messages << getHistoryText(info.builds) } @@ -133,7 +185,7 @@ def getNextCommentMessage(previousCommentInfo = [:]) { return messages .findAll { !!it } // No blank strings - .collect { it.stripIndent().trim() } + .collect { it.stripIndent().trim() } // This just allows us to indent various strings above, but leaves them un-indented in the comment .join("\n\n") } diff --git a/vars/kibanaPipeline.groovy b/vars/kibanaPipeline.groovy index ed855a843699e..3809ef0fe99f2 100644 --- a/vars/kibanaPipeline.groovy +++ b/vars/kibanaPipeline.groovy @@ -72,7 +72,9 @@ def getOssCiGroupWorker(ciGroup) { "CI_GROUP=${ciGroup}", "JOB=kibana-ciGroup${ciGroup}", ]) { - 
runbld("./test/scripts/jenkins_ci_group.sh", "Execute kibana-ciGroup${ciGroup}") + retryable("kibana-ciGroup${ciGroup}") { + runbld("./test/scripts/jenkins_ci_group.sh", "Execute kibana-ciGroup${ciGroup}") + } } }) } @@ -83,7 +85,9 @@ def getXpackCiGroupWorker(ciGroup) { "CI_GROUP=${ciGroup}", "JOB=xpack-kibana-ciGroup${ciGroup}", ]) { - runbld("./test/scripts/jenkins_xpack_ci_group.sh", "Execute xpack-kibana-ciGroup${ciGroup}") + retryable("xpack-kibana-ciGroup${ciGroup}") { + runbld("./test/scripts/jenkins_xpack_ci_group.sh", "Execute xpack-kibana-ciGroup${ciGroup}") + } } }) } diff --git a/vars/retryable.groovy b/vars/retryable.groovy new file mode 100644 index 0000000000000..cc34024958aed --- /dev/null +++ b/vars/retryable.groovy @@ -0,0 +1,75 @@ +import groovy.transform.Field + +public static @Field GLOBAL_RETRIES_ENABLED = false +public static @Field MAX_GLOBAL_RETRIES = 1 +public static @Field CURRENT_GLOBAL_RETRIES = 0 +public static @Field FLAKY_FAILURES = [] + +def setMax(max) { + retryable.MAX_GLOBAL_RETRIES = max +} + +def enable() { + retryable.GLOBAL_RETRIES_ENABLED = true +} + +def enable(max) { + enable() + setMax(max) +} + +def haveReachedMaxRetries() { + return retryable.CURRENT_GLOBAL_RETRIES >= retryable.MAX_GLOBAL_RETRIES +} + +def getFlakyFailures() { + return retryable.FLAKY_FAILURES +} + +def printFlakyFailures() { + catchError { + def failures = getFlakyFailures() + + if (failures && failures.size() > 0) { + print "This build had the following flaky failures:" + failures.each { + print "\n${it.label}" + buildUtils.printStacktrace(it.exception) + } + } + } +} + +def call(label, Closure closure) { + if (!retryable.GLOBAL_RETRIES_ENABLED) { + closure() + return + } + + try { + closure() + } catch (ex) { + if (haveReachedMaxRetries()) { + print "Couldn't retry '${label}', have already reached the max number of retries for this build." 
+ throw ex + } + + retryable.CURRENT_GLOBAL_RETRIES++ + buildUtils.printStacktrace(ex) + unstable "${label} failed but is retryable, trying a second time..." + + def JOB = env.JOB ? "${env.JOB}-retry" : "" + withEnv([ + "JOB=${JOB}", + ]) { + closure() + } + + retryable.FLAKY_FAILURES << [ + label: label, + exception: ex, + ] + + unstable "${label} failed on the first attempt, but succeeded on the second. Marking it as flaky." + } +}