Skip to content

Commit

Permalink
[CI] Retry flaky tests (elastic#53961)
Browse files Browse the repository at this point in the history
  • Loading branch information
brianseeders committed Jan 27, 2020
1 parent fd62652 commit 95ee492
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 6 deletions.
5 changes: 4 additions & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ library 'kibana-pipeline-library'
kibanaLibrary.load()

stage("Kibana Pipeline") { // This stage is just here to help the BlueOcean UI a little bit
timeout(time: 120, unit: 'MINUTES') {
timeout(time: 135, unit: 'MINUTES') {
timestamps {
ansiColor('xterm') {
githubPr.withDefaultPrComments {
catchError {
retryable.enable()
parallel([
'kibana-intake-agent': kibanaPipeline.legacyJobRunner('kibana-intake'),
'x-pack-intake-agent': kibanaPipeline.legacyJobRunner('x-pack-intake'),
Expand Down Expand Up @@ -37,6 +38,8 @@ stage("Kibana Pipeline") { // This stage is just here to help the BlueOcean UI a
])
}
}

retryable.printFlakyFailures()
kibanaPipeline.sendMail()
}
}
Expand Down
58 changes: 55 additions & 3 deletions vars/githubPr.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ def getHistoryText(builds) {
.collect { build ->
if (build.status == "SUCCESS") {
return "* :green_heart: [Build #${build.number}](${build.url}) succeeded ${build.commit}"
} else if(build.status == "UNSTABLE") {
return "* :yellow_heart: [Build #${build.number}](${build.url}) was flaky ${build.commit}"
} else {
return "* :broken_heart: [Build #${build.number}](${build.url}) failed ${build.commit}"
}
Expand All @@ -97,18 +99,66 @@ def getHistoryText(builds) {
return "### History\n${list}"
}

/**
 * Builds the "Test Failures" portion of the pull-request comment.
 *
 * Reads the failures reported by testUtils.getFailures() and renders a
 * collapsible markdown section (link to Jenkins plus captured stdout) for the
 * first few of them. When there are more failures than are rendered, a trailing
 * line states how many were left out.
 *
 * @return the markdown text, or an empty string when there are no failures
 */
def getTestFailuresMessage() {
  def failures = testUtils.getFailures()
  if (!failures) {
    return ""
  }

  // Single source of truth for how many failures are rendered. Previously five
  // were rendered while the summary line below claimed (and counted from) three,
  // so the "N more failures" text could be wrong or appear when nothing was hidden.
  def maxShown = 3

  def messages = []

  failures.take(maxShown).each { failure ->
    messages << """
---
### [Test Failures](${env.BUILD_URL}testReport)
<details><summary>${failure.fullDisplayName}</summary>
[Link to Jenkins](${failure.url})
```
${failure.stdOut}
```
</details>
---
"""
  }

  if (failures.size() > maxShown) {
    messages << "and ${failures.size() - maxShown} more failures, only showing the first ${maxShown}."
  }

  return messages.join("\n")
}

def getNextCommentMessage(previousCommentInfo = [:]) {
info = previousCommentInfo ?: [:]
def info = previousCommentInfo ?: [:]
info.builds = previousCommentInfo.builds ?: []

def messages = []
def status = buildUtils.getBuildStatus()

if (buildUtils.getBuildStatus() == 'SUCCESS') {
if (status == 'SUCCESS') {
messages << """
## :green_heart: Build Succeeded
* [continuous-integration/kibana-ci/pull-request](${env.BUILD_URL})
* Commit: ${getCommitHash()}
"""
} else if(status == 'UNSTABLE') {
def message = """
## :yellow_heart: Build succeeded, but was flaky
* [continuous-integration/kibana-ci/pull-request](${env.BUILD_URL})
* Commit: ${getCommitHash()}
""".stripIndent()

def failures = retryable.getFlakyFailures()
if (failures && failures.size() > 0) {
def list = failures.collect { " * ${it.label}" }.join("\n")
message += "* Flaky suites:\n${list}"
}

messages << message
} else {
messages << """
## :broken_heart: Build Failed
Expand All @@ -117,6 +167,8 @@ def getNextCommentMessage(previousCommentInfo = [:]) {
"""
}

messages << getTestFailuresMessage()

if (info.builds && info.builds.size() > 0) {
messages << getHistoryText(info.builds)
}
Expand All @@ -133,7 +185,7 @@ def getNextCommentMessage(previousCommentInfo = [:]) {

return messages
.findAll { !!it } // No blank strings
.collect { it.stripIndent().trim() }
.collect { it.stripIndent().trim() } // This just allows us to indent various strings above, but leaves them un-indented in the comment
.join("\n\n")
}

Expand Down
8 changes: 6 additions & 2 deletions vars/kibanaPipeline.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,9 @@ def getOssCiGroupWorker(ciGroup) {
"CI_GROUP=${ciGroup}",
"JOB=kibana-ciGroup${ciGroup}",
]) {
runbld("./test/scripts/jenkins_ci_group.sh", "Execute kibana-ciGroup${ciGroup}")
retryable("kibana-ciGroup${ciGroup}") {
runbld("./test/scripts/jenkins_ci_group.sh", "Execute kibana-ciGroup${ciGroup}")
}
}
})
}
Expand All @@ -83,7 +85,9 @@ def getXpackCiGroupWorker(ciGroup) {
"CI_GROUP=${ciGroup}",
"JOB=xpack-kibana-ciGroup${ciGroup}",
]) {
runbld("./test/scripts/jenkins_xpack_ci_group.sh", "Execute xpack-kibana-ciGroup${ciGroup}")
retryable("xpack-kibana-ciGroup${ciGroup}") {
runbld("./test/scripts/jenkins_xpack_ci_group.sh", "Execute xpack-kibana-ciGroup${ciGroup}")
}
}
})
}
Expand Down
75 changes: 75 additions & 0 deletions vars/retryable.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import groovy.transform.Field

// Shared state for the retry mechanism; every function below reads or writes
// these through the `retryable.` prefix.

// Retries are off by default; enable() switches this to true.
public static @Field GLOBAL_RETRIES_ENABLED = false
// Upper bound compared against CURRENT_GLOBAL_RETRIES in haveReachedMaxRetries(); changed via setMax().
public static @Field MAX_GLOBAL_RETRIES = 1
// Number of retries started so far; incremented in call() each time a retry begins.
public static @Field CURRENT_GLOBAL_RETRIES = 0
// One [label: ..., exception: ...] entry per closure that failed once and then passed on retry (appended in call()).
public static @Field FLAKY_FAILURES = []

/**
 * Sets the maximum number of retries, i.e. the limit that
 * haveReachedMaxRetries() compares the shared retry counter against.
 *
 * @param max new value for MAX_GLOBAL_RETRIES
 */
def setMax(max) {
retryable.MAX_GLOBAL_RETRIES = max
}

/**
 * Turns retries on. Until this is called, call() runs its closure exactly
 * once and never retries.
 */
def enable() {
retryable.GLOBAL_RETRIES_ENABLED = true
}

/**
 * Turns retries on and sets the maximum number of retries in one step.
 *
 * @param max value passed through to setMax()
 */
def enable(max) {
enable()
setMax(max)
}

/**
 * Reports whether the retry budget is used up.
 *
 * @return true when CURRENT_GLOBAL_RETRIES is greater than or equal to MAX_GLOBAL_RETRIES
 */
def haveReachedMaxRetries() {
return retryable.CURRENT_GLOBAL_RETRIES >= retryable.MAX_GLOBAL_RETRIES
}

/**
 * Returns the recorded flaky failures.
 *
 * @return the FLAKY_FAILURES list itself (not a copy); each entry is a map
 *         with `label` and `exception` keys, as appended by call()
 */
def getFlakyFailures() {
return retryable.FLAKY_FAILURES
}

/**
 * Prints every recorded flaky failure: a heading, then each failure's label
 * followed by its stack trace (via buildUtils.printStacktrace).
 *
 * The whole body runs inside catchError, and nothing is printed when no flaky
 * failures were recorded.
 */
def printFlakyFailures() {
  catchError {
    def flakyFailures = getFlakyFailures()

    // Nothing recorded (null or empty list): stay silent.
    if (!flakyFailures) {
      return
    }

    print "This build had the following flaky failures:"
    flakyFailures.each { flaky ->
      print "\n${flaky.label}"
      buildUtils.printStacktrace(flaky.exception)
    }
  }
}

/**
 * Runs `closure`, retrying it once if it throws and the shared retry budget
 * has not been used up.
 *
 * With retries disabled the closure simply runs once. Otherwise, on a first
 * failure: the retry counter is incremented, the stack trace is printed, the
 * build is marked unstable, and the closure runs again with the JOB
 * environment variable suffixed with "-retry" (or empty when JOB is unset).
 * If the second attempt succeeds the failure is recorded in FLAKY_FAILURES;
 * if it throws, that exception propagates to the caller.
 *
 * @param label   name used in log messages and in the flaky-failure record
 * @param closure the work to run (and possibly re-run)
 */
def call(label, Closure closure) {
  if (!retryable.GLOBAL_RETRIES_ENABLED) {
    closure()
    return
  }

  try {
    closure()
  } catch (firstFailure) {
    // Budget exhausted: surface the original failure unchanged.
    if (haveReachedMaxRetries()) {
      print "Couldn't retry '${label}', have already reached the max number of retries for this build."
      throw firstFailure
    }

    retryable.CURRENT_GLOBAL_RETRIES++
    buildUtils.printStacktrace(firstFailure)
    unstable "${label} failed but is retryable, trying a second time..."

    // Second attempt runs under a distinct JOB value when one is set.
    def retryJobName
    if (env.JOB) {
      retryJobName = "${env.JOB}-retry"
    } else {
      retryJobName = ""
    }

    withEnv(["JOB=${retryJobName}"]) {
      closure()
    }

    // Only reached when the second attempt did not throw.
    retryable.FLAKY_FAILURES << [label: label, exception: firstFailure]

    unstable "${label} failed on the first attempt, but succeeded on the second. Marking it as flaky."
  }
}

0 comments on commit 95ee492

Please sign in to comment.