Skip to content

Commit

Permalink
Add logic to stop full CI if some fast and required checks fail (proj…
Browse files Browse the repository at this point in the history
…ect-chip#36000)

* Add job cancel logic and intentional failure.

* Give github token permissions on actions

* Update logic to have a scheduled job

* Restyle

* Undo manual change

* Remove unnedded files

* Undo extra changes

* Update date-time sorting

* Fix up none possibility on updated at

* Be more strict on max age now that we use udpate time

* Add support for minute age PRs since we run this so frequently

* Make cutoff to not be a rolling target

* Use created time if updated time is not available

* Make date-time display be reasonable

* Null handle fix

* Restyle

---------

Co-authored-by: Andrei Litvin <andreilitvin@google.com>
  • Loading branch information
2 people authored and yyzhong-g committed Dec 11, 2024
1 parent b0a19d1 commit 2804720
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 34 deletions.
24 changes: 10 additions & 14 deletions .github/workflows/cancel_workflows_for_pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.

name: Cancel workflow on PR
name: Cancel workflows on failing CI
on:
workflow_dispatch:
inputs:
pull_request_id:
description: 'PR number to consider'
required: true
type: number
commit_sha:
description: 'Cancel runs for this specific SHA'
required: true
type: string
schedule:
- cron: "*/10 * * * *"

jobs:
cancel_workflow:
name: Report on pull requests
name: Cancel CI on failing pull requests

runs-on: ubuntu-latest

Expand All @@ -50,6 +43,9 @@ jobs:
- name: Cancel runs
run: |
scripts/tools/cancel_workflows_for_pr.py \
--pull-request ${{ inputs.pull_request_id }} \
--commit-sha "${{ inputs.commit_sha }}" \
--gh-api-token "${{ secrets.GITHUB_TOKEN }}"
--gh-api-token "${{ secrets.GITHUB_TOKEN }}" \
--require "Restyled" \
--require "Lint Code Base" \
--require "ZAP" \
--require "Run misspell" \
--max-pr-age-minutes 20
136 changes: 116 additions & 20 deletions scripts/tools/cancel_workflows_for_pr.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,17 @@
# limitations under the License.
#

import datetime
import logging
import re
from typing import Optional, Set

import click
import coloredlogs
from dateutil.tz import tzlocal
from github import Github
from github.Commit import Commit
from github.PullRequest import PullRequest

__LOG_LEVELS__ = {
"debug": logging.DEBUG,
Expand All @@ -32,28 +38,99 @@


class Canceller:
def __init__(self, token):
def __init__(self, token: str, dry_run: bool):
self.api = Github(token)
self.repo = self.api.get_repo(REPOSITORY)
self.dry_run = dry_run

def check_all_pending_prs(self, max_age, required_runs):
cutoff = datetime.datetime.now(tzlocal()) - max_age
logging.info("Searching PRs updated after %s", cutoff)
for pr in self.repo.get_pulls(state="open", sort="updated", direction="desc"):
pr_update = pr.updated_at if pr.updated_at else pr.created_at
pr_update = pr_update.astimezone(tzlocal())

if pr_update < cutoff:
logging.warning(
"PR is too old (since %s, cutoff at %s). Skipping the rest...",
pr_update,
cutoff,
)
break
logging.info(
"Examining PR %d updated at %s: %s", pr.number, pr_update, pr.title
)
self.check_pr(pr, required_runs)

def check_pr(self, pr: PullRequest, required_runs):

last_commit: Optional[Commit] = None

def cancel_all_runs(self, pr_number, commit_sha, dry_run):
pr = self.repo.get_pull(pr_number)
logging.info("Examining PR '%s'", pr.title)
for commit in pr.get_commits():
if commit.sha != commit_sha:
logging.info("Skipping SHA '%s' as it was not selected", commit.sha)
logging.debug(" Found commit %s", commit.sha)
if pr.head.sha == commit.sha:
last_commit = commit
break

if last_commit is None:
logging.error("Could not find any commit in the pull request.")
return

logging.info("Last commit is: %s", last_commit.sha)

in_progress_workflows: Set[int] = set()
failed_check_names: Set[str] = set()

# Gather all workflows along with failed workflow names
for check_suite in last_commit.get_check_suites():
if check_suite.conclusion == "success":
# Finished without errors. Nothing to do here
continue
for run in check_suite.get_check_runs():
if run.conclusion is not None and run.conclusion != "failure":
logging.debug(
" Run %s is not interesting: (state %s, conclusion %s)",
run.name,
run.status,
run.conclusion,
)
continue

for check_suite in commit.get_check_suites():
for run in check_suite.get_check_runs():
if run.status in {"in_progress", "queued"}:
if dry_run:
logging.warning("DRY RUN: Will not stop run %s", run.name)
else:
logging.warning("Stopping run %s", run.name)
self.repo.get_workflow_run(run.id).cancel()
else:
logging.info("Skip over run %s (%s)", run.name, run.status)
# TODO: I am unclear how to really find the workflow id, however the
# HTML URL is like https://github.com/project-chip/connectedhomeip/actions/runs/11261874698/job/31316278705
# so need whatever is after run.
m = re.match(r".*/actions/runs/([\d]+)/job/.*", run.html_url)
if not m:
logging.error(
"Failed to extract workflow number from %s", run.html_url
)
continue

workflow_id = int(m.group(1))
if run.conclusion is None:
logging.info(
" Workflow %d (i.e. %s) still pending: %s",
workflow_id,
run.name,
run.status,
)
in_progress_workflows.add(workflow_id)
elif run.conclusion == "failure":
workflow = self.repo.get_workflow_run(workflow_id)
logging.warning(" Workflow %s Failed", workflow.name)
failed_check_names.add(workflow.name)

if not any([name in failed_check_names for name in required_runs]):
logging.info("No critical failures found")
return

for id in in_progress_workflows:
workflow = self.repo.get_workflow_run(id)
if self.dry_run:
logging.warning("DRY RUN: Will not stop %s", workflow.name)
else:
workflow.cancel()
logging.warning("Stopping workflow %s", workflow.name)


@click.command()
Expand All @@ -63,12 +140,25 @@ def cancel_all_runs(self, pr_number, commit_sha, dry_run):
type=click.Choice(list(__LOG_LEVELS__.keys()), case_sensitive=False),
help="Determines the verbosity of script output.",
)
@click.option("--pull-request", type=int, help="Pull request number to consider")
@click.option("--commit-sha", help="Commit to look at when cancelling pull requests")
@click.option("--gh-api-token", help="Github token to use")
@click.option("--token-file", help="Read github token from the given file")
@click.option("--dry-run", default=False, is_flag=True, help="Actually cancel or not")
def main(log_level, pull_request, commit_sha, gh_api_token, token_file, dry_run):
@click.option(
"--max-pr-age-days", default=0, type=int, help="How many days to look at PRs"
)
@click.option(
"--max-pr-age-minutes", default=0, type=int, help="How many minutes to look at PRs"
)
@click.option("--require", multiple=True, default=[], help="Name of required runs")
def main(
log_level,
gh_api_token,
token_file,
dry_run,
max_pr_age_days,
max_pr_age_minutes,
require,
):
coloredlogs.install(
level=__LOG_LEVELS__[log_level], fmt="%(asctime)s %(levelname)-7s %(message)s"
)
Expand All @@ -80,7 +170,13 @@ def main(log_level, pull_request, commit_sha, gh_api_token, token_file, dry_run)
else:
raise Exception("Require a --gh-api-token or --token-file to access github")

Canceller(gh_token).cancel_all_runs(pull_request, commit_sha, dry_run)
max_age = datetime.timedelta(days=max_pr_age_days, minutes=max_pr_age_minutes)
if max_age == datetime.timedelta():
raise Exception(
"Please specifiy a max age of minutes or days (--max-pr-age-days or --max-pr-age-minutes)"
)

Canceller(gh_token, dry_run).check_all_pending_prs(max_age, require)


if __name__ == "__main__":
Expand Down

0 comments on commit 2804720

Please sign in to comment.