Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reuse the cache created for latest main on PRs/branches if setup.py is not modified #25445

Merged
merged 3 commits into from
Aug 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ jobs:
- run: pip install -U --upgrade-strategy eager GitPython
- run: pip install -U --upgrade-strategy eager .
- run: mkdir -p test_preparation
- run: python utils/tests_fetcher.py | tee tests_fetched_summary.txt
- run: python utils/tests_fetcher.py | tee test_preparation/tests_fetched_summary.txt
- store_artifacts:
path: ~/transformers/tests_fetched_summary.txt
path: test_preparation/tests_fetched_summary.txt
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This summary file is needed in `create_circleci_config.py` to check whether `tests` is in the list.

- run: |
if [ -f test_list.txt ]; then
cp test_list.txt test_preparation/test_list.txt
Expand Down
90 changes: 73 additions & 17 deletions .circleci/create_circleci_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,14 @@
import glob
import os
import random
import subprocess
import yaml
from base64 import b64encode
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional

import yaml
from git import Repo


COMMON_ENV_VARIABLES = {
Expand Down Expand Up @@ -64,6 +68,8 @@ class CircleCIJob:
working_directory: str = "~/transformers"
# This should be only used for doctest job!
command_timeout: Optional[int] = None
# The explicit checksum to use for cache load/save
checksum: Optional[str] = None

def __post_init__(self):
# Deal with defaults for mutable attributes.
Expand Down Expand Up @@ -100,14 +106,22 @@ def to_dict(self):
job["resource_class"] = self.resource_class
if self.parallelism is not None:
job["parallelism"] = self.parallelism

checksum = self.checksum if self.checksum is not None else '{{ checksum "setup.py" }}'
save_cache = True
if self.checksum is not None:
# `setup.py` is not modified and we are not on `main` branch
cache_branch_prefix = "main"
save_cache = False

steps = [
"checkout",
{"attach_workspace": {"at": "~/transformers/test_preparation"}},
{
"restore_cache": {
"keys": [
# check the fully-matched cache first
f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-pip-" + '{{ checksum "setup.py" }}',
f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-pip-{checksum}",
# try the partially-matched cache from `main`
f"v{self.cache_version}-{self.cache_name}-main-pip-",
# try the general partially-matched cache
Expand All @@ -118,30 +132,31 @@ def to_dict(self):
{
"restore_cache": {
"keys": [
f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-" + '{{ checksum "setup.py" }}',
f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-{checksum}",
f"v{self.cache_version}-{self.cache_name}-main-site-packages-",
f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-",
]
}
},
]
steps.extend([{"run": l} for l in self.install_steps])
steps.append(
{
"save_cache": {
"key": f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-pip-" + '{{ checksum "setup.py" }}',
"paths": ["~/.cache/pip"],
if save_cache:
steps.append(
{
"save_cache": {
"key": f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-pip-{checksum}",
"paths": ["~/.cache/pip"],
}
}
}
)
steps.append(
{
"save_cache": {
"key": f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-" + '{{ checksum "setup.py" }}',
"paths": ["~/.pyenv/versions/"],
)
steps.append(
{
"save_cache": {
"key": f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-{checksum}",
"paths": ["~/.pyenv/versions/"],
}
}
}
)
)
steps.append({"run": {"name": "Show installed libraries and their versions", "command": "pip freeze | tee installed.txt"}})
steps.append({"store_artifacts": {"path": "~/transformers/installed.txt"}})

Expand Down Expand Up @@ -531,11 +546,48 @@ def job_name(self):
DOC_TESTS = [doc_test_job]


def get_main_setup_checksum():
    """Return the checksum of `setup.py` as it exists on the local `main` branch.

    The value is the base64-encoded SHA-256 digest of the file content. It is
    used in place of the `{{ checksum "setup.py" }}` template in cache keys, so
    it is intended to match what CircleCI computes for that template.

    The function temporarily checks out `main` in the working tree to read the
    file, and always switches back to the originally checked-out branch
    afterwards, even if reading the file fails.

    Returns:
        `str`: base64-encoded SHA-256 digest of `setup.py` on `main`.

    Raises:
        OSError: if `setup.py` cannot be read after checking out `main`.
    """
    # Function-scope import so this helper does not depend on a new file-level import.
    import hashlib

    path_to_repo = Path(__file__).parent.parent.resolve()
    repo = Repo(path_to_repo)

    # Remember where we are so the working tree can be restored afterwards.
    current_head = repo.head.ref
    main_head = repo.refs.main

    setup_file_path = os.path.join(path_to_repo, "setup.py")

    main_head.checkout()
    try:
        # Hash in-process instead of spawning `sha256sum`: no child process or pipe is left
        # dangling, and a failure raises instead of silently yielding an empty checksum.
        with open(setup_file_path, "rb") as f:
            digest = hashlib.sha256(f.read()).digest()
    finally:
        # go back to the original branch, even if hashing failed
        current_head.checkout()

    return b64encode(digest).decode()


def create_circleci_config(folder=None):
if folder is None:
folder = os.getcwd()
# Used in CircleCIJob.to_dict() to expand the test list (for using parallelism)
os.environ["test_preparation_dir"] = folder

checksum = None
# if already on `main`, don't try to use the latest commit on `main` to avoid (rare) race condition where multiple
# commits are merged into `main`.
if os.environ.get("CIRCLE_BRANCH", "pull") != "main":
# Check if `setup.py` is modified.
summary_file = os.path.join(folder, "tests_fetched_summary.txt")
if os.path.exists(summary_file):
with open(summary_file) as f:
tests_fetched_summary = f.read()
setup_file_modifiled = "### TEST TO RUN ###\n- tests\n" in tests_fetched_summary
Comment on lines +581 to +585
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to check the summary file instead of `filtered_test_list`, because the latter has already been expanded into a list of files by the `filter_tests` method:

    if test_files == ["tests"]:
        test_files = [os.path.join("tests", f) for f in os.listdir("tests") if f not in ["__init__.py"] + filters]

if not setup_file_modifiled:
# If not, we use `setup.py` of the `latest` commit on the `main` branch to compute the checksum for
# cache
checksum = get_main_setup_checksum()

jobs = []
all_test_file = os.path.join(folder, "test_list.txt")
if os.path.exists(all_test_file):
Expand Down Expand Up @@ -618,6 +670,10 @@ def create_circleci_config(folder=None):
"nightly": {"type": "boolean", "default": False},
"tests_to_run": {"type": "string", "default": test_list},
}

for job in jobs:
job.checksum = checksum

config["jobs"] = {j.job_name: j.to_dict() for j in jobs}
config["workflows"] = {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}}
with open(os.path.join(folder, "generated_config.yml"), "w") as f:
Expand Down