From c56762e00a6570e86d9906f08f46842f46f4737f Mon Sep 17 00:00:00 2001 From: ydshieh Date: Fri, 11 Aug 2023 10:09:21 +0200 Subject: [PATCH 1/3] fix --- .circleci/create_circleci_config.py | 91 +++++++++++++++++++++++------ 1 file changed, 74 insertions(+), 17 deletions(-) diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py index f122213057b6..558211c90abe 100644 --- a/.circleci/create_circleci_config.py +++ b/.circleci/create_circleci_config.py @@ -18,10 +18,14 @@ import glob import os import random +import subprocess +import yaml +from base64 import b64encode from dataclasses import dataclass +from pathlib import Path from typing import Any, Dict, List, Optional -import yaml +from git import Repo COMMON_ENV_VARIABLES = { @@ -64,6 +68,8 @@ class CircleCIJob: working_directory: str = "~/transformers" # This should be only used for doctest job! command_timeout: Optional[int] = None + # The explicit checksum to use for cache load/save + checksum: Optional[str] = None def __post_init__(self): # Deal with defaults for mutable attributes. @@ -100,6 +106,14 @@ def to_dict(self): job["resource_class"] = self.resource_class if self.parallelism is not None: job["parallelism"] = self.parallelism + + checksum = self.checksum if self.checksum is not None else '{{ checksum "setup.py" }}' + save_cache = True + if self.checksum is not None: + # `setup.py` is not modified and we are not on `main` branch + cache_branch_prefix = "main" + save_cache = False + steps = [ "checkout", {"attach_workspace": {"at": "~/transformers/test_preparation"}}, @@ -107,7 +121,7 @@ def to_dict(self): "restore_cache": { "keys": [ # check the fully-matched cache first - f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-pip-" + '{{ checksum "setup.py" }}', + f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-pip-{checksum}", # try the partially-matched cache from `main` f"v{self.cache_version}-{self.cache_name}-main-pip-", # try the general partially-matched cache @@ -118,7 +132,7 @@ def to_dict(self): { "restore_cache": { "keys": [ - f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-" + '{{ checksum "setup.py" }}', + f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-{checksum}", f"v{self.cache_version}-{self.cache_name}-main-site-packages-", f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-", ] @@ -126,22 +140,23 @@ def to_dict(self): }, ] steps.extend([{"run": l} for l in self.install_steps]) - steps.append( - { - "save_cache": { - "key": f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-pip-" + '{{ checksum "setup.py" }}', - "paths": ["~/.cache/pip"], + if save_cache: + steps.append( + { + "save_cache": { + "key": f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-pip-{checksum}", + "paths": ["~/.cache/pip"], + } } - } - ) - steps.append( - { - "save_cache": { - "key": f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-" + '{{ checksum "setup.py" }}', - "paths": ["~/.pyenv/versions/"], + ) + steps.append( + { + "save_cache": { + "key": f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-{checksum}", + "paths": ["~/.pyenv/versions/"], + } } - } - ) + ) steps.append({"run": {"name": "Show installed libraries and their versions", "command": "pip freeze | tee installed.txt"}}) steps.append({"store_artifacts": {"path": "~/transformers/installed.txt"}}) @@ -531,11 +546,49 @@ def job_name(self): DOC_TESTS = [doc_test_job] +def get_main_setup_checksum(): + + PATH_TO_REPO = Path(__file__).parent.parent.resolve() + repo = Repo(PATH_TO_REPO) + + current_head = repo.head.ref + main_head = repo.refs.main + + setup_file_path = os.path.join(PATH_TO_REPO, "setup.py") + + main_head.checkout() + proc = subprocess.Popen(["sha256sum", f"{setup_file_path}"], stdout=subprocess.PIPE) + checksum = proc.stdout.read().decode().split(" ")[0] + checksum = b64encode(bytes.fromhex(checksum)).decode() + + # go back to the original branch + current_head.checkout() + + return checksum + + def create_circleci_config(folder=None): if folder is None: folder = os.getcwd() # Used in CircleCIJob.to_dict() to expand the test list (for using parallelism) os.environ["test_preparation_dir"] = folder + + test_file = os.path.join(folder, "filtered_test_list.txt") + if os.path.exists(test_file): + with open(test_file) as f: + test_list = f.read() + else: + test_list = [] + + checksum = None + # if already on `main`, don't try to use the latest commit on `main` to avoid (rare) race condition where multiple + # commits are merged into `main`. + if os.environ.get("CIRCLE_BRANCH", "pull") != "main": + # Check if `setup.py` is modified. + if test_list != "tests": + # If not, we use `setup.py` of the `latest` commit on the `main` branch to compute the checksum for cache + checksum = get_main_setup_checksum() + jobs = [] all_test_file = os.path.join(folder, "test_list.txt") if os.path.exists(all_test_file): @@ -618,6 +671,10 @@ def create_circleci_config(folder=None): "nightly": {"type": "boolean", "default": False}, "tests_to_run": {"type": "string", "default": test_list}, } + + for job in jobs: + job.checksum = checksum + config["jobs"] = {j.job_name: j.to_dict() for j in jobs} config["workflows"] = {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}} with open(os.path.join(folder, "generated_config.yml"), "w") as f: From 35ac98f31500657c31fbfa17fd52e24d0e222bb0 Mon Sep 17 00:00:00 2001 From: ydshieh Date: Fri, 11 Aug 2023 11:35:08 +0200 Subject: [PATCH 2/3] fix --- .circleci/create_circleci_config.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py index 558211c90abe..ac5ea3a8b39b 100644 --- a/.circleci/create_circleci_config.py +++ b/.circleci/create_circleci_config.py @@ -573,21 +573,20 @@ def create_circleci_config(folder=None): # Used in CircleCIJob.to_dict() to expand the test list (for using parallelism) os.environ["test_preparation_dir"] = folder - test_file = os.path.join(folder, "filtered_test_list.txt") - if os.path.exists(test_file): - with open(test_file) as f: - test_list = f.read() - else: - test_list = [] - checksum = None # if already on `main`, don't try to use the latest commit on `main` to avoid (rare) race condition where multiple # commits are merged into `main`. if os.environ.get("CIRCLE_BRANCH", "pull") != "main": # Check if `setup.py` is modified. - if test_list != "tests": - # If not, we use `setup.py` of the `latest` commit on the `main` branch to compute the checksum for cache - checksum = get_main_setup_checksum() + summary_file = os.path.join(folder, "tests_fetched_summary.txt") + if os.path.exists(summary_file): + with open(summary_file) as f: + tests_fetched_summary = f.read() + setup_file_modifiled = "### TEST TO RUN ###\n- tests\n" in tests_fetched_summary + if not setup_file_modifiled: + # If not, we use `setup.py` of the `latest` commit on the `main` branch to compute the checksum for + # cache + checksum = get_main_setup_checksum() jobs = [] all_test_file = os.path.join(folder, "test_list.txt") From 0ba080b2b59fb9b03fb699416925a13e099e53e4 Mon Sep 17 00:00:00 2001 From: ydshieh Date: Fri, 11 Aug 2023 12:10:40 +0200 Subject: [PATCH 3/3] fix --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 7fd676c761a6..ce0fc87a5c9b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -34,9 +34,9 @@ jobs: - run: pip install -U --upgrade-strategy eager GitPython - run: pip install -U --upgrade-strategy eager . - run: mkdir -p test_preparation - - run: python utils/tests_fetcher.py | tee tests_fetched_summary.txt + - run: python utils/tests_fetcher.py | tee test_preparation/tests_fetched_summary.txt - store_artifacts: - path: ~/transformers/tests_fetched_summary.txt + path: test_preparation/tests_fetched_summary.txt - run: | if [ -f test_list.txt ]; then cp test_list.txt test_preparation/test_list.txt