Skip to content

Commit

Permalink
Use URLs as default canonical IDs in common repo rules
Browse files Browse the repository at this point in the history
RELNOTES[INC]: The `http_archive`, `http_file`, `http_jar`,
`jvm_maven_import_external`, and `jvm_import_external` repository rules
now use the URLs as the default canonical ID. If this behavior is not
desired, it can be disabled via
`--repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1`. The
`--experimental_repository_cache_urls_as_default_canonical_id` flag is
now a no-op.
  • Loading branch information
fmeum committed Nov 4, 2023
1 parent 270ad1f commit 555588b
Show file tree
Hide file tree
Showing 11 changed files with 117 additions and 53 deletions.
3 changes: 3 additions & 0 deletions scripts/bootstrap/bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,14 @@ fi

: ${JAVA_VERSION:="11"}

# TODO: remove `--repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1` once all dependencies are mirrored.
# See https://github.com/bazelbuild/bazel/pull/19549 for more context.
_BAZEL_ARGS="--spawn_strategy=standalone \
--nojava_header_compilation \
--strategy=Javac=worker --worker_quit_after_build --ignore_unsupported_sandboxing \
--compilation_mode=opt \
--repository_cache=derived/repository_cache \
--repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1 \
--extra_toolchains=//scripts/bootstrap:all \
--extra_toolchains=@bazel_tools//tools/python:autodetecting_toolchain \
--enable_bzlmod \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,6 @@ public void beforeCommand(CommandEnvironment env) throws AbruptExitException {
if (repoOptions.repositoryDownloaderRetries >= 0) {
downloadManager.setRetries(repoOptions.repositoryDownloaderRetries);
}
downloadManager.setUrlsAsDefaultCanonicalId(repoOptions.urlsAsDefaultCanonicalId);

repositoryCache.setHardlink(repoOptions.useHardlinks);
if (repoOptions.experimentalScaleTimeouts > 0.0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -265,14 +265,10 @@ public Converter() {
@Option(
name = "experimental_repository_cache_urls_as_default_canonical_id",
defaultValue = "false",
documentationCategory = OptionDocumentationCategory.BAZEL_CLIENT_OPTIONS,
effectTags = {OptionEffectTag.LOADING_AND_ANALYSIS},
metadataTags = {OptionMetadataTag.EXPERIMENTAL},
help =
"If true, use a string derived from the URLs of repository downloads as the canonical_id "
+ "if not specified. This causes a change in the URLs to result in a redownload even "
+ "if the cache contains a download with the same hash. This can be used to verify "
+ "that URL changes don't result in broken repositories being masked by the cache.")
documentationCategory = OptionDocumentationCategory.UNDOCUMENTED,
metadataTags = OptionMetadataTag.DEPRECATED,
effectTags = {OptionEffectTag.NO_OP},
help = "No-op.")
public boolean urlsAsDefaultCanonicalId;

@Option(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ public class DownloadManager {
private final Downloader downloader;
private boolean disableDownload = false;
private int retries = 0;
private boolean urlsAsDefaultCanonicalId;
@Nullable private Credentials netrcCreds;
private CredentialFactory credentialFactory = StaticCredentials::new;

Expand Down Expand Up @@ -90,10 +89,6 @@ public void setRetries(int retries) {
this.retries = retries;
}

public void setUrlsAsDefaultCanonicalId(boolean urlsAsDefaultCanonicalId) {
this.urlsAsDefaultCanonicalId = urlsAsDefaultCanonicalId;
}

public void setNetrcCreds(Credentials netrcCreds) {
this.netrcCreds = netrcCreds;
}
Expand Down Expand Up @@ -134,9 +129,6 @@ public Path download(
if (Thread.interrupted()) {
throw new InterruptedException();
}
if (Strings.isNullOrEmpty(canonicalId) && urlsAsDefaultCanonicalId) {
canonicalId = originalUrls.stream().map(URL::toExternalForm).collect(Collectors.joining(" "));
}

// TODO(andreisolo): This code path is inconsistent as the authHeaders are fetched from a
// .netrc only if it comes from a http_{archive,file,jar} - and it is handled directly
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ public void setup(BlackBoxTestContext context) throws IOException {
String sharedRepoCache = System.getenv("REPOSITORY_CACHE");
if (sharedRepoCache != null) {
lines.add("common --repository_cache=" + sharedRepoCache);
// TODO: Remove this flag once all dependencies are mirrored.
// See https://github.com/bazelbuild/bazel/pull/19549 for more context.
lines.add("common --repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1");
if (OS.getCurrent() == OS.DARWIN) {
// For reducing SSD usage on our physical Mac machines.
lines.add("common --experimental_repository_cache_hardlinks");
Expand Down
3 changes: 3 additions & 0 deletions src/test/py/bazel/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,9 @@ def setUp(self):
shared_repo_cache = os.environ.get('REPOSITORY_CACHE')
if shared_repo_cache:
f.write('common --repository_cache={}\n'.format(shared_repo_cache))
# TODO: Remove this flag once all dependencies are mirrored.
# See https://github.com/bazelbuild/bazel/pull/19549 for more context.
f.write('common --repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1')
if TestBase.IsDarwin():
# For reducing SSD usage on our physical Mac machines.
f.write('common --experimental_repository_cache_hardlinks\n')
Expand Down
38 changes: 23 additions & 15 deletions src/test/shell/bazel/bazel_repository_cache_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,9 @@ function test_fetch_value_with_existing_cache_and_no_network() {
cache_entry="$repo_cache_dir/content_addressable/sha256/$sha256"
mkdir -p "$cache_entry"
cp "$repo2_zip" "$cache_entry/file" # Artifacts are named uniformly as "file" in the cache
http_archive_url="http://localhost:$nc_port/bleh"
canonical_id_hash=$(printf "$http_archive_url" | sha256sum | cut -f 1 -d ' ')
touch "$cache_entry/id-$canonical_id_hash"

# Fetch without a server
shutdown_server
Expand Down Expand Up @@ -271,6 +274,7 @@ EOF
# to do without checksum. But we can safely do so, as the loopback device
# is reasonably safe against man-in-the-middle attacks.
bazel fetch --repository_cache="$repo_cache_dir" \
--repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1 \
//zoo:breeding-program >& $TEST_log \
|| fail "expected fetch to succeed"

Expand All @@ -283,6 +287,7 @@ EOF

# As we don't have a predicted cache, we expect fetching to fail now.
bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
--repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1 \
&& fail "expected failure" || :

# However, if we add the hash, the value is taken from cache
Expand All @@ -298,6 +303,7 @@ http_archive(
)
EOF
bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
--repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1 \
|| fail "expected fetch to succeed"
}

Expand Down Expand Up @@ -465,15 +471,19 @@ EOF
expect_log "Error downloading"
}

function test_break_url() {
function test_http_archive_no_default_canonical_id() {
setup_repository

bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
bazel fetch --repository_cache="$repo_cache_dir" \
--repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1 \
//zoo:breeding-program >& $TEST_log \
|| echo "Expected fetch to succeed"

shutdown_server

bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
bazel fetch --repository_cache="$repo_cache_dir" \
--repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1 \
//zoo:breeding-program >& $TEST_log \
|| echo "Expected fetch to succeed"

# Break url in WORKSPACE
Expand All @@ -489,24 +499,24 @@ http_archive(
)
EOF

# By default, cache entry will still match by sha256, even if url is changed.
bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
# Without the default canonical id, cache entry will still match by sha256, even if url is
# changed.
bazel fetch --repository_cache="$repo_cache_dir" \
--repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1 \
//zoo:breeding-program >& $TEST_log \
|| echo "Expected fetch to succeed"
}

function test_experimental_repository_cache_urls_as_default_canonical_id() {

function test_http_archive_urls_as_default_canonical_id() {
setup_repository

bazel fetch --repository_cache="$repo_cache_dir" \
--experimental_repository_cache_urls_as_default_canonical_id \
//zoo:breeding-program >& $TEST_log \
bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
|| echo "Expected fetch to succeed"

shutdown_server

bazel fetch --repository_cache="$repo_cache_dir" \
--experimental_repository_cache_urls_as_default_canonical_id \
//zoo:breeding-program >& $TEST_log \
bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
|| echo "Expected fetch to succeed"

# Break url in WORKSPACE
Expand All @@ -523,9 +533,7 @@ http_archive(
EOF

# As repository cache key should depend on urls, we expect fetching to fail now.
bazel fetch --repository_cache="$repo_cache_dir" \
--experimental_repository_cache_urls_as_default_canonical_id \
//zoo:breeding-program >& $TEST_log \
bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
&& fail "expected failure" || :
}

Expand Down
3 changes: 3 additions & 0 deletions src/test/shell/testenv.sh.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,9 @@ EOF
if [[ -n ${REPOSITORY_CACHE:-} ]]; then
echo "testenv.sh: Using repository cache at $REPOSITORY_CACHE."
echo "common --repository_cache=$REPOSITORY_CACHE" >> $TEST_TMPDIR/bazelrc
# TODO: Remove this flag once all dependencies are mirrored.
# See https://github.com/bazelbuild/bazel/pull/19549 for more context.
echo "common --repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1" >> $TEST_TMPDIR/bazelrc
if is_darwin; then
# For reducing SSD usage on our physical Mac machines.
echo "testenv.sh: Enabling --experimental_repository_cache_hardlinks"
Expand Down
48 changes: 48 additions & 0 deletions tools/build_defs/repo/cache.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Copyright 2023 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# WARNING:
# https://github.com/bazelbuild/bazel/issues/17713
# .bzl files in this package (tools/build_defs/repo) are evaluated
# in a Starlark environment without "@_builtins" injection, and must not refer
# to symbols associated with build/workspace .bzl files

visibility("private")

NO_DEFAULT_CANONICAL_ID_ENV = "BAZEL_NO_DEFAULT_CANONICAL_ID"

CANONICAL_ID_DOC = """A canonical ID of the file downloaded.
If specified and non-empty, Bazel will not take the file from cache, unless it
was added to the cache by a request with the same canonical ID.
If unspecified or empty, Bazel by default uses the URLs of the file as the
canonical ID. This helps catch the common mistake of updating the URLs without
also updating the hash, resulting in builds that succeed locally but fail on
machines without the file in the cache. This behavior can be disabled with
--repo_env={env}=1.
""".format(env = NO_DEFAULT_CANONICAL_ID_ENV)

def get_default_canonical_id(repository_ctx, urls):
"""Returns the default canonical id to use for downloads."""
if repository_ctx.os.environ.get(NO_DEFAULT_CANONICAL_ID_ENV) == "1":
return ""

# Do not sort URLs to prevent the following scenario:
# 1. http_archive with urls = [B, A] created.
# 2. Successful fetch from B results in canonical ID "A B".
# 3. Order of urls is flipped to [A, B].
# 4. Fetch would reuse cache entry for "A B", even though A may be broken (it has never been
# fetched before).
return " ".join(urls)
33 changes: 15 additions & 18 deletions tools/build_defs/repo/http.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ These rules are improved versions of the native http rules and will eventually
replace the native rules.
"""

load(
":cache.bzl",
"CANONICAL_ID_DOC",
"NO_DEFAULT_CANONICAL_ID_ENV",
"get_default_canonical_id",
)
load(
":utils.bzl",
"patch",
Expand Down Expand Up @@ -142,7 +148,7 @@ def _http_archive_impl(ctx):
ctx.attr.sha256,
ctx.attr.type,
ctx.attr.strip_prefix,
canonical_id = ctx.attr.canonical_id,
canonical_id = ctx.attr.canonical_id or get_default_canonical_id(ctx, all_urls),
auth = auth,
integrity = ctx.attr.integrity,
)
Expand Down Expand Up @@ -182,7 +188,7 @@ def _http_file_impl(ctx):
"file/" + downloaded_file_path,
ctx.attr.sha256,
ctx.attr.executable,
canonical_id = ctx.attr.canonical_id,
canonical_id = ctx.attr.canonical_id or get_default_canonical_id(ctx, all_urls),
auth = auth,
integrity = ctx.attr.integrity,
)
Expand Down Expand Up @@ -219,7 +225,7 @@ def _http_jar_impl(ctx):
all_urls,
"jar/" + downloaded_file_name,
ctx.attr.sha256,
canonical_id = ctx.attr.canonical_id,
canonical_id = ctx.attr.canonical_id or get_default_canonical_id(ctx, all_urls),
auth = auth,
integrity = ctx.attr.integrity,
)
Expand Down Expand Up @@ -254,11 +260,7 @@ easier but either this attribute or `sha256` should be set before shipping.""",
doc = _AUTH_PATTERN_DOC,
),
"canonical_id": attr.string(
doc = """A canonical id of the archive downloaded.
If specified and non-empty, bazel will not take the archive from cache,
unless it was added to the cache by a request with the same canonical id.
""",
doc = CANONICAL_ID_DOC,
),
"strip_prefix": attr.string(
doc = """A directory prefix to strip from the extracted files.
Expand Down Expand Up @@ -379,6 +381,7 @@ following: `"zip"`, `"jar"`, `"war"`, `"aar"`, `"tar"`, `"tar.gz"`, `"tgz"`,
http_archive = repository_rule(
implementation = _http_archive_impl,
attrs = _http_archive_attrs,
environ = [NO_DEFAULT_CANONICAL_ID_ENV],
doc =
"""Downloads a Bazel repository as a compressed archive file, decompresses it,
and makes its targets available for binding.
Expand Down Expand Up @@ -454,11 +457,7 @@ field will make your build non-hermetic. It is optional to make development
easier but either this attribute or `sha256` should be set before shipping.""",
),
"canonical_id": attr.string(
doc = """A canonical id of the archive downloaded.
If specified and non-empty, bazel will not take the archive from cache,
unless it was added to the cache by a request with the same canonical id.
""",
doc = CANONICAL_ID_DOC,
),
"url": attr.string(doc = _URL_DOC),
"urls": attr.string_list(doc = _URLS_DOC),
Expand All @@ -473,6 +472,7 @@ unless it was added to the cache by a request with the same canonical id.
http_file = repository_rule(
implementation = _http_file_impl,
attrs = _http_file_attrs,
environ = [NO_DEFAULT_CANONICAL_ID_ENV],
doc =
"""Downloads a file from a URL and makes it available to be used as a file
group.
Expand Down Expand Up @@ -514,11 +514,7 @@ field will make your build non-hermetic. It is optional to make development
easier but either this attribute or `sha256` should be set before shipping.""",
),
"canonical_id": attr.string(
doc = """A canonical id of the archive downloaded.
If specified and non-empty, bazel will not take the archive from cache,
unless it was added to the cache by a request with the same canonical id.
""",
doc = CANONICAL_ID_DOC,
),
"url": attr.string(doc = _URL_DOC + "\n\nThe URL must end in `.jar`."),
"urls": attr.string_list(doc = _URLS_DOC + "\n\nAll URLs must end in `.jar`."),
Expand All @@ -537,6 +533,7 @@ unless it was added to the cache by a request with the same canonical id.
http_jar = repository_rule(
implementation = _http_jar_impl,
attrs = _http_jar_attrs,
environ = [NO_DEFAULT_CANONICAL_ID_ENV],
doc =
"""Downloads a jar from a URL and makes it available as java_import
Expand Down
Loading

0 comments on commit 555588b

Please sign in to comment.