From 44a8e2b06aca201f432973c8278620d6aed13372 Mon Sep 17 00:00:00 2001
From: Niyas Sait
Date: Wed, 9 Feb 2022 07:23:57 -0800
Subject: [PATCH] Deduplicate urls parsed to reduce crawl requests

https://github.com/bazelbuild/bazel/pull/14700 added a couple more URLs
for fetching JDK packages, which seems to be causing some infrastructure
issues, as discussed in https://github.com/bazelbuild/bazel/pull/14700.

This patch works around the issue by skipping duplicated URLs, reducing
the number of crawl requests.

Closes #14763.

PiperOrigin-RevId: 427464876
---
 src/test/shell/bazel/verify_workspace.sh | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/test/shell/bazel/verify_workspace.sh b/src/test/shell/bazel/verify_workspace.sh
index ab1e3feeee3c3b..14f0e0390cdbf0 100755
--- a/src/test/shell/bazel/verify_workspace.sh
+++ b/src/test/shell/bazel/verify_workspace.sh
@@ -46,14 +46,18 @@ function test_verify_urls() {
   # Find url-shaped lines, skipping jekyll-tree (which isn't a valid URL), and
   # skipping comments.
   invalid_urls=()
+  checked_urls=()
   for file in "${WORKSPACE_FILES[@]}"; do
     for url in $(grep -E '"https://|http://' "${file}" | \
       sed -e '/jekyll-tree/d' -e '/^#/d' -r -e 's#^.*"(https?://[^"]+)".*$#\1#g' | \
       sort -u); do
-      #echo "Checking ${url}"
-      if ! curl --head -silent --fail --output /dev/null --retry 3 "${url}"; then
-        #fail "URL ${url} is invalid."
-        invalid_urls+=("${url}")
+      # add only unique url to the array
+      if [[ ${#checked_urls[@]} == 0 ]] || [[ ! " ${checked_urls[@]} " =~ " ${url} " ]]; then
+        checked_urls+=("${url}")
+        # echo "Checking ${url} ..."
+        if ! curl --head --silent --show-error --fail --output /dev/null --retry 3 "${url}"; then
+          invalid_urls+=("${url}")
+        fi
       fi
     done
   done
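
The uniqueness check added above relies on a common Bash idiom: the array of
already-checked URLs is flattened into a single space-separated string and
matched against " ${url} ", so only whole entries count as hits. Below is a
minimal standalone sketch of the same pattern, outside the test harness; the
sample URLs are illustrative placeholders, not entries taken from the
WORKSPACE files.

    #!/bin/bash
    # Deduplicate a list of URLs with the same whitespace-delimited
    # membership test used in the patch above.
    urls=(
      "https://example.com/a"
      "https://example.com/b"
      "https://example.com/a"  # duplicate; should be skipped
    )
    checked_urls=()
    for url in "${urls[@]}"; do
      # "${checked_urls[*]}" joins the array with spaces; padding both sides
      # with a space makes the literal =~ match hit only complete entries.
      if [[ ${#checked_urls[@]} == 0 ]] || [[ ! " ${checked_urls[*]} " =~ " ${url} " ]]; then
        checked_urls+=("${url}")
        echo "Checking ${url}"
      fi
    done
    # Prints each distinct URL exactly once.

Note that the membership scan is quadratic in the number of URLs, which is
fine for the small set of mirrors listed in the WORKSPACE files.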