Commit
Deduplicate urls parsed to reduce crawl requests
niyas-sait committed Feb 9, 2022
1 parent 30fed68 commit 8e5647c
Showing 1 changed file with 11 additions and 3 deletions.

src/test/shell/bazel/verify_workspace.sh
@@ -46,17 +46,25 @@ function test_verify_urls() {
   # Find url-shaped lines, skipping jekyll-tree (which isn't a valid URL), and
   # skipping comments.
   invalid_urls=()
+  urls=()
   for file in "${WORKSPACE_FILES[@]}"; do
     for url in $(grep -E '"https://|http://' "${file}" | \
       sed -e '/jekyll-tree/d' -e '/^#/d' -r -e 's#^.*"(https?://[^"]+)".*$#\1#g' | \
       sort -u); do
-      echo "Checking ${url} ..."
-      if ! curl --head --silent --show-error --fail --output /dev/null --retry 3 "${url}"; then
-        invalid_urls+=("${url}")
+      # add only unique url to the array
+      if [[ ${#urls[@]} == 0 ]] || [[ ! " ${urls[@]} " =~ " ${url} " ]]; then
+        urls+=("${url}")
       fi
     done
   done
+
+  for url in "${urls[@]}"; do
+    echo "Checking ${url} ..."
+    if ! curl --head --silent --show-error --fail --output /dev/null --retry 3 "${url}"; then
+      invalid_urls+=("${url}")
+    fi
+  done

   if [[ ${#invalid_urls[@]} > 0 ]]; then
     fail "Invalid urls: ${invalid_urls[@]}"
   fi

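For reference, the new logic as a self-contained script. This is a minimal sketch, not the test as shipped: WORKSPACE_FILES and fail() belong to Bazel's shell-test harness, so hypothetical stand-ins are stubbed in below, and two small idiom tweaks (${urls[*]} inside [[ ]], -gt for the numeric comparison) are noted in comments.

#!/usr/bin/env bash
# Minimal standalone sketch of the pattern this commit introduces: collect
# URLs from every file first, de-duplicating across files, then issue one
# curl request per unique URL. WORKSPACE_FILES and fail() come from Bazel's
# test harness; the values below are hypothetical stand-ins.

WORKSPACE_FILES=("WORKSPACE")               # hypothetical file list
fail() { echo "FAIL: $*" >&2; exit 1; }     # harness stub

invalid_urls=()
urls=()
for file in "${WORKSPACE_FILES[@]}"; do
  for url in $(grep -E '"https://|http://' "${file}" | \
    sed -e '/jekyll-tree/d' -e '/^#/d' -r -e 's#^.*"(https?://[^"]+)".*$#\1#g' | \
    sort -u); do
    # sort -u de-duplicates within a single file only; this space-padded
    # membership test de-duplicates across files (URLs contain no spaces,
    # so whole-element matches are safe). ${urls[*]} is the conventional
    # spelling for joining the array into one word inside [[ ]].
    if [[ ! " ${urls[*]} " =~ " ${url} " ]]; then
      urls+=("${url}")
    fi
  done
done

for url in "${urls[@]}"; do
  echo "Checking ${url} ..."
  if ! curl --head --silent --show-error --fail --output /dev/null --retry 3 "${url}"; then
    invalid_urls+=("${url}")
  fi
done

if [[ ${#invalid_urls[@]} -gt 0 ]]; then
  fail "Invalid urls: ${invalid_urls[*]}"
fi

The committed version additionally checks ${#urls[@]} == 0 before the regex test, presumably because expanding an empty array is an error under set -u on Bash releases before 4.4.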
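As a design note: the regex membership test rescans the whole urls array for every candidate, which is quadratic in the number of URLs. On Bash 4+, an associative array gives the same cross-file de-duplication with a hash lookup per URL; a sketch under that assumption follows (macOS still ships Bash 3.2, which may be why the commit keeps the portable idiom). The candidates array is hypothetical input standing in for the grep/sed pipeline's output.

#!/usr/bin/env bash
# Alternative sketch, not what the commit does: on Bash 4+, an associative
# array acts as a set, turning each membership check into a hash lookup
# instead of a rescan of the whole urls array.

declare -A seen=()
urls=()
candidates=("https://a.example" "http://b.example" "https://a.example")  # hypothetical

for url in "${candidates[@]}"; do
  if [[ -z "${seen[$url]:-}" ]]; then
    seen["$url"]=1
    urls+=("${url}")
  fi
done

printf '%s\n' "${urls[@]}"   # each unique URL exactly once, in first-seen order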