Restore no content page visit failure special case
It sometimes happens even without the negative page load timeout workaround.
ghostwords committed Jan 7, 2025
1 parent 063d635 commit 529f4a1
Showing 1 changed file with 8 additions and 1 deletion.
crawler.py (9 changes: 8 additions & 1 deletion)
@@ -1037,7 +1037,14 @@ def crawl(self):
                 self.logger.info("Visiting %d: %s", i + 1, domain)
                 self.visit_domain(domain)
 
-                self.logger.info("Visited %s", self.get_current_url() or domain)
+                curl_or_domain = self.get_current_url() or domain
+                if curl_or_domain.startswith(CHROME_URL_PREFIX):
+                    self.logger.error("Error loading %s: "
+                                      "driver.current_url is still %s",
+                                      domain, curl_or_domain)
+                    continue
+
+                self.logger.info("Visited %s", curl_or_domain)
                 num_visited += 1
 
             except (MaxRetryError, ProtocolError, ReadTimeoutError) as ex:
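
For illustration, here is a minimal standalone sketch of the check this commit restores: when a visited site produces no content, the driver's current_url can remain stuck on an internal Chrome page instead of the target domain, so the crawler logs an error and does not count the visit. The CHROME_URL_PREFIX value and the helper name below are assumptions for the sketch, not crawler.py's actual definitions.

    # Minimal sketch with assumed names/values, not the crawler's actual code.
    CHROME_URL_PREFIX = "chrome-error://"  # hypothetical; internal Chrome error pages

    def resolve_visited_url(current_url, domain):
        """Return the URL to log as visited, or None if the page never loaded."""
        # Fall back to the requested domain when the driver reports no URL at all.
        url = current_url or domain
        # If the browser is still on one of its internal pages, the site
        # produced no content and the visit should not be counted.
        if url.startswith(CHROME_URL_PREFIX):
            return None
        return url

    # Usage: a failed load leaves the browser on an internal error page.
    assert resolve_visited_url("chrome-error://chromewebdata/", "example.com") is None
    assert resolve_visited_url("https://example.com/", "example.com") == "https://example.com/"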
