Restore no content page visit failure special case
It sometimes happens even without the negative page load timeout workaround.
ghostwords committed Jan 7, 2025
1 parent 063d635 commit 529f4a1
Showing 1 changed file with 8 additions and 1 deletion.
crawler.py (9 changes: 8 additions & 1 deletion)
@@ -1037,7 +1037,14 @@ def crawl(self):
                 self.logger.info("Visiting %d: %s", i + 1, domain)
                 self.visit_domain(domain)
 
-                self.logger.info("Visited %s", self.get_current_url() or domain)
+                curl_or_domain = self.get_current_url() or domain
+                if curl_or_domain.startswith(CHROME_URL_PREFIX):
+                    self.logger.error("Error loading %s: "
+                                      "driver.current_url is still %s",
+                                      domain, curl_or_domain)
+                    continue
+
+                self.logger.info("Visited %s", curl_or_domain)
                 num_visited += 1
 
             except (MaxRetryError, ProtocolError, ReadTimeoutError) as ex:
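
For illustration, here is a minimal standalone sketch of the check this commit restores: when a visited site produces no content, the driver's current_url can remain stuck on an internal Chrome page instead of the target domain, so the crawler logs an error and does not count the visit. The CHROME_URL_PREFIX value and the helper name below are assumptions for the sketch, not crawler.py's actual definitions.

    # Minimal sketch with assumed names/values, not the crawler's actual code.
    CHROME_URL_PREFIX = "chrome-error://"  # hypothetical; internal Chrome error pages

    def resolve_visited_url(current_url, domain):
        """Return the URL to log as visited, or None if the page never loaded."""
        # Fall back to the requested domain when the driver reports no URL at all.
        url = current_url or domain
        # If the browser is still on one of its internal pages, the site
        # produced no content and the visit should not be counted.
        if url.startswith(CHROME_URL_PREFIX):
            return None
        return url

    # Usage: a failed load leaves the browser on an internal error page.
    assert resolve_visited_url("chrome-error://chromewebdata/", "example.com") is None
    assert resolve_visited_url("https://example.com/", "example.com") == "https://example.com/"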
