-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprincipal.py
43 lines (32 loc) · 1.44 KB
/
principal.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import requests
# Page whose anchors are inspected, and the fixed pause applied after loading it.
START_URL = "https://www.cnnbrasil.com.br/"
PAGE_LOAD_WAIT_SECONDS = 5

# Start a Chrome session shared by the rest of the script and open the page.
driver = webdriver.Chrome()
driver.get(START_URL)
# Fixed delay before the DOM is queried — presumably to let the page finish
# rendering; there is no explicit readiness check.
time.sleep(PAGE_LOAD_WAIT_SECONDS)
def _check_link(href):
    """Request *href* once and describe the outcome.

    Returns a ``(report_line, console_message)`` pair: the newline-terminated
    line to store in the report file and the message to show on stdout.
    Any ``requests`` failure (timeout, DNS, connection error, ...) is reported
    as an inaccessible link instead of being raised.
    """
    try:
        # Only the status code is needed, so stream=True avoids downloading
        # the response body; the ``with`` block releases the connection.
        with requests.get(href, timeout=5, stream=True) as response:
            status = response.status_code
    except requests.exceptions.RequestException as exc:
        return (
            f"Inacessível (Erro): {href}\n",
            f"Link inacessível (Erro): {href} - {exc}",
        )
    if status == 200:
        return f"Acessível: {href}\n", f"Link acessível: {href}"
    return (
        f"Inacessível (Status {status}): {href}\n",
        f"Link inacessível (Status {status}): {href}",
    )


def cap_links(max_links=100):
    """Check the links of the page currently loaded in the global ``driver``.

    Collects every ``<a>`` element of the page, keeps the distinct ``href``
    values that start with ``"http"`` and requests each one, writing one
    result line per link to ``links.txt`` (overwritten on every call) and
    echoing the same result to stdout.

    Args:
        max_links: Maximum number of distinct links to check. A value of
            zero or less checks nothing (the report file is still created).

    Side effects:
        Always calls ``driver.quit()`` before returning, even when an error
        interrupts the scan, so the browser session cannot be reused after
        this function has run.
    """
    captura_links = set()
    try:
        links = driver.find_elements(By.CSS_SELECTOR, "a")
        # Explicit UTF-8: the report lines contain non-ASCII characters and
        # must not depend on the platform's default encoding.
        with open("links.txt", "w", encoding="utf-8") as file:
            for link_element in links:
                # Test the cap before doing any work so that max_links <= 0
                # does not process a first link.
                if len(captura_links) >= max_links:
                    break
                href = link_element.get_attribute("href")
                # Skip anchors without an http(s) target and duplicates.
                if not href or not href.startswith("http") or href in captura_links:
                    continue
                captura_links.add(href)
                report_line, console_message = _check_link(href)
                file.write(report_line)
                print(console_message)
        print(f"Links encontrados e salvos: {len(captura_links)}")
    finally:
        # Close the browser even if scanning failed part-way through.
        driver.quit()
# Script entry point: check at most 100 distinct links from the loaded page.
cap_links(max_links=100)