-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchars_biography_scraper.py
48 lines (35 loc) · 1.45 KB
/
chars_biography_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
"""
@author : Meysam Raz
"""
from selenium.webdriver.common.keys import Keys
from selenium import webdriver
import pandas as pd
import time
# Scrape the pages linked from the Dark fandom wiki landing page and save the
# paragraph text of each one to "<page title>.text" in the current working
# directory.
options = webdriver.ChromeOptions()
# Exclude Chrome's 'enable-logging' switch (presumably to keep the console
# output limited to this script's own messages).
options.add_experimental_option('excludeSwitches', ['enable-logging'])
driver = webdriver.Chrome(executable_path="CHROME_PATH", options=options)
try:
    driver.maximize_window()
    urls = []
    driver.get('https://dark-netflix.fandom.com/wiki/Dark_Wiki')
    time.sleep(2)  # fixed pause after navigation before querying the page
    print('Start scraping')

    # Collect the links in a single pass: nothing in this script advances to
    # another listing page, so repeating the collection in an endless loop
    # would only redo the same work forever.
    for a in driver.find_elements_by_xpath('/html/body/div[4]/div[3]/div[2]/main/div[3]/div/div/div[1]/div/div[5]/div/div/div[2]/div/a'):
        url = a.get_attribute('href')
        # get_attribute returns None for an anchor without an href; skip
        # those so that None is never handed to driver.get below.
        if url:
            urls.append({"url": url})

    # Naming the column explicitly keeps df_urls['url'] valid even when no
    # link matched (an empty frame would otherwise have no 'url' column).
    df_urls = pd.DataFrame(urls, columns=["url"])
    print("Available Characters: ", len(df_urls))
    print('Start getting Bios')

    for url in df_urls['url'].to_list():
        driver.get(url)
        title = driver.find_element_by_xpath('/html/body/div[4]/div[3]/div[2]/main/div[3]/div[2]/div/aside/h2').text
        biography = [bio.text for bio in driver.find_elements_by_xpath('/html/body/div[4]/div[3]/div[2]/main/div[3]/div[2]/div/p')]
        # "/" cannot appear inside a file name, so it is replaced by a space.
        filetitle = title.replace("/", " ") + ".text"
        # The with-block closes the file itself; no explicit close() needed.
        # Paragraphs are written back-to-back with no separator between them.
        with open(filetitle, "w", encoding='utf-8') as file:
            file.writelines(biography)
        time.sleep(2)  # pause between page loads
finally:
    # Always shut the browser down, even if a lookup above raised.
    driver.quit()