-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWPA_Music_1_JSON_Write.py
62 lines (46 loc) · 1.16 KB
/
WPA_Music_1_JSON_Write.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import json
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Page to scrape; relative links found on it are resolved against this URL.
url = "http://nutrias.org/~nopl/photos/wpa/wpa30.htm"

# Collects one dictionary per table row; serialised to JSON at the end of
# the script.
all_my_data = []

# requests has no default timeout, so without one a stalled server would
# hang the script indefinitely.
home_page = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were data.
home_page.raise_for_status()
home_page_html = home_page.text

soup = BeautifulSoup(home_page_html, 'html.parser')
# Maps a 1-based column position to the key that column's value is stored
# under in each output record. The order of the names below is the order of
# the columns, starting at 1.
column_map = dict(
    enumerate(
        (
            "series_number",
            "project",
            "series_title",
            "description",
            "date_taken",
            "project_dates",
            "OP",
            "negatives",
            "contact_prints",
            "8x10_prints",
            "digital_photos",
        ),
        start=1,
    )
)
# Turn every <tr> on the page into one dictionary keyed by the names in
# column_map, and collect the dictionaries in all_my_data.
records = soup.find_all('tr')
for record in records:
    my_data = {}
    # Reset for each row so the image links below can never be filed under a
    # label left over from an earlier row (or hit an undefined name when the
    # very first row has no <td> cells).
    label = None

    fields = record.find_all("td")
    for counter, entry in enumerate(fields, start=1):
        column_name = column_map.get(counter)
        if column_name is None:
            # The row has more cells than known columns; skip the extras
            # rather than raising KeyError.
            continue
        label = column_name

        # The cell text is read from the <font> element; cells without one
        # contribute no value.
        data_rows = entry.find("font")
        if data_rows is None:
            continue
        my_data[label] = data_rows.text

    # Gather every link in the row as an absolute URL. href=True skips
    # anchors that have no href attribute, and urljoin resolves relative
    # hrefs against the page URL while leaving absolute ones intact.
    item_link = record.find_all('a', href=True)
    image_urls = [urljoin(url, link['href']) for link in item_link]

    # NOTE(review): the original indentation was lost; this assumes the link
    # list replaces the value of the row's last column (normally
    # "digital_photos") and only when the row actually has links - confirm.
    if image_urls and label is not None:
        my_data[label] = image_urls

    # Rows without <td> cells (e.g. header rows) still yield an empty record.
    all_my_data.append(my_data)
# Write the collected records to disk as 2-space-indented JSON, then tell
# the user the output file exists.
with open('WPA_Music_Collection.json', 'w') as f_object:
    f_object.write(json.dumps(all_my_data, indent=2))

print("Your file is now ready")