-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrap.py
76 lines (65 loc) · 2.48 KB
/
scrap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import html

import requests
from bs4 import BeautifulSoup
# Scrape the dresses listing page from the website.
url = 'https://www.bergdorfgoodman.com/c/womens-clothing-clothing-dresses-cat80001?navpath=cat000000_cat000002_cat441206_cat80001'
# requests applies no timeout by default, so a stalled connection would hang
# the script forever; bound the wait explicitly.
response = requests.get(url, timeout=30)
# Fail loudly on a 4xx/5xx reply instead of silently parsing an error page
# and producing an empty product list.
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
# Extract the product tiles. Passing a multi-class string to `class_` matches
# the exact value of the `class` attribute, so this depends on the site
# emitting the classes in precisely this order.
products = soup.find_all('div', class_='column custom-4-desktop-tablet custom-6-mobile product basic-grid cms-grid-item')
# Base URL used to absolutize site-relative links.
base_url = 'https://www.bergdorfgoodman.com'
# Opening portion of the generated page: document head (with the Bootstrap
# stylesheet) plus the container and row that the product cards are added to.
_page_header_lines = (
    '<!DOCTYPE html>',
    '<html lang="en">',
    '<head>',
    '<meta charset="UTF-8">',
    '<meta name="viewport" content="width=device-width, initial-scale=1.0">',
    '<title>Dresses List</title>',
    '<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha3/dist/css/bootstrap.min.css" rel="stylesheet">',
    '</head>',
    '<body>',
    '<div class="container">',
    '<h1 class="my-4">Dresses List</h1>',
    '<div class="row">',
)
# The empty first and last items produce the leading and trailing newline.
html_content = "\n".join(("", *_page_header_lines, ""))
def _tag_text(tag):
    """Return the stripped text of a parsed tag, or '' when the tag is missing."""
    return tag.text.strip() if tag is not None else ''


# Loop through each product and append one Bootstrap card per product.
for product in products:
    # `find` returns None when an element is absent; fall back to an empty
    # string instead of crashing on `.text`.
    designer_name = _tag_text(product.find('h2', class_='designer-name'))
    dress_name = _tag_text(product.find('h2', class_='product-name'))
    price = _tag_text(product.find('div', class_='product-thumbnail__sale-price'))

    # Resolve the image URL from the first <img>'s `src` attribute.
    # (`srcset` is not parsed.)
    image_tag = product.find('img')
    image_url = image_tag.get('src') if image_tag is not None else None
    if not image_url:
        # No <img> tag, or an <img> without a usable `src`.
        image_url = 'No image available'
    elif image_url.startswith('//'):
        # Protocol-relative URL: prepend the scheme.
        image_url = 'https:' + image_url
    elif image_url.startswith('/'):
        # Site-relative URL: prepend the site root.
        image_url = base_url + image_url

    # The values come from a scraped page, so escape them before embedding
    # them in markup; `html.escape` also escapes quotes, which keeps the
    # `src` and `alt` attribute values intact.
    safe_image_url = html.escape(image_url)
    safe_designer = html.escape(designer_name)
    safe_dress = html.escape(dress_name)
    safe_price = html.escape(price)

    # Create a Bootstrap card for this product.
    html_content += f'''
<div class="col-md-4">
<div class="card mb-4 shadow-sm">
<img src="{safe_image_url}" class="card-img-top" alt="{safe_dress}">
<div class="card-body">
<h5 class="card-title">{safe_designer}</h5>
<p class="card-text">{safe_dress}</p>
<p class="card-text text-muted">{safe_price}</p>
</div>
</div>
</div>
'''
# Append the closing tags for the row, the container, the body and the document.
closing_markup = "\n</div>\n</div>\n</body>\n</html>\n"
html_content = html_content + closing_markup
# Write the finished page to disk as UTF-8 and report completion.
with open("dresses.html", mode="w", encoding="utf-8") as out_file:
    out_file.write(html_content)
print("HTML file created successfully!")