-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathin_bestseller.py
83 lines (70 loc) · 2.63 KB
/
in_bestseller.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import csv
import os

import bs4
import requests
# Fetch the amazon.in books bestsellers landing page and collect the link
# held by each '.zg_page' element (the listing's individual pages).
get_url = 'https://www.amazon.in/gp/bestsellers/books'
# A timeout stops the script from hanging forever on an unresponsive server.
main_page = requests.get(get_url, timeout=30)
main_page_soup = bs4.BeautifulSoup(main_page.text, 'lxml')
pages_list = main_page_soup.select('.zg_page')
pages_links_list = []
for page_item in pages_list:
    anchor = page_item.find('a')
    href = anchor.get('href') if anchor is not None else None
    # Entries without an <a href=...> are skipped: they cannot be fetched
    # later and would otherwise crash here or in the download loop.
    if href:
        pages_links_list.append(href)
# Parallel result lists: position k in every list describes the same book,
# so each scraped item must add exactly one entry to each of them.
authors = []
links = []
prices = []
names = []
ratings = []
num_ratings = []

# Placeholder stored whenever a field cannot be found for an item.
NOT_AVAILABLE = 'Not available'


def _select_first(node, selector):
    """Return the first element under *node* matching the CSS *selector*.

    Returns None when *node* is None or nothing matches.
    """
    if node is None:
        return None
    matches = node.select(selector)
    return matches[0] if matches else None


def _text_of(node, strip=False):
    """Return the text of *node*, or the placeholder when *node* is None."""
    if node is None:
        return NOT_AVAILABLE
    text = node.text
    return text.strip() if strip else text


# Download every listing page and pull the fields out of each item wrapper.
for link in pages_links_list:
    # A timeout stops one stalled page from blocking the whole run.
    res = requests.get(link, timeout=30)
    soup = bs4.BeautifulSoup(res.text, 'lxml')
    for item in soup.select('.zg_itemWrapper'):
        # Author: try '.a-link-child' first, then fall back to
        # '.a-color-base'.
        author_node = _select_first(item, '.a-link-child')
        if author_node is None:
            author_node = _select_first(item, '.a-color-base')
        authors.append(_text_of(author_node))

        # Product link: href of the first anchor inside the item.
        anchor = item.find('a')
        href = anchor.get('href') if anchor is not None else None
        links.append(href if href is not None else NOT_AVAILABLE)

        names.append(
            _text_of(_select_first(item, '.p13n-sc-truncate'), strip=True))
        prices.append(
            _text_of(_select_first(item, '.p13n-sc-price'), strip=True))

        # Rating and rating count are treated as a pair. Both nodes are
        # located before anything is appended, so a missing count can no
        # longer leave a stray extra entry in `ratings` and shift later rows.
        rating_row = _select_first(item, '.a-icon-row')
        rating_link = _select_first(rating_row, '.a-link-normal')
        count_node = _select_first(rating_row, '.a-size-small')
        if rating_link is None or count_node is None:
            ratings.append(NOT_AVAILABLE)
            num_ratings.append(NOT_AVAILABLE)
        else:
            ratings.append(rating_link.get('title'))
            num_ratings.append(count_node.text)
# Normalise the scraped values in place: rating counts become ints, prices
# lose their thousands separators and gain a rupee sign, and product links
# get the site prefix. 'Not available' placeholders are left untouched.
for idx, raw_count in enumerate(num_ratings):
    if raw_count != 'Not available':
        cleaned_count = raw_count.replace(',', '')
        try:
            num_ratings[idx] = int(cleaned_count)
        except ValueError:
            # Count text that is not a plain number is kept (minus commas)
            # rather than aborting the whole run.
            num_ratings[idx] = cleaned_count
    if prices[idx] != 'Not available':
        prices[idx] = '₹' + prices[idx].replace(',', '')
    # Prefix only real hrefs; prefixing the placeholder would produce the
    # bogus URL 'https://www.amazon.inNot available'.
    if links[idx] != 'Not available':
        links[idx] = 'https://www.amazon.in' + links[idx]
# Column headers, in the same order as the values of each written row.
title_list = ['Name', 'URL', 'Author', 'Price',
              'Number of Ratings', 'Average Rating']
# Write the header and one row per book to ./output/in_book.csv.
# The target directory is created first so that a missing folder does not
# throw away the scraped data at the very last step.
os.makedirs('./output', exist_ok=True)
# newline='' hands line-ending control to the csv module; without it,
# platforms that translate '\n' to '\r\n' get a blank line after each row.
with open('./output/in_book.csv', 'w', encoding='utf-8',
          newline='') as inpfil:
    csv_writer = csv.writer(inpfil)
    csv_writer.writerow(title_list)
    # zip lines up the k-th entry of every list into one row.
    csv_writer.writerows(
        zip(names, links, authors, prices, num_ratings, ratings))