-
Notifications
You must be signed in to change notification settings - Fork 0
/
reviews.py
45 lines (36 loc) · 1.5 KB
/
reviews.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import scrapy
class ReviewItem(scrapy.Item):
    """Container for the fields scraped from a single app review."""

    # Headline text of the review listing.
    title = scrapy.Field()
    # Value of the star widget's ``data-rating`` attribute.
    rating = scrapy.Field()
    # Text of the review's metadata label, as shown on the page.
    date = scrapy.Field()
    # Merchant characteristic text; may be absent for a review.
    location = scrapy.Field()
    # First paragraph of the review body.
    content = scrapy.Field()
class ShopifyReviewSpider(scrapy.Spider):
    """Scrape the reviews of one Shopify App Store listing.

    Starting from the URL in ``start_urls``, every ``div.review-listing``
    element on a page is turned into a ``ReviewItem`` and yielded, then the
    "next page" pagination link (when present) is followed with the same
    callback.
    """

    name = 'reviews'
    custom_settings = {
        # Previously misspelled as 'DOWNLOD_DELAY', which Scrapy silently
        # ignored, so no delay was applied between requests.
        'DOWNLOAD_DELAY': 1,
        # NOTE(review): newer Scrapy releases prefer the single FEEDS
        # setting over FEED_FORMAT/FEED_URI; kept as-is for compatibility --
        # confirm against the installed Scrapy version.
        'FEED_FORMAT': 'csv',
        'FEED_URI': 'reviews.csv',
    }
    # Not referenced anywhere in this class; kept because they are public
    # class attributes that other code may read.
    headers = {}
    params = {}
    start_urls = [
        'https://apps.shopify.com/yotpo-subscription/reviews',
    ]

    @staticmethod
    def _first_text(selector, query):
        """Return the stripped first match of CSS ``query``, or ``None``.

        Args:
            selector: Selector (or response) to search under.
            query: CSS query expected to end in ``::text``.

        Returns:
            The first matching string with surrounding whitespace removed,
            or ``None`` when nothing matches.
        """
        value = selector.css(query).get()
        return value.strip() if value is not None else None

    def parse(self, response):
        """Yield one ``ReviewItem`` per review, then the next-page request.

        Args:
            response: The downloaded reviews page.

        Yields:
            ``ReviewItem`` objects for each review on the page, followed by
            a request for the next page when a pagination link exists.
            Fields whose markup is missing are set to ``None`` instead of
            aborting the whole page.
        """
        for review in response.css('div.review-listing '):
            item = ReviewItem()
            item['title'] = self._first_text(
                review, '.review-listing-header__text::text')

            # Guard against reviews without a star widget; indexing an
            # empty selector list would raise IndexError.
            stars = review.css('.ui-star-rating')
            item['rating'] = (
                stars[0].xpath('@data-rating').get() if stars else None
            )

            item['date'] = self._first_text(
                review, 'div.review-metadata__item-label::text')
            item['content'] = self._first_text(
                review, '.review-content .truncate-content-copy p::text')
            item['location'] = self._first_text(
                review,
                'div.review-merchant-characteristic__item span::text')

            # Items must be yielded to reach the feed exporter; they were
            # previously collected into a local list that was never used.
            yield item

        next_page = response.css(
            'div.search-pagination a.search-pagination__next-page-text::attr("href")'
        ).get()
        if next_page is not None:
            yield response.follow(next_page, self.parse)