-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtakenature2.py
31 lines (25 loc) · 1.28 KB
/
takenature2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import scrapy
# Search Nature Careers for Life Science jobs located in Germany.
class takenature2(scrapy.Spider):
    """Scrape job cards from a Nature Careers search-results listing.

    The crawl starts at the search URL in ``start_urls`` and follows the
    pagination "arrow right" link until a page no longer has one.
    """

    name = "naturetaker"
    start_urls = [
        'https://www.nature.com/naturecareers/jobs/search?text=Life+Science&location=Germany',
    ]

    def parse(self, response):
        """Yield one item per ``<article>`` element, then the next-page request.

        Args:
            response: The Scrapy response for a search-results page.

        Yields:
            dict: An item with the keys ``Title``, ``Subtitle``, ``Location``
                and ``Time``. The first three are ``None`` when the matching
                element is absent; ``Time`` falls back to the string
                ``'Time not mentioned'``.
            scrapy.Request: A request for the next results page, when the
                page contains a pagination arrow link with an ``href``.
        """
        for article in response.css('article'):
            yield {
                'Title': article.css('h3.p-card__info-title ::text').get(),
                'Subtitle': article.css('p.p-card__info-subtitle ::text').get(),
                'Location': article.css('p.p-card__info-location ::text').get(),
                # ``.get()`` returns None instead of raising when nothing
                # matches, so the fallback has to be supplied as a default
                # rather than through an ``except`` branch. Wrapping a
                # ``yield`` in a bare ``except`` would also swallow
                # GeneratorExit when the generator is closed.
                'Time': article.css('p.p-card__info-time::text').get(
                    default='Time not mentioned'
                ),
            }

        # Follow the link to the next page. ``.attrib`` is an empty dict when
        # the selector matches nothing (e.g. on the last page), so use
        # ``.get`` to obtain None there instead of raising KeyError.
        next_page = response.css(
            'a.c-pagination__item.c-pagination__item--arrow-right'
        ).attrib.get('href')
        if next_page is not None:
            yield response.follow(next_page, callback=self.parse)