-
Notifications
You must be signed in to change notification settings - Fork 489
/
Copy pathkantuSpider.py
46 lines (31 loc) · 1011 Bytes
/
kantuSpider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import re
import os
import time
import requests as rq
def get_all_page(page, timeout=5):
    """Fetch one listing page of 52kantu.cn and return its HTML.

    Despite the name, a single page is fetched per call.

    Args:
        page: Page number substituted into the ``?page=`` query parameter.
        timeout: Seconds to wait for the server before giving up. Defaults
            to 5, the same limit ``down_img`` applies to image requests.

    Returns:
        The response body as text.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    url = 'http://52kantu.cn/?page={}'.format(page)
    # requests applies no timeout unless one is passed, so an unresponsive
    # server would otherwise block the whole crawl indefinitely.
    return rq.get(url, timeout=timeout).text
def get_img_url(html):
    """Collect the ``src`` value of every ``<mip-img>`` tag in *html*.

    Only tags written exactly as ``<mip-img src="..."`` are matched, i.e.
    ``src`` has to be the first attribute and use double quotes.

    Args:
        html: Page markup to scan.

    Returns:
        A list with one string per match, in document order; empty when
        nothing matches.
    """
    pattern = re.compile('<mip-img src="(.*?)"')
    return pattern.findall(html)
def down_img(img_url, s):
    """Download one image and save it as ``./img/<name>.jpg``.

    ``<name>`` is the last segment of the URL path with its extension
    removed; the ``.jpg`` suffix is always used for the saved file.

    Args:
        img_url: Absolute URL of the image.
        s: Object exposing ``get(url, timeout=...)`` whose result has a
            ``content`` attribute holding the raw bytes (in this script a
            ``requests.Session``).

    Raises:
        ValueError: If no file name can be derived from ``img_url``.
    """
    # Function-scope imports keep this block self-contained without
    # touching the file's top-level import list.
    import posixpath
    from urllib.parse import urlsplit

    # Parse the URL instead of splitting on '.', which broke for any host
    # with more than one dot (every image was then saved under the same
    # name) and let query strings leak into the file name.
    stem = posixpath.splitext(posixpath.basename(urlsplit(img_url).path))[0]
    if not stem:
        raise ValueError(
            'cannot derive a file name from {!r}'.format(img_url))

    # Fetch before opening the file so a failed request does not leave an
    # empty .jpg behind.
    content = s.get(img_url, timeout=5).content

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs('./img', exist_ok=True)
    with open('./img/{}.jpg'.format(stem), 'wb') as fp:
        fp.write(content)
def main(all_page, s):
    """Crawl listing pages and download every image found on them.

    Pages ``1`` through ``all_page - 1`` are visited (the upper bound is
    exclusive). After each page that was fetched, the crawl pauses for
    five seconds before requesting the next one.

    NOTE(review): if ``all_page`` is meant to be the number of the last
    page, that page is never visited - confirm the intended bound.

    Args:
        all_page: Exclusive upper bound of the page numbers to crawl.
        s: Session-like object handed to ``down_img`` for image requests.
    """
    # Function-scope import keeps this block self-contained without
    # touching the file's top-level import list.
    import logging

    log = logging.getLogger(__name__)

    for page in range(1, all_page):
        try:
            html = get_all_page(page)
        except Exception as exc:
            # Best-effort crawl: report the failure and move on. Catching
            # Exception rather than using a bare except lets Ctrl-C
            # (KeyboardInterrupt) and SystemExit stop the run.
            log.warning('page %s: fetch failed: %s', page, exc)
            continue

        for img_url in get_img_url(html):
            try:
                down_img(img_url, s)
            except Exception as exc:
                # One broken image must not abort the rest of the page.
                log.warning('page %s: could not save %s: %s',
                            page, img_url, exc)

        time.sleep(5)
if __name__ == '__main__':
    # Run the crawl inside a context manager so the session's pooled
    # connections are released when the script finishes or is interrupted.
    with rq.Session() as s:
        main(320, s)