-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspider.py
53 lines (47 loc) · 1.65 KB
/
spider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import ast
import hashlib
import re

import requests

import config
# Account credentials are read from the project's config module.
username = config.JW_USERNAME
password = config.JW_PASSWORD

# All endpoints live on the same host; only the path differs.
_BASE_URL = 'http://219.223.252.46:9001'
home_url = _BASE_URL + '/login.do'            # login page, fetched to obtain a session cookie
login_url = _BASE_URL + '/j_acegi_login.do'   # endpoint the login form is submitted to
# NOTE: the misspelled name ("socre") is kept because get_score() refers to it.
socre_url = _BASE_URL + '/cjgl.v_allcj_yjs.do'  # score listing page

# Shared cookie dict; login() fills in JSESSIONID and every request sends it.
cookies = dict(JSESSIONID=None)
def login():
    """Log in and store the session cookie in the module-level ``cookies``.

    Fetches the login page to obtain a ``JSESSIONID`` cookie, saves it into
    ``cookies``, then submits the username and the MD5 hex digest of the
    password to the login endpoint using that cookie.

    Returns:
        bool: True when the response contains the post-login marker text;
        False when it does not, or when the server issued no session cookie.

    Raises:
        requests.exceptions.RequestException: on connection failure or when
            a request exceeds the timeout.
    """
    print('正在登录...')

    # The login form expects the password as an MD5 hex digest.
    md5_pwd = hashlib.md5(password.encode()).hexdigest()

    # Visit the login page first so the server hands out a session cookie.
    # A timeout keeps a dead or stalled server from hanging the caller forever.
    home_page = requests.get(home_url, timeout=15)
    session_id = home_page.cookies.get('JSESSIONID')
    if session_id is None:
        # Without a session cookie the login request cannot succeed; report
        # it as a failed login instead of raising a bare KeyError.
        print('登录失败')
        return False
    cookies['JSESSIONID'] = session_id

    payload = {
        'j_captcha_response': '',
        'j_username': username,
        'j_password': md5_pwd,
        'x': 22,
        'y': 10,
    }
    # NOTE(review): credentials are sent as query-string parameters over plain
    # HTTP. Kept as-is because the server-side contract is not visible here.
    login_resp = requests.get(
        login_url, params=payload, cookies=cookies, timeout=15
    )

    # The post-login page is recognised by this marker text.
    if 'URP研究生教务系统' in login_resp.text:
        return True
    print('登录失败')
    return False
def _extract_score_list(page_text):
    """Return the ``gridData`` literal embedded in the score page, or None.

    Raises ValueError or SyntaxError if the captured text is not a plain
    Python literal.
    """
    # Strip ALL whitespace so the pattern below does not depend on page
    # formatting. Note this also removes whitespace inside string values.
    compact = re.sub(r'\s', '', page_text)
    match = re.search(r'vargridData=(.*);</script><tablewidth', compact)
    if match is None:
        return None
    # SECURITY: this used to be eval() on text fetched over plain HTTP, which
    # lets the server (or anyone on the network path) run arbitrary code.
    # literal_eval only accepts literals (lists, dicts, strings, numbers, ...).
    return ast.literal_eval(match.group(1))


def get_score():
    """Fetch the score page and return the score list embedded in it.

    Requests the score page with the stored session cookie. If the page does
    not contain ``gridData`` (treated as "not logged in"), calls ``login()``
    and requests the page once more.

    Returns:
        The object parsed from the page's ``gridData`` literal, or an empty
        list when login fails or the literal cannot be found or parsed.

    Raises:
        requests.exceptions.RequestException: on connection failure or when
            a request exceeds the timeout.
    """
    result = []

    r = requests.get(socre_url, cookies=cookies, timeout=15)
    if 'gridData' not in r.text:
        # Not logged in: log in again to refresh the cookie, then retry once.
        if not login():
            return result
        r = requests.get(socre_url, cookies=cookies, timeout=15)

    try:
        parsed = _extract_score_list(r.text)
    except (ValueError, SyntaxError):
        # The captured text was not a plain literal; treat it like a failed
        # extraction instead of crashing the caller.
        parsed = None

    if parsed is None:
        print('提取成绩列表失败')
        return result
    return parsed