Skip to content

Commit

Permalink
使用h5st逆向爬取,发布2.0版本
Browse files Browse the repository at this point in the history
  • Loading branch information
Viper373 committed May 3, 2024
0 parents commit 3b3a66f
Show file tree
Hide file tree
Showing 102 changed files with 30,932 additions and 0 deletions.
Binary file added algo/algo.algo
Binary file not shown.
88 changes: 88 additions & 0 deletions config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Product information.
# PRODUCTS maps a category name to a list of product records. Every record is a
# dict with three string fields:
#   "name"      - the product's display title
#   "productid" - numeric product identifier, stored as a string
#                 (presumably the JD SKU id used in item URLs - TODO confirm against the caller)
#   "type"      - a short product-type label
PRODUCTS = {
# Category "外设产品" (peripherals): keyboards and mice.
"外设产品":
[
{
"name": "VGN V87有线/无线/蓝牙三模客制化机械键盘gasket结构全键热插拔游戏电竞办公键盘IP联名款 V87PRO 阿尼亚轴 雅典娜",
"productid": "10088121691076",
"type": "键盘"
},
{
"name": "迈从(MCHOSE)X75客制化机械键盘无线三模gasket结构全键热插拔蓝牙电竞游戏办公 冰蓝苍穹-风信子轴",
"productid": "10084362354326",
"type": "键盘"
},
{
"name": "狼蛛(AULA)F87客制化机械键盘全键热插拔gasket结构三模无线蓝牙电竞游戏办公键盘 F87 Pro时空秘境 雾透侧刻 太空金轴",
"productid": "10090891841395",
"type": "键盘"
},
{
"name": "VGN V98PRO V2 三模有线/蓝牙/无线 客制化键盘 机械键盘 电竞游戏 办公家用 全键热插拔 gasket结构 V98Pro-V2 极地狐轴 限定款",
"productid": "10091508046043",
"type": "键盘"
},
{
"name": "罗技(G) G903 LIGHTSPEED 无线电竞充电游戏鼠标 hero芯片逻辑赛博朋克宏吃鸡鼠标 G903HERO无线游戏鼠标",
"productid": "53121348244",
"type": "鼠标"
},
],
# Category "电脑配件" (computer accessories): monitors.
"电脑配件":
[
{
"name": "AOC 27英寸 2K IPS快速液晶 240Hz 10Bit全开大乌兹 HDR400 低蓝光不闪 游戏电竞电脑显示器 Q27G3Z",
"productid": "100051562321",
"type": "显示器"
},
{
"name": "AOC 27英寸 2K HDR400 IPS 原生180Hz 1ms 10Bit 满血小金刚MAX 旋转升降 游戏电竞显示器 Q27G2S/D",
"productid": "100021043464",
"type": "显示器"
}
],
}

# Data storage path (a relative path; no consumer of this constant is visible in this file).
DATA_PATH = "data"

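# Proxy settings
# The PROXIES block below is commented out, i.e. no proxy is configured.
# To send requests through a proxy, uncomment the four PROXIES lines below and adjust the addresses.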
# PROXIES = {
# "http": "http://127.0.0.1:7890",
# "https": "http://127.0.0.1:7890",
# }
# Raw Cookie header value.
# The adjacent string literals inside the parentheses are concatenated by Python
# into one "name=value; name=value; ..." string.
# NOTE(review): several entries (e.g. "pin", "thor", "token") look like values of a
# logged-in browser session committed to source control - confirm, and consider
# loading the cookie from an environment variable or an untracked file instead.
cookie = ("__jdu=1708653980687272576454; "
"shshshfpa=e06713c9-50ca-abad-b1cd-3ad465c28803-1708691725; "
"shshshfpx=e06713c9-50ca-abad-b1cd-3ad465c28803-1708691725; "
"pinId=Dusx94KFDmYhD-3Yboc6LQ; "
"pin=jd_vQgsEnIZspMn; "
"unick=%E4%BD%A0%E4%BB%AC%E6%89%93%E5%9B%A2%E6%88%91%E5%8D%96%E8%90%8C; "
"_tp=Zmq7%2FP8P7IGN6KhKyt6cZQ%3D%3D; "
"_pst=jd_vQgsEnIZspMn; "
"b_webp=1; "
"b_avif=1; "
"autoOpenApp_downCloseDate_auto=1712933137877_1800000; "
"b_dh=951; "
"b_dpr=1; "
"b_dw=1850; "
"__jdv=76161171|direct|-|none|-|1714420442639; "
"PCSYCityID=CN_620000_621000_0; "
"TrackID=1G-MmCrg2rDLoozbyXNVPMHSTQaN4nAWOrk4GtuEECd1_PXWL1D6OCP8a6bejS0FD3yZ-rJegmcbXmkcXpd4AMwWgoR5ZhwB9oyQnfKrkS_Mpm9BawUZytrOAA7QPQD8J; "
"thor=82E7DAD074A2F55D0EF704FCF3C4DD87486776F9CCBB1560F7C638540DBADD9342225CC9BFD1053BC83604AEEF4A57B8F1405E13C8B64015794C58FD4B96C4D9DD3C86598DFF1902240F0921C05BD6F4B311AA5150951F998A590944280F869132C14B1A82DCAB53C2AABAE44D14E61C8FE5D7E8F9196EE279D6874A8944421966CA5216652FD45426681117E7AC0CCDF4BE13E26232C6AAC8EC7FF107436168; "
"flash=2_J9DOQAkN6bg0ghyazpOXqiMmc-ibFHz2oEY2V1H7kXG7WfDf-DnxwPk3wH31EevBbHhevrEjEXrITrTbFcbPqPONgb3Y4MHEOnbMufUOOf4VcTjLu-5VGv5fkSP7KBfLZesbzaQzs_HKCITpbp6HU_DaLrKRaevW9glAiOez1OP*; "
"areaId=28; "
"ipLoc-djd=28-2525-2529-17638; "
"jcap_dvzw_fp=h9J_m8zs5G9vz9DOXSJY-5EUGPhMPX1648QCPYbLCpMt5k5nBxwwtbh3bZmWOUik-AGzVnk9w1UfnQUvZgkViQ==; "
"3AB9D23F7A4B3C9B=J27IVEQH32WSMEDRLBC23DAU3AF25GTMWOGGUPXM66CWQF5VMZKCVBD4U2GKSVGOCI7GDTOBVKGIEODPUKBHKMNDYI; "
"shshshfpb=BApXcxeLBOOpAkb3Zoo1FGQMEU6PeBDnZBkooEAp29xJ1MrKqxYO2; "
"3AB9D23F7A4B3CSS=jdd03J27IVEQH32WSMEDRLBC23DAU3AF25GTMWOGGUPXM66CWQF5VMZKCVBD4U2GKSVGOCI7GDTOBVKGIEODPUKBHKMNDYIAAAAMPH2JL6TQAAAAAD7HTX5QXMZMW7YX; "
"_gia_d=1; "
"token=2305042806cb3f890aa4f303f65deed1,3,952634; "
"mt_xid=V2_52007VwMUU1RfVlgXQBhbDGEAFFFZXlVfG0wpWgRiBxBaDlBOUxYaGkAAMFYWTg5ZBVkDHBFbATMFQFUICAIJL0oYXwB7AhZOXFpDWR9CGFQOZwsiUG1YYl8bTxlcAGAGFldtXVNTGQ%3D%3D; "
"__tk=nDnCnZnMiUiKRUGLRxG5iZaKWlaCiZiLRUa5ixbOVZy,3,952634; "
"jsavif=1; "
"__jda=181111935.1708653980687272576454.1708653980.1714693695.1714741755.10; "
"__jdb=181111935.2.1708653980687272576454|10.1714741755; "
"__jdc=181111935")
253 changes: 253 additions & 0 deletions h5st.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
import hashlib
import json
import os
import pickle
import random
import subprocess
import time
import copy

import execjs
import requests
from Crypto.Cipher import AES

from utils.codec_utils import HEX
from utils.digester_utils import Digester
from datetime import datetime
from datetime import timezone, timedelta

from utils.symmetric_crypto_utils import SymmetricCrypto


class MySubprocessPopen(subprocess.Popen):
    """``subprocess.Popen`` subclass whose pipes default to UTF-8 text mode.

    Every positional and keyword argument is forwarded unchanged to
    ``subprocess.Popen``; the only difference is that ``encoding`` defaults to
    ``'UTF-8'`` when the caller does not supply one.
    """

    def __init__(self, *args, **kwargs):
        # setdefault instead of a hard-coded keyword: passing ``encoding=...``
        # explicitly used to raise "got multiple values for keyword argument
        # 'encoding'"; now the caller's choice simply wins.
        kwargs.setdefault('encoding', 'UTF-8')
        super().__init__(*args, **kwargs)


# Replace the stdlib class process-wide so that any code resolving
# ``subprocess.Popen`` after this point gets the UTF-8 default
# (presumably for the execjs calls made in this module - TODO confirm).
subprocess.Popen = MySubprocessPopen


def generate_random_code(
        length,
        char_set='0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-'):
    """
    Generate a random string of the given length.

    Characters are drawn independently with ``random.choice``, so the result
    is not suitable where cryptographic randomness is required.

    :param length: number of characters to generate; ``0`` yields ``''``.
    :param char_set: characters to draw from (defaults to digits, ASCII
        letters, ``_`` and ``-``). Must be non-empty when ``length > 0``,
        otherwise ``random.choice`` raises ``IndexError``.
    :return: the generated random string.
    """
    return ''.join(random.choice(char_set) for _ in range(length))


def gen_sign(key, t):
    """
    Compute the signature digest for a list of key/value entries.

    Each entry of ``t`` is rendered as ``"<key>:<value>"``, the rendered
    entries are joined with ``&``, the result is wrapped with ``key`` on both
    sides and passed to ``Digester.md5``.

    :param key: string placed before and after the joined entries.
    :param t: iterable of mappings, each with a ``"key"`` and a ``"value"`` item.
    :return: whatever ``Digester.md5`` returns for the wrapped string.
    """
    pairs = []
    for entry in t:
        pairs.append(f'{entry["key"]}:{entry["value"]}')
    joined = '&'.join(pairs)
    return Digester.md5(f'{key}{joined}{key}')


class H5ST(object):
    """
    Build the ``h5st`` parameter string for requests about one JD item.

    The instance keeps three values returned by the ``request_algo`` endpoint:
    ``algo`` (JavaScript source of the key function), ``token`` and
    ``fingerPrint``. They are cached on disk under ``algo_dir_path`` so that a
    later run can reuse them instead of calling the endpoint again.
    """

    # Initialisation vector shared by both AES-CBC encrypted payloads.
    _AES_IV = '0102030405060708'
    # Seconds to wait for the request_algo endpoint before giving up.
    _REQUEST_TIMEOUT = 10

    def __init__(self, skuId):
        """
        :param skuId: item id; used to build the ``referer`` header in ``get_algo``.
        """
        self.sku_id = skuId
        self.appid = 'fb5df'
        self.version = '4.4'
        self.algo = None
        self.fingerPrint = None
        self.token = None
        self.algo_dir_path = './algo/'

    @staticmethod
    def _encrypt_payload(aes_key, aes_iv, payload):
        """Serialise ``payload`` as 2-space-indented JSON and AES-CBC encrypt it (HEX codec)."""
        key = aes_key.encode('utf8')
        iv = aes_iv.encode('utf8')
        plaintext = json.dumps(payload, indent=2).encode('utf8')
        return SymmetricCrypto.encrypt_aes(key, plaintext, AES.MODE_CBC, HEX, iv)

    def encrypt_env(self):
        """
        Encrypt the environment description that forms the last ``h5st`` field.

        Reads ``self.fingerPrint``; a fresh 11-character random code is
        embedded on every call, so two calls return different ciphertexts.

        :return: the ciphertext produced by ``SymmetricCrypto.encrypt_aes``.
        """
        aes_key = 'r1T.6Vinpb.k+/a)'
        env_data = {
            "sua": "Windows NT 10.0; Win64; x64",
            "pp": {
                "p2": "jd_4536d74677e8d"
            },
            "extend": {
                "wd": 0,
                "l": 0,
                "ls": 5,
                "wk": 0,
                "bu1": "0.1.9",
                "bu2": -1,
                "bu3": 91,
                "bu4": 0,
                "bu5": 0
            },
            "random": generate_random_code(11),
            "v": "h5_file_v4.4.0",
            "fp": self.fingerPrint,
            "bu1": "0.1.8"
        }
        return self._encrypt_payload(aes_key, self._AES_IV, env_data)

    def gen_key(self, u):
        """
        Run the cached ``algo`` function in a JavaScript runtime to derive the signing key.

        NOTE(security): ``self.algo`` is JavaScript source taken from the
        ``request_algo`` response (or from the local cache) and is executed
        as-is through ``execjs``.

        :param u: timestamp string passed to the algo function as ``ts``.
        :return: the value returned by the JavaScript ``genKey`` call.
        """
        nodejs = '''
const CryptoJS = require('crypto-js');
const cryptojs= {
HmacMD5: function (text, secret){return CryptoJS.HmacMD5(text,secret).toString()},
HmacSHA256: function (text, secret){return CryptoJS.HmacSHA256(text, secret).toString()},
HmacSHA512: function (text, secret){return CryptoJS.HmacSHA512(text, secret).toString()},
MD5: function (text){return CryptoJS.MD5(text).toString()},
SHA256: function (text){return CryptoJS.SHA256(text).toString()},
SHA512: function (text){return CryptoJS.SHA512(text).toString()},
}
function genKey(tk,fp,ts,ai){
return fn(tk,fp,ts,ai,cryptojs)
}
'''
        # The server-provided function body is bound to ``fn``, which genKey calls.
        nodejs = f'{nodejs}\n const fn = {self.algo}'
        return execjs.compile(nodejs).call("genKey", self.token, self.fingerPrint, u, self.appid)

    def gen_h5st(self, l):
        """
        Build the ``h5st`` string for the given request parameters.

        Loads the cached algo data, or fetches it with ``get_algo`` when no
        usable cache exists, then signs the parameters.

        :param l: mapping of parameter name to value; entries are signed in
            sorted-key order.
        :return: eight ``;``-separated fields: timestamp string, fingerprint,
            appid, token, signature, version, millisecond timestamp and the
            encrypted environment.
        """
        if self.load_algo_to_local() is None:
            self.get_algo()
        # Collect the parameters in alphabetical key order.
        t = [{'key': k, 'value': l[k]} for k in sorted(l.keys())]
        timestamp_ms = int(time.time() * 1000)
        # Render the instant in UTC+8. A timezone-aware conversion replaces the
        # deprecated datetime.utcfromtimestamp and yields the same wall-clock value.
        beijing_time = datetime.fromtimestamp(
            timestamp_ms / 1000.0, tz=timezone(timedelta(hours=8)))
        # 14 date/time digits plus the first 3 digits of %f (milliseconds) = 17 chars.
        date_time = beijing_time.strftime("%Y%m%d%H%M%S%f")[:17]
        u = date_time + '88'
        env_data = self.encrypt_env()
        key = self.gen_key(u)
        sign = gen_sign(key, t)
        h5st = f'{date_time};{self.fingerPrint};{self.appid};{self.token};{sign};{self.version};{timestamp_ms};{env_data}'
        return h5st

    def save_algo_to_local(self, algo):
        """
        Pickle ``algo`` to ``<algo_dir_path>/algo.algo``, creating the directory if needed.

        :param algo: object to cache; ``get_algo`` passes a dict with the keys
            ``token``, ``algo`` and ``fingerPrint``.
        """
        # os.path.join keeps the path correct whether or not algo_dir_path
        # ends with a separator.
        algo_file = os.path.join(self.algo_dir_path, 'algo.algo')
        directory = os.path.dirname(algo_file)
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(algo_file, 'wb') as f:
            pickle.dump(algo, f)

    def load_algo_to_local(self):
        """
        Load cached algo data from the first usable ``*.algo`` file in ``algo_dir_path``.

        Files are tried in sorted name order. A file that cannot be unpickled
        or lacks one of the expected keys is skipped, so a truncated cache
        leads to a refetch rather than a crash. On success ``self.algo``,
        ``self.token`` and ``self.fingerPrint`` are updated.

        :return: the cached dict, or ``None`` when no usable cache file exists.
        """
        if not os.path.isdir(self.algo_dir_path):
            return None
        for name in sorted(os.listdir(self.algo_dir_path)):
            if not name.endswith('.algo'):
                continue
            algo_path = os.path.join(self.algo_dir_path, name)
            try:
                with open(algo_path, 'rb') as f:
                    # NOTE(security): pickle.load can execute arbitrary code;
                    # only keep cache files written by save_algo_to_local here.
                    cached = pickle.load(f)
                algo = cached['algo']
                token = cached['token']
                finger_print = cached['fingerPrint']
            except (pickle.UnpicklingError, EOFError, KeyError, TypeError):
                # Unreadable or incomplete cache entry: try the next file.
                continue
            self.algo = algo
            self.token = token
            self.fingerPrint = finger_print
            return cached
        return None

    def get_algo(self):
        """
        Request fresh algo data from ``https://cactus.jd.com/request_algo`` and cache it.

        Generates a fingerprint by running ``genFingerPrint`` from
        ``fingerPrint.js`` (read from the current working directory), posts an
        encrypted environment description, stores the returned values on the
        instance and writes them to the local cache.

        :return: tuple ``(fingerPrint, token, algo)`` taken from the response.
        :raises requests.RequestException: on timeout, connection failure or a
            non-success HTTP status.
        """
        headers = {
            # Host of the endpoint actually requested below.
            "authority": "cactus.jd.com",
            "accept": "application/json",
            "accept-language": "zh-CN,zh;q=0.9",
            "content-type": "application/json",
            "origin": "https://item.jd.com",
            "referer": f"https://item.jd.com/{self.sku_id}.html",
            "sec-ch-ua": "\"Chromium\";v=\"122\", \"Not(A:Brand\";v=\"24\", \"Google Chrome\";v=\"122\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"Windows\"",
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-site",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
        }
        url = "https://cactus.jd.com/request_algo"
        params = {
            "g_ty": "ajax"
        }
        with open('fingerPrint.js', encoding='utf-8') as f:
            js = f.read()
        fp = execjs.compile(js).call("genFingerPrint")

        aes_key = 'wm0!@w-s#ll1flo('
        # NOTE(review): "url" below is a fixed item page while the referer
        # header uses self.sku_id - confirm whether they are meant to match.
        env = {
            "wc": 0,
            "wd": 0,
            "l": "zh-CN",
            "ls": "zh-CN,zh",
            "ml": 2,
            "pl": 5,
            "av": "5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
            "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
            "sua": "Windows NT 10.0; Win64; x64",
            "pp": {
                "p2": "jd_4536d74677e8d"
            },
            "extend": {
                "wd": 0,
                "l": 0,
                "ls": 5,
                "wk": 0,
                "bu1": "0.1.9",
                "bu2": 0,
                "bu3": 92,
                "bu4": 0,
                "bu5": 0
            },
            "pp1": "",
            "bu1": "",
            "w": 1920,
            "h": 1080,
            "ow": 160,
            "oh": 28,
            "url": "https://item.jd.com/100071422470.html",
            "og": "https://item.jd.com",
            "pr": 1,
            "re": "https://cfe.m.jd.com/",
            "random": generate_random_code(11),
            "referer": "https://cfe.m.jd.com/",
            "v": "h5_file_v4.4.0",
            "ai": self.appid,
            "fp": fp
        }
        ciphertext = self._encrypt_payload(aes_key, self._AES_IV, env)
        payload = {
            "version": self.version,
            "fp": fp,
            "appId": self.appid,
            "timestamp": int(time.time() * 1000),
            "platform": "web",
            "expandParams": ciphertext,
            "fv": "h5_file_v4.4.0"
        }
        body = json.dumps(payload, separators=(',', ':'))
        # A timeout keeps a stalled connection from blocking the caller forever.
        response = requests.post(url, headers=headers, params=params, data=body,
                                 timeout=self._REQUEST_TIMEOUT)
        # Fail on HTTP errors here instead of on a confusing JSON/KeyError below.
        response.raise_for_status()
        resp_dict = json.loads(response.text)
        result = resp_dict['data']['result']
        token = result['tk']
        algo = result['algo']
        fingerPrint = result['fp']
        print(f'token={token}\nalgo={algo}\nfingerPrint={fingerPrint}')
        algo_file = {'token': token, 'algo': algo, 'fingerPrint': fingerPrint}
        self.algo = algo
        self.token = token
        self.fingerPrint = fingerPrint
        self.save_algo_to_local(algo_file)
        return fingerPrint, token, algo


Loading

0 comments on commit 3b3a66f

Please sign in to comment.