-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgenerate_download_list.py
114 lines (95 loc) · 3.99 KB
/
generate_download_list.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import requests
from requests import Request, Session
import urllib
import json
import bs4 as bs
import re
import time
import toolkit_sqlite
import config
# Endpoint that query_link_generate() POSTs the video URL to.
QUERY_URL = 'https://y2mate.com/analyze/ajax'
# Text file opened by read_video_list(); its content is split on newlines.
VIDEO_LIST = 'youtube_video.list'
# Proxy settings taken from the project config; passed as `proxies=` when the
# query request is sent.
proxy = config.proxy
def read_video_list(path=None):
    '''Return the video links listed in a text file, one link per line.

    Args:
        path: File to read. When omitted, the module-level VIDEO_LIST
            is used.

    Returns:
        A list of the non-blank lines, in file order, with surrounding
        whitespace (including any '\\r' from Windows line endings) removed.
    '''
    if path is None:
        path = VIDEO_LIST
    with open(path, 'r') as f:
        stripped_lines = (line.strip() for line in f)
        # Drop blank lines (e.g. the one produced by a trailing newline) so
        # callers never receive an empty string as a "link".
        return [line for line in stripped_lines if line]
def get_best_download_link(htmlPage):
    '''Pick the highest-resolution entry from the download table in htmlPage.

    Only rows of the first <table> that have exactly three <td> cells
    (resolution, size, download link) and whose link starts with 'http' are
    considered.

    Args:
        htmlPage: HTML fragment containing the download table.

    Returns:
        A dict with keys:
            'resolution': int parsed from the text before the first 'p'
                of the resolution cell (e.g. '720p' -> 720),
            'link': the href of the download anchor,
            'name': the anchor's 'download' attribute,
            'size': the stripped text of the size cell.

    Raises:
        ValueError: if the HTML has no table or no usable row.
        KeyError: if a usable anchor lacks the 'download' attribute.
    '''
    soup = bs.BeautifulSoup(htmlPage, 'lxml')
    table = soup.table
    if table is None:
        raise ValueError('no download table found in the page')

    records = []
    for row in table.findAll('tr'):
        cells = row.findAll('td')
        if len(cells) != 3:
            continue
        resolution_tag, size_tag, link_tag = cells

        anchor = link_tag.a
        if anchor is None:
            continue
        href = anchor.attrs.get('href', '')
        if not href.startswith('http'):
            continue

        try:
            resolution = int(resolution_tag.text.strip().split('p')[0])
        except ValueError:
            # Row whose first cell is not of the form '<number>p...';
            # it cannot be ranked by resolution, so skip it.
            continue

        records.append({
            'resolution': resolution,
            'link': href,
            'name': anchor.attrs['download'],
            # Store the cell text, not the Tag object, so callers get a
            # plain string instead of the cell's HTML markup.
            'size': size_tag.text.strip(),
        })

    if not records:
        raise ValueError('no downloadable video rows found in the table')
    return max(records, key=lambda record: record['resolution'])
def query_link_generate(youtube_link, timeout=30):
    '''Query QUERY_URL for a video's download options and return the best one.

    Example:
        query_link_generate('https://www.youtube.com/watch?v=iAzShkKzpJo')

    Args:
        youtube_link: URL of the YouTube video.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole run.

    Returns:
        The record dict returned by get_best_download_link() for the HTML
        found under the "result" key of the JSON response.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
        ValueError: if the response body is not valid JSON.
        KeyError: if the JSON response has no "result" entry.
    '''
    # Imported explicitly: a bare "import urllib" does not guarantee that the
    # urllib.parse submodule has been loaded.
    from urllib.parse import quote_plus

    data = 'url={}&ajax=1'.format(quote_plus(youtube_link))
    headers = {
        "accept": "*/*",
        "accept-language": "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7",
        "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
        "origin": "https://y2mate.com",
        "referer": "https://y2mate.com/youtube",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36",
        "x-requested-with": "XMLHttpRequest",
    }
    request = Request('POST', QUERY_URL, data=data, headers=headers)
    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as session:
        prepared = session.prepare_request(request)
        response = session.send(prepared, proxies=proxy, timeout=timeout)
        # Fail with the HTTP error itself rather than a confusing JSON
        # decoding error on an error page.
        response.raise_for_status()
        response_text = response.content.decode('utf-8')

    result = json.loads(response_text)
    return get_best_download_link(result["result"])
def _sql_quote(value):
    '''Render value as a single-quoted SQL string literal.

    Embedded single quotes are doubled, which is how SQL string literals
    escape them, so values such as video titles containing an apostrophe
    cannot break or alter the statement.
    '''
    return "'{}'".format(str(value).replace("'", "''"))


def main():
    '''Rebuild the download_status table from the video list.

    Deletes all existing rows, then for every link returned by
    read_video_list() resolves the best download link and inserts one row
    with status 'PENDING'. Sleeps one second after each video.
    '''
    # Read the list once instead of re-reading the file on every iteration.
    video_links = read_video_list()
    total = len(video_links)
    print('Count of videos to be downloaded: {}'.format(total))

    with toolkit_sqlite.SqliteDB(config.DB_FILE) as db:
        truncate_sql = '''DELETE FROM download_status;'''
        db.execute(truncate_sql)
        for count, originalLink in enumerate(video_links, start=1):
            video_info = query_link_generate(originalLink)
            print('{}/{} Got {}: {}p'.format(
                count, total, video_info['name'], video_info['resolution']))
            # NOTE(review): values are escaped here because the signature of
            # SqliteDB.execute is not visible from this file; if it accepts
            # bound parameters, prefer "?" placeholders instead.
            insert_sql = '''INSERT INTO download_status (
                id,
                video_name,
                resolution,
                size,
                download_link,
                original_link,
                status
            )
            VALUES (
                {id},
                {video_name},
                {resolution},
                {size},
                {download_link},
                {original_link},
                {status}
            );'''.format(
                id=_sql_quote(count),
                video_name=_sql_quote(video_info['name']),
                resolution=_sql_quote(video_info['resolution']),
                size=_sql_quote(video_info['size']),
                download_link=_sql_quote(video_info['link']),
                original_link=_sql_quote(originalLink),
                status=_sql_quote('PENDING'),
            )
            db.execute(insert_sql)
            # Pause between videos to avoid hammering the remote service.
            time.sleep(1)


if __name__ == '__main__':
    main()