-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
117 lines (97 loc) · 3.52 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import logging
import multiprocessing as mp
from os.path import exists, expanduser, abspath
from handler_mangadex import HandlerMangaDex
from handler_mangahere import HandlerMangaHere
from constants import *
# Full path to the root of the manga library ("~" is expanded)
ROOT_LIB_PATH = expanduser("~/Desktop/manga_library/")

# Text file listing every manga to download, one link per line, e.g.
#   https://mangadex.org/title/XXXX
#   https://mangadex.org/title/XXXX
# The file can be generated with grab_urls.sh
LIBRARY_LINKS_FILE_PATH = "library_links.txt"

# Log file used for debugging, e.g. to find a failed download or the
# last operation that occurred before an exception was raised
LOG_FILE_PATH = abspath("log.txt")

# File that records the missing chapters of each title
MISSING_CHS_LOG_FILE_PATH = abspath("missing.txt")

# Number of handler workers to run concurrently
MAX_THREADS = 1

# Mapping from thread id to the URLs that thread is responsible for
#   key:   tid (int)
#   value: URLs (Set[str])
TID_TO_URLS = {}
def create_thread_mapping():
    """
    Distribute the URLs listed in LIBRARY_LINKS_FILE_PATH over thread ids.

    Every line of the file is stripped of surrounding whitespace; blank
    lines are ignored. The remaining URLs are added to TID_TO_URLS
    round-robin, i.e. the n-th URL goes to tid ``n % MAX_THREADS``.
    Every tid in ``range(MAX_THREADS)`` receives an entry, even when it
    ends up with no URLs, so a worker can always look up its own id.

    Raises:
        FileNotFoundError: if LIBRARY_LINKS_FILE_PATH does not exist.
    """
    logging.info("[create_thread_mapping]: Parsing " + LIBRARY_LINKS_FILE_PATH)

    # Explicit check rather than `assert`: asserts are removed when Python
    # runs with -O, which would silently skip the validation.
    if not exists(LIBRARY_LINKS_FILE_PATH):
        raise FileNotFoundError(
            "Library links file not found: " + LIBRARY_LINKS_FILE_PATH)

    # The `with` statement closes the file; no explicit close() is needed.
    with open(LIBRARY_LINKS_FILE_PATH, "r") as fd:
        urls = [line.strip() for line in fd]

    # Make sure each tid has a (possibly empty) set of URLs.
    for tid in range(MAX_THREADS):
        TID_TO_URLS.setdefault(tid, set())

    counter = 0
    for url in urls:
        # A blank line is not a URL; storing it would later be reported
        # as an unknown source and abort the run.
        if not url:
            continue
        TID_TO_URLS[counter % MAX_THREADS].add(url)
        counter += 1

    logging.info("[create_thread_mapping]: Done parsing " + LIBRARY_LINKS_FILE_PATH)
def run_handler_thread(tid):
    """
    Process every URL assigned to thread id *tid* in TID_TO_URLS.

    URLs are stripped of surrounding whitespace, de-duplicated and handled
    in sorted order. For each URL a handler is chosen from the URL text
    ("mangadex" -> HandlerMangaDex, "mangahere" -> HandlerMangaHere) and
    then driven through reset_for_next_title, init_for_title,
    extract_metadata and get_update.

    A tid with no assigned URLs is a no-op.

    Raises:
        Exception: if a URL matches none of the known sources.
    """
    logging.info("[run_handler_thread]: Starting tid " + str(tid))

    # .get(): a tid without any assigned URLs has nothing to do, which is
    # not an error (e.g. more workers than links).
    assigned = TID_TO_URLS.get(tid, ())
    # Strip before sorting and drop blank entries so an empty line in the
    # links file cannot abort the whole run as an "unknown source".
    urls = sorted({raw.strip() for raw in assigned} - {""})

    for url in urls:
        if "mangadex" in url:
            mh = HandlerMangaDex(tid, SOURCE_MANGADEX, MISSING_CHS_LOG_FILE_PATH)
        elif "mangahere" in url:
            mh = HandlerMangaHere(tid, SOURCE_MANGAHERE)
        else:
            # Record which URL failed before raising; the exception
            # message itself is unchanged.
            logging.error("[run_handler_thread]: Unknown source for URL: " + url)
            raise Exception("Unknown source!")

        # Both handlers are driven through the same sequence of calls.
        mh.reset_for_next_title()
        mh.init_for_title(ROOT_LIB_PATH, url)
        mh.extract_metadata()
        mh.get_update()
if __name__ == "__main__":
    # Log INFO and above to LOG_FILE_PATH; filemode "w" truncates the log
    # left over from a previous run.
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s;%(levelname)s;%(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        filename=LOG_FILE_PATH,
        filemode="w",
    )

    # Also echo INFO and above to the console (sys.stderr) using a
    # simpler format than the one used for the log file.
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    console.setFormatter(
        logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s'))
    logging.getLogger().addHandler(console)

    logging.info("[main]: Starting!")

    # Opening in "w" mode truncates the missing-chapters file; the `with`
    # statement closes it again.
    with open(MISSING_CHS_LOG_FILE_PATH, "w"):
        pass

    create_thread_mapping()

    if MAX_THREADS == 1:
        # Single process version
        run_handler_thread(0)
    else:
        # Multi process version.
        # The initializer rebuilds TID_TO_URLS inside every worker: with
        # the "spawn" start method workers re-import this module without
        # running this __main__ block, so they would otherwise see an
        # empty mapping. The context manager releases the pool when done.
        with mp.Pool(MAX_THREADS, initializer=create_thread_mapping) as pool:
            pool.map(run_handler_thread, range(MAX_THREADS))

    logging.info("[main]: Completed!")