Skip to content
This repository has been archived by the owner on Jan 2, 2021. It is now read-only.

Commit

Permalink
Merge branch 'development'
Browse files: browse the repository at this point in the history
  • Loading branch information
evilhero committed Mar 29, 2019
2 parents c79cbe5 + a6a14cb commit 3f30fae
Show file tree
Hide file tree
Showing 8 changed files with 152 additions and 73 deletions.
9 changes: 6 additions & 3 deletions mylar/PostProcessor.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1420,11 +1420,14 @@ def Process(self):
if self.oneoffinlist is False:
self.oneoff = False
if any([self.issueid is not None, self.issuearcid is not None]):
if self.issueid is not None:
s_id = self.issueid
else:
if self.issuearcid is not None:
s_id = self.issuearcid
else:
s_id = self.issueid
nzbiss = myDB.selectone('SELECT * FROM nzblog WHERE IssueID=?', [s_id]).fetchone()
if nzbiss is None and self.issuearcid is not None:
nzbiss = myDB.selectone('SELECT * FROM nzblog WHERE IssueID=?', ['S'+s_id]).fetchone()

else:
nzbname = self.nzb_name
#remove extensions from nzb_name if they somehow got through (Experimental most likely)
Expand Down
6 changes: 3 additions & 3 deletions mylar/auth32p.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -418,11 +418,11 @@ def downloadfile(self, payload, filepath):

if str(r.status_code) != '200':
logger.warn('Unable to download torrent from 32P [Status Code returned: %s]' % r.status_code)
if str(r.status_code) == '404' and site == '32P':
if str(r.status_code) == '404':
logger.warn('[32P-CACHED_ENTRY] Entry found in 32P cache - incorrect. Torrent has probably been merged into a pack, or another series id. Removing from cache.')
helpers.delete_cache_entry(linkit)
self.delete_cache_entry(payload['id'])
else:
logger.info('content: %s' % r.content)
logger.fdebug('content: %s' % r.content)
return False


Expand Down
50 changes: 34 additions & 16 deletions mylar/filechecker.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -408,19 +408,19 @@ def parseit(self, path, filename, subpath=None):
lastmod_position = 0
booktype = 'issue'
#exceptions that are considered alpha-numeric issue numbers
exceptions = ('NOW', 'AI', 'AU', 'X', 'A', 'B', 'C', 'INH', 'MU', 'HU', 'SUMMER', 'SPRING', 'FALL', 'WINTER')
exceptions = ('NOW', 'AI', 'AU', 'X', 'A', 'B', 'C', 'INH', 'MU', 'HU', 'SUMMER', 'SPRING', 'FALL', 'WINTER', 'PREVIEW')

#unicode characters, followed by int value
# num_exceptions = [{iss:u'\xbd',val:.5},{iss:u'\xbc',val:.25}, {iss:u'\xe',val:.75}, {iss:u'\221e',val:'infinity'}]

file_length = 0
validcountchk = False
sep_volume = False
current_pos = -1
current_pos = -1
for sf in split_file:
current_pos +=1
#the series title will always be first and be AT LEAST one word.
if split_file.index(sf) >= 1 and not volumeprior:
if split_file.index(sf) >= 0 and not volumeprior:
dtcheck = re.sub('[\(\)\,]', '', sf).strip()
#if there's more than one date, assume the right-most date is the actual issue date.
if any(['19' in dtcheck, '20' in dtcheck]) and not any([dtcheck.lower().startswith('v19'), dtcheck.lower().startswith('v20')]) and len(dtcheck) >=4:
Expand Down Expand Up @@ -775,11 +775,11 @@ def parseit(self, path, filename, subpath=None):
for x in possible_years:
logger.info('yearposition[%s] -- dc[position][%s]' % (yearposition, x['yearposition']))
if yearposition < x['yearposition']:
if all([len(possible_issuenumbers) == 1, possible_issuenumbers[0]['number'] == x['year'], x['yearposition'] != possible_issuenumbers[0]['position']]):
if all([len(possible_issuenumbers) == 1, possible_issuenumbers[0]['number'] == x['year'], x['yearposition'] != possible_issuenumbers[0]['position']]):
issue2year = True
highest_series_pos = x['yearposition']
yearposition = x['yearposition']
yearmodposition = x['yearmodposition']
yearposition = x['yearposition']
yearmodposition = x['yearmodposition']

if highest_series_pos > yearposition: highest_series_pos = yearposition #dc['position']: highest_series_pos = dc['position']
else:
Expand All @@ -790,7 +790,6 @@ def parseit(self, path, filename, subpath=None):


logger.fdebug('highest_series_position: ' + str(highest_series_pos))

issue_number = None
dash_numbers = []
issue_number_position = len(split_file)
Expand All @@ -811,28 +810,28 @@ def parseit(self, path, filename, subpath=None):
for pis in sorted(possible_issuenumbers, key=operator.itemgetter('position'), reverse=True):
a = ' '.join(split_file)
lenn = pis['mod_position'] + len(pis['number'])
if lenn == len(a):
if lenn == len(a) and finddash != -1:
logger.fdebug('Numeric detected as the last digit after a hyphen. Typically this is the issue number.')
if pis['position'] != yearposition:
issue_number = pis['number']
logger.info('Issue set to: ' + str(issue_number))
issue_number_position = pis['position']
if highest_series_pos > pis['position']: highest_series_pos = pis['position']
#break
if pis['validcountchk'] == True:
elif pis['validcountchk'] == True:
issue_number = pis['number']
issue_number_position = pis['position']
logger.fdebug('Issue verified and detected as part of a numeric count sequnce: ' + issue_number)
if highest_series_pos > pis['position']: highest_series_pos = pis['position']
break
if pis['mod_position'] > finddash and finddash != -1:
elif pis['mod_position'] > finddash and finddash != -1:
if finddash < yearposition and finddash > (yearmodposition + len(split_file[yearposition])):
logger.fdebug('issue number is positioned after a dash - probably not an issue number, but part of an issue title')
dash_numbers.append({'mod_position': pis['mod_position'],
'number': pis['number'],
'position': pis['position']})
continue
if yearposition == pis['position']:
elif yearposition == pis['position']:
logger.fdebug('Already validated year, ignoring as possible issue number: ' + str(pis['number']))
continue
if p == 1:
Expand Down Expand Up @@ -934,8 +933,10 @@ def parseit(self, path, filename, subpath=None):
break
else:
try:
if possible_years[0]['yearposition'] <= highest_series_pos:
if possible_years[0]['yearposition'] <= highest_series_pos and possible_years[0]['year_position'] != 0:
highest_series_pos = possible_years[0]['yearposition']
elif possible_years[0]['year_position'] == 0:
yearposition = 1
except:
pass

Expand Down Expand Up @@ -1013,7 +1014,14 @@ def parseit(self, path, filename, subpath=None):
#here we should account for some characters that get stripped out due to the regex's
#namely, unique characters - known so far: +
#c1 = '+'
series_name = ' '.join(split_file[:highest_series_pos])
#series_name = ' '.join(split_file[:highest_series_pos])
if yearposition != 0:
series_name = ' '.join(split_file[:highest_series_pos])
else:
if highest_series_pos <= issue_number_position and all([len(split_file[0]) == 4, split_file[0].isdigit()]):
series_name = ' '.join(split_file[:highest_series_pos])
else:
series_name = ' '.join(split_file[yearposition+1:highest_series_pos])

for x in list(wrds):
if x != '':
Expand Down Expand Up @@ -1060,11 +1068,19 @@ def parseit(self, path, filename, subpath=None):
#check for annual in title(s) here.
if not self.justparse and all([mylar.CONFIG.ANNUALS_ON, 'annual' not in self.watchcomic.lower(), 'special' not in self.watchcomic.lower()]):
if 'annual' in series_name.lower():
issue_number = 'Annual ' + str(issue_number)
isn = 'Annual'
if issue_number is not None:
issue_number = '%s %s' % (isn, issue_number)
else:
issue_number = isn
series_name = re.sub('annual', '', series_name, flags=re.I).strip()
series_name_decoded = re.sub('annual', '', series_name_decoded, flags=re.I).strip()
elif 'special' in series_name.lower():
issue_number = 'Special ' + str(issue_number)
isn = 'Special'
if issue_number is not None:
issue_number = '%s %s' % (isn, issue_number)
else:
issue_number = isn
series_name = re.sub('special', '', series_name, flags=re.I).strip()
series_name_decoded = re.sub('special', '', series_name_decoded, flags=re.I).strip()

Expand Down Expand Up @@ -1179,7 +1195,9 @@ def matchIT(self, series_info):

if mylar.CONFIG.ANNUALS_ON and 'annual' not in nspace_watchcomic.lower():
if 'annual' in series_name.lower():
justthedigits = 'Annual ' + series_info['issue_number']
justthedigits = 'Annual'
if series_info['issue_number'] is not None:
justthedigits += ' %s' % series_info['issue_number']
nspace_seriesname = re.sub('annual', '', nspace_seriesname.lower()).strip()
nspace_seriesname_decoded = re.sub('annual', '', nspace_seriesname_decoded.lower()).strip()
if alt_series is not None and 'annual' in alt_series.lower():
Expand Down
70 changes: 54 additions & 16 deletions mylar/getcomics.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -34,7 +34,7 @@

class GC(object):

def __init__(self, query=None, issueid=None, comicid=None):
def __init__(self, query=None, issueid=None, comicid=None, oneoff=False):

self.valreturn = []

Expand All @@ -46,6 +46,8 @@ def __init__(self, query=None, issueid=None, comicid=None):

self.issueid = issueid

self.oneoff = oneoff

self.local_filename = os.path.join(mylar.CONFIG.CACHE_DIR, "getcomics.html")

self.headers = {'Accept-encoding': 'gzip', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1', 'Referer': 'https://getcomics.info/'}
Expand All @@ -55,7 +57,7 @@ def search(self):
with cfscrape.create_scraper() as s:
cf_cookievalue, cf_user_agent = s.get_tokens(self.url, headers=self.headers)

t = s.get(self.url+'/', params={'s': self.query}, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True)
t = s.get(self.url+'/', params={'s': self.query}, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True, timeout=30)

with open(self.local_filename, 'wb') as f:
for chunk in t.iter_content(chunk_size=1024):
Expand All @@ -70,7 +72,7 @@ def loadsite(self, id, link):
with cfscrape.create_scraper() as s:
self.cf_cookievalue, cf_user_agent = s.get_tokens(link, headers=self.headers)

t = s.get(link, verify=True, cookies=self.cf_cookievalue, headers=self.headers, stream=True)
t = s.get(link, verify=True, cookies=self.cf_cookievalue, headers=self.headers, stream=True, timeout=30)

with open(title+'.html', 'wb') as f:
for chunk in t.iter_content(chunk_size=1024):
Expand Down Expand Up @@ -251,12 +253,24 @@ def parse_downloadresults(self, id, mainlink):
volume = x.findNext(text=True)
if u'\u2013' in volume:
volume = re.sub(u'\u2013', '-', volume)
series_st = volume.find('(')
issues_st = volume.find('#')
series = volume[:issues_st].strip()
issues = volume[issues_st:series_st].strip()
year_end = volume.find(')', series_st+1)
year = re.sub('[\(\)\|]', '', volume[series_st+1: year_end]).strip()
size_end = volume.find(')', year_end+1)
size = re.sub('[\(\)\|]', '', volume[year_end+1: size_end]).strip()
linkline = x.find('a')
linked = linkline['href']
site = linkline.findNext(text=True)
links.append({"volume": volume,
"site": site,
"link": linked})
links.append({"series": series,
"volume": volume,
"site": site,
"year": year,
"issues": issues,
"size": size,
"link": linked})

if all([link is None, len(links) == 0]):
logger.warn('Unable to retrieve any valid immediate download links. They might not exist.')
Expand All @@ -265,8 +279,11 @@ def parse_downloadresults(self, id, mainlink):
logger.info('only one item discovered, changing queue length to accomodate: %s [%s]' % (link, type(link)))
links = [link]
elif len(links) > 0:
if link is not None:
links.append(link)
logger.fdebug('[DDL-QUEUE] Making sure we download the original item in addition to the extra packs.')
if len(links) > 1:
logger.info('[DDL-QUEUER] This pack has been broken up into %s separate packs - queueing each in sequence for your enjoyment.' % len(links))
logger.fdebug('[DDL-QUEUER] This pack has been broken up into %s separate packs - queueing each in sequence for your enjoyment.' % len(links))
cnt = 1
for x in links:
if len(links) == 1:
Expand Down Expand Up @@ -295,13 +312,15 @@ def parse_downloadresults(self, id, mainlink):
'size': x['size'],
'comicid': self.comicid,
'issueid': self.issueid,
'oneoff': self.oneoff,
'id': mod_id,
'resume': None})
cnt+=1

return {'success': True}

def downloadit(self, id, link, mainlink, resume=None):
#logger.info('[%s] %s -- mainlink: %s' % (id, link, mainlink))
if mylar.DDL_LOCK is True:
logger.fdebug('[DDL] Another item is currently downloading via DDL. Only one item can be downloaded at a time using DDL. Patience.')
return
Expand All @@ -315,8 +334,8 @@ def downloadit(self, id, link, mainlink, resume=None):
if resume is not None:
logger.info('[DDL-RESUME] Attempting to resume from: %s bytes' % resume)
self.headers['Range'] = 'bytes=%d-' % resume
cf_cookievalue, cf_user_agent = s.get_tokens(mainlink, headers=self.headers)
t = s.get(link, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True)
cf_cookievalue, cf_user_agent = s.get_tokens(mainlink, headers=self.headers, timeout=30)
t = s.get(link, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True, timeout=30)

filename = os.path.basename(urllib.unquote(t.url).decode('utf-8'))
if 'GetComics.INFO' in filename:
Expand All @@ -326,13 +345,32 @@ def downloadit(self, id, link, mainlink, resume=None):
remote_filesize = int(t.headers['Content-length'])
logger.fdebug('remote filesize: %s' % remote_filesize)
except Exception as e:
logger.warn('[WARNING] Unable to retrieve remote file size - this is usually due to the page being behind a different click-bait/ad page. Error returned as : %s' % e)
logger.warn('[WARNING] Considering this particular download as invalid and will ignore this result.')
remote_filesize = 0
mylar.DDL_LOCK = False
return ({"success": False,
"filename": filename,
"path": None})
if 'go.php-urls' not in link:
link = re.sub('go.php-url=', 'go.php-urls', link)
t = s.get(link, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True, timeout=30)
filename = os.path.basename(urllib.unquote(t.url).decode('utf-8'))
if 'GetComics.INFO' in filename:
filename = re.sub('GetComics.INFO', '', filename, re.I).strip()
try:
remote_filesize = int(t.headers['Content-length'])
logger.fdebug('remote filesize: %s' % remote_filesize)
except Exception as e:
logger.warn('[WARNING] Unable to retrieve remote file size - this is usually due to the page being behind a different click-bait/ad page. Error returned as : %s' % e)
logger.warn('[WARNING] Considering this particular download as invalid and will ignore this result.')
remote_filesize = 0
mylar.DDL_LOCK = False
return ({"success": False,
"filename": filename,
"path": None})

else:
logger.warn('[WARNING] Unable to retrieve remote file size - this is usually due to the page being behind a different click-bait/ad page. Error returned as : %s' % e)
logger.warn('[WARNING] Considering this particular download as invalid and will ignore this result.')
remote_filesize = 0
mylar.DDL_LOCK = False
return ({"success": False,
"filename": filename,
"path": None})

#write the filename to the db for tracking purposes...
myDB.upsert('ddl_info', {'filename': filename, 'remote_filesize': remote_filesize}, {'id': id})
Expand Down
Loading

0 comments on commit 3f30fae

Please sign in to comment.