From 3b832a4564299c1a0b4528d8b7b718f2620f2e6d Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Thu, 18 Sep 2014 09:47:50 -0400 Subject: [PATCH 01/26] updated guessit and babelfish minimum requirements --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index a83b650ee..e54cf28f4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ beautifulsoup4>=4.3.2 -guessit>=0.6.2,<0.7 +guessit>=0.7 requests>=2.0.1 enzyme>=0.4.0 html5lib>=0.99 dogpile.cache>=0.5.2 -babelfish>=0.4.0,<0.5 +babelfish>=0.5.0 charade>=1.0.3 pysrt>=0.5.0 From e6dc714f7ae63a4074130e61991c01fccd419a51 Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Thu, 18 Sep 2014 09:48:22 -0400 Subject: [PATCH 02/26] Eliminated Dict Comprehensions (PEP 274) references to allow subliminal to work with python v2.4+ --- subliminal/__init__.py | 5 +- subliminal/api.py | 5 +- subliminal/cli.py | 4 +- subliminal/converters/addic7ed.py | 4 +- subliminal/converters/podnapisi.py | 2 +- subliminal/converters/tvsubtitles.py | 6 +-- subliminal/providers/addic7ed.py | 4 +- subliminal/providers/bierdopje.py | 8 ++- subliminal/providers/opensubtitles.py | 2 +- subliminal/providers/podnapisi.py | 5 +- subliminal/providers/thesubdb.py | 4 +- subliminal/providers/tvsubtitles.py | 7 +-- subliminal/subtitle.py | 8 +-- subliminal/tests/test_providers.py | 72 +++++++++++++-------------- subliminal/video.py | 8 +-- 15 files changed, 77 insertions(+), 67 deletions(-) diff --git a/subliminal/__init__.py b/subliminal/__init__.py index 21e323481..0dcc7dddc 100644 --- a/subliminal/__init__.py +++ b/subliminal/__init__.py @@ -12,5 +12,8 @@ from .subtitle import Subtitle from .video import VIDEO_EXTENSIONS, SUBTITLE_EXTENSIONS, Video, Episode, Movie, scan_videos, scan_video +class NullHandler(logging.Handler): + def emit(self, record): + pass -logging.getLogger(__name__).addHandler(logging.NullHandler()) 
+logging.getLogger(__name__).addHandler(NullHandler()) diff --git a/subliminal/api.py b/subliminal/api.py index ed840a614..4da52950c 100644 --- a/subliminal/api.py +++ b/subliminal/api.py @@ -92,11 +92,12 @@ def download_subtitles(subtitles, provider_configs=None, single=False): """ provider_configs = provider_configs or {} discarded_providers = set() - providers_by_name = {ep.name: ep.load() for ep in pkg_resources.iter_entry_points(PROVIDERS_ENTRY_POINT)} + providers_by_name = dict([(ep.name, ep.load()) for ep in pkg_resources.iter_entry_points(PROVIDERS_ENTRY_POINT)]) + initialized_providers = {} try: for video, video_subtitles in subtitles.items(): - languages = {subtitle.language for subtitle in video_subtitles} + languages = dict([subtitle.language for subtitle in video_subtitles]) downloaded_languages = set() for subtitle in video_subtitles: # filter diff --git a/subliminal/cli.py b/subliminal/cli.py index 2b5a916f0..c476269d8 100644 --- a/subliminal/cli.py +++ b/subliminal/cli.py @@ -81,7 +81,7 @@ def subliminal(): # parse languages try: - args.languages = {babelfish.Language.fromietf(l) for l in args.languages} + args.languages = set( babelfish.Language.fromietf(l) for l in args.languages ) except babelfish.Error: parser.error('argument -l/--languages: codes are not IETF: %r' % args.languages) @@ -90,7 +90,7 @@ def subliminal(): match = re.match(r'^(?:(?P\d+?)w)?(?:(?P\d+?)d)?(?:(?P\d+?)h)?$', args.age) if not match: parser.error('argument -a/--age: invalid age: %r' % args.age) - args.age = datetime.timedelta(**{k: int(v) for k, v in match.groupdict(0).items()}) + args.age = datetime.timedelta(**dict([(k, int(v)) for k, v in match.groupdict(0).items()])) # parse cache-file args.cache_file = os.path.abspath(os.path.expanduser(args.cache_file)) diff --git a/subliminal/converters/addic7ed.py b/subliminal/converters/addic7ed.py index 2915a2b7d..0e862931d 100644 --- a/subliminal/converters/addic7ed.py +++ b/subliminal/converters/addic7ed.py @@ -1,11 +1,11 @@ # 
-*- coding: utf-8 -*- from __future__ import unicode_literals -from babelfish import LanguageReverseConverter, get_language_converter +from babelfish import LanguageReverseConverter, language_converters class Addic7edConverter(LanguageReverseConverter): def __init__(self): - self.name_converter = get_language_converter('name') + self.name_converter = language_converters['name'] self.from_addic7ed = {'CatalĂ ': ('cat',), 'Chinese (Simplified)': ('zho',), 'Chinese (Traditional)': ('zho',), 'Euskera': ('eus',), 'Galego': ('glg',), 'Greek': ('ell',), 'Malay': ('msa',), 'Portuguese (Brazilian)': ('por', 'BR'), 'Serbian (Cyrillic)': ('srp', None, 'Cyrl'), diff --git a/subliminal/converters/podnapisi.py b/subliminal/converters/podnapisi.py index d73cb1c1f..6b909fb5f 100644 --- a/subliminal/converters/podnapisi.py +++ b/subliminal/converters/podnapisi.py @@ -14,7 +14,7 @@ def __init__(self): 11: ('jpn',), 4: ('kor',), 29: ('sqi',), 6: ('isl',), 19: ('lit',), 46: ('ukr',), 44: ('tha',), 53: ('cat',), 56: ('sin',), 21: ('lav',), 40: ('cmn',), 55: ('msa',), 42: ('hin',), 50: ('bel',)} - self.to_podnapisi = {v: k for k, v in self.from_podnapisi.items()} + self.to_podnapisi = dict([(v, k) for k, v in self.from_podnapisi.items()]) self.codes = set(self.from_podnapisi.keys()) def convert(self, alpha3, country=None, script=None): diff --git a/subliminal/converters/tvsubtitles.py b/subliminal/converters/tvsubtitles.py index d817a2711..196134787 100644 --- a/subliminal/converters/tvsubtitles.py +++ b/subliminal/converters/tvsubtitles.py @@ -1,14 +1,14 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -from babelfish import LanguageReverseConverter, get_language_converter +from babelfish import LanguageReverseConverter, language_converters class TVsubtitlesConverter(LanguageReverseConverter): def __init__(self): - self.alpha2_converter = get_language_converter('alpha2') + self.alpha2_converter = language_converters['alpha2'] self.from_tvsubtitles = {'br': ('por', 
'BR'), 'ua': ('ukr',), 'gr': ('ell',), 'cn': ('zho',), 'jp': ('jpn',), 'cz': ('ces',)} - self.to_tvsubtitles = {v: k for k, v in self.from_tvsubtitles} + self.to_tvsubtitles = set([(v, k) for k, v in self.from_tvsubtitles]) self.codes = self.alpha2_converter.codes | set(self.from_tvsubtitles.keys()) def convert(self, alpha3, country=None, script=None): diff --git a/subliminal/providers/addic7ed.py b/subliminal/providers/addic7ed.py index 79c7622e7..10ced1463 100644 --- a/subliminal/providers/addic7ed.py +++ b/subliminal/providers/addic7ed.py @@ -53,11 +53,11 @@ def compute_matches(self, video): class Addic7edProvider(Provider): - languages = {babelfish.Language('por', 'BR')} | {babelfish.Language(l) + languages = set([babelfish.Language('por', 'BR')]) | set([babelfish.Language(l) for l in ['ara', 'aze', 'ben', 'bos', 'bul', 'cat', 'ces', 'dan', 'deu', 'ell', 'eng', 'eus', 'fas', 'fin', 'fra', 'glg', 'heb', 'hrv', 'hun', 'hye', 'ind', 'ita', 'jpn', 'kor', 'mkd', 'msa', 'nld', 'nor', 'pol', 'por', 'ron', 'rus', 'slk', 'slv', 'spa', 'sqi', 'srp', 'swe', 'tha', - 'tur', 'ukr', 'vie', 'zho']} + 'tur', 'ukr', 'vie', 'zho']]) video_types = (Episode,) server = 'http://www.addic7ed.com' diff --git a/subliminal/providers/bierdopje.py b/subliminal/providers/bierdopje.py index b8f5a5f24..dd7d06735 100644 --- a/subliminal/providers/bierdopje.py +++ b/subliminal/providers/bierdopje.py @@ -49,7 +49,7 @@ def compute_matches(self, video): class BierDopjeProvider(Provider): - languages = {babelfish.Language(l) for l in ['eng', 'nld']} + languages = set([babelfish.Language(l) for l in ['eng', 'nld']]) video_types = (Episode,) def initialize(self): @@ -93,7 +93,11 @@ def find_show_id(self, series): if root.find('response/status').text == 'false': logger.info('Series %r not found', series) return None - return int(root.find('response/results/result[1]/showid').text) + try: + return int(root.find('response/results/result[1]/showid').text) + except SyntaxError: + # Python < 2.7; fail 
gracefully + return None def query(self, language, season, episode, tvdb_id=None, series=None): params = {'language': language.alpha2, 'season': season, 'episode': episode} diff --git a/subliminal/providers/opensubtitles.py b/subliminal/providers/opensubtitles.py index b98a29dbd..969e1ba4c 100644 --- a/subliminal/providers/opensubtitles.py +++ b/subliminal/providers/opensubtitles.py @@ -83,7 +83,7 @@ def compute_matches(self, video): class OpenSubtitlesProvider(Provider): - languages = {babelfish.Language.fromopensubtitles(l) for l in babelfish.get_language_converter('opensubtitles').codes} + languages = set([babelfish.Language.fromopensubtitles(l) for l in babelfish.language_converters['opensubtitles'].codes]) def __init__(self): self.server = xmlrpclib.ServerProxy('http://api.opensubtitles.org/xml-rpc') diff --git a/subliminal/providers/podnapisi.py b/subliminal/providers/podnapisi.py index a3815f5ab..abbf9b7e7 100644 --- a/subliminal/providers/podnapisi.py +++ b/subliminal/providers/podnapisi.py @@ -3,6 +3,7 @@ import io import logging import re +import contextlib import xml.etree.ElementTree import zipfile import babelfish @@ -67,7 +68,7 @@ def compute_matches(self, video): class PodnapisiProvider(Provider): - languages = {babelfish.Language.frompodnapisi(l) for l in babelfish.get_language_converter('podnapisi').codes} + languages = set([babelfish.Language.frompodnapisi(l) for l in babelfish.language_converters['podnapisi'].codes]) video_types = (Episode, Movie) server = 'http://simple.podnapisi.net' link_re = re.compile('^.*(?P/ppodnapisi/download/i/\d+/k/.*$)') @@ -153,7 +154,7 @@ def download_subtitle(self, subtitle): raise ProviderNotAvailable('Timeout after 10 seconds') if r.status_code != 200: raise ProviderNotAvailable('Request failed with status code %d' % r.status_code) - with zipfile.ZipFile(io.BytesIO(r.content)) as zf: + with contextlib.closing(zipfile.ZipFile(io.BytesIO(r.content))) as zf: if len(zf.namelist()) > 1: raise ProviderError('More than 
one file to unzip') subtitle_bytes = zf.read(zf.namelist()[0]) diff --git a/subliminal/providers/thesubdb.py b/subliminal/providers/thesubdb.py index e610a9d49..726554f79 100644 --- a/subliminal/providers/thesubdb.py +++ b/subliminal/providers/thesubdb.py @@ -29,7 +29,7 @@ def compute_matches(self, video): class TheSubDBProvider(Provider): - languages = {babelfish.Language.fromalpha2(l) for l in ['en', 'es', 'fr', 'it', 'nl', 'pl', 'pt', 'ro', 'sv', 'tr']} + languages = set([babelfish.Language.fromalpha2(l) for l in ['en', 'es', 'fr', 'it', 'nl', 'pl', 'pt', 'ro', 'sv', 'tr']]) required_hash = 'thesubdb' def initialize(self): @@ -65,7 +65,7 @@ def query(self, hash): # @ReservedAssignment elif r.status_code != 200: raise ProviderError('Request failed with status code %d' % r.status_code) return [TheSubDBSubtitle(language, hash) for language in - {babelfish.Language.fromalpha2(l) for l in r.content.split(',')}] + set([babelfish.Language.fromalpha2(l) for l in r.content.split(',')])] def list_subtitles(self, video, languages): return [s for s in self.query(video.hashes['thesubdb']) if s.language in languages] diff --git a/subliminal/providers/tvsubtitles.py b/subliminal/providers/tvsubtitles.py index e41fda606..e070bb290 100644 --- a/subliminal/providers/tvsubtitles.py +++ b/subliminal/providers/tvsubtitles.py @@ -3,6 +3,7 @@ import io import logging import re +import contextlib import zipfile import babelfish import bs4 @@ -56,9 +57,9 @@ def compute_matches(self, video): class TVsubtitlesProvider(Provider): - languages = {babelfish.Language('por', 'BR')} | {babelfish.Language(l) + languages = set([babelfish.Language('por', 'BR')]) | set([babelfish.Language(l) for l in ['ara', 'bul', 'ces', 'dan', 'deu', 'ell', 'eng', 'fin', 'fra', 'hun', 'ita', 'jpn', 'kor', - 'nld', 'pol', 'por', 'ron', 'rus', 'spa', 'swe', 'tur', 'ukr', 'zho']} + 'nld', 'pol', 'por', 'ron', 'rus', 'spa', 'swe', 'tur', 'ukr', 'zho']]) video_types = (Episode,) server = 'http://www.tvsubtitles.net' 
episode_id_re = re.compile('^episode-\d+\.html$') @@ -165,7 +166,7 @@ def download_subtitle(self, subtitle): raise ProviderNotAvailable('Timeout after 10 seconds') if r.status_code != 200: raise ProviderNotAvailable('Request failed with status code %d' % r.status_code) - with zipfile.ZipFile(io.BytesIO(r.content)) as zf: + with contextlib.closing(zipfile.ZipFile(io.BytesIO(r.content))) as zf: if len(zf.namelist()) > 1: raise ProviderError('More than one file to unzip') subtitle_bytes = zf.read(zf.namelist()[0]) diff --git a/subliminal/subtitle.py b/subliminal/subtitle.py index f7d97f42d..fe92f41e0 100644 --- a/subliminal/subtitle.py +++ b/subliminal/subtitle.py @@ -62,13 +62,13 @@ def compute_score(self, video): # remove equivalences if isinstance(video, Episode): if 'imdb_id' in matches: - matches -= {'series', 'tvdb_id', 'season', 'episode', 'title'} + matches -= set(['series', 'tvdb_id', 'season', 'episode', 'title']) if 'tvdb_id' in matches: - matches -= {'series'} + matches -= set(['series',]) if 'title' in matches: - matches -= {'season', 'episode'} + matches -= set(['season', 'episode']) # add other scores - score += sum((video.scores[match] for match in matches)) + score += sum([video.scores[match] for match in matches]) logger.info('Computed score %d with matches %r', score, initial_matches) return score diff --git a/subliminal/tests/test_providers.py b/subliminal/tests/test_providers.py index 034e5773f..c47f2634c 100644 --- a/subliminal/tests/test_providers.py +++ b/subliminal/tests/test_providers.py @@ -40,51 +40,51 @@ def test_get_show_ids(self): def test_query_episode_0(self): video = EPISODES[0] - languages = {Language('tur'), Language('rus'), Language('heb'), Language('ita'), Language('fra'), + languages = (Language('tur'), Language('rus'), Language('heb'), Language('ita'), Language('fra'), Language('ron'), Language('nld'), Language('eng'), Language('deu'), Language('ell'), - Language('por', 'BR'), Language('bul')} - matches = {frozenset(['episode', 
'release_group', 'title', 'series', 'resolution', 'season']), + Language('por', 'BR'), Language('bul')) + matches = (frozenset(['episode', 'release_group', 'title', 'series', 'resolution', 'season']), frozenset(['series', 'resolution', 'season']), frozenset(['series', 'episode', 'season', 'title']), frozenset(['series', 'release_group', 'season']), frozenset(['series', 'episode', 'season', 'release_group', 'title']), - frozenset(['series', 'season'])} + frozenset(['series', 'season'])) with self.Provider() as provider: subtitles = provider.query(video.series, video.season) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertEqual(set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]), matches) + self.assertEqual(set([subtitle.language for subtitle in subtitles]), languages) def test_query_episode_1(self): video = EPISODES[1] - languages = {Language('ind'), Language('spa'), Language('hrv'), Language('ita'), Language('fra'), + languages = (Language('ind'), Language('spa'), Language('hrv'), Language('ita'), Language('fra'), Language('cat'), Language('ell'), Language('nld'), Language('eng'), Language('fas'), Language('por'), Language('nor'), Language('deu'), Language('ron'), Language('por', 'BR'), - Language('bul')} - matches = {frozenset(['series', 'episode', 'resolution', 'season', 'title']), + Language('bul')) + matches = (frozenset(['series', 'episode', 'resolution', 'season', 'title']), frozenset(['series', 'resolution', 'season']), frozenset(['series', 'episode', 'season', 'title']), frozenset(['series', 'release_group', 'season']), frozenset(['series', 'resolution', 'release_group', 'season']), frozenset(['series', 'episode', 'season', 'release_group', 'title']), - frozenset(['series', 'season'])} + frozenset(['series', 'season'])) with self.Provider() as provider: subtitles = provider.query(video.series, 
video.season) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertEqual(set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]), matches) + self.assertEqual(set([subtitle.language for subtitle in subtitles]), languages) def test_list_subtitles(self): video = EPISODES[0] - languages = {Language('eng'), Language('fra')} - matches = {frozenset(['series', 'episode', 'season', 'release_group', 'title']), - frozenset(['series', 'episode', 'season', 'title'])} + languages = (Language('eng'), Language('fra')) + matches = (frozenset(['series', 'episode', 'season', 'release_group', 'title']), + frozenset(['series', 'episode', 'season', 'title'])) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertEqual(set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]), matches) + self.assertEqual(set([subtitle.language for subtitle in subtitles]), languages) def test_download_subtitle(self): video = EPISODES[0] - languages = {Language('eng'), Language('fra')} + languages = (Language('eng'), Language('fra')) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) subtitle_text = provider.download_subtitle(subtitles[0]) @@ -107,54 +107,54 @@ def test_find_show_id_error(self): def test_query_episode_0(self): video = EPISODES[0] language = Language('eng') - matches = {frozenset(['series', 'video_codec', 'resolution', 'episode', 'season']), + matches = (frozenset(['series', 'video_codec', 'resolution', 'episode', 'season']), frozenset(['season', 'video_codec', 'episode', 'series']), - frozenset(['episode', 'video_codec', 'season', 'series', 'resolution', 
'release_group'])} + frozenset(['episode', 'video_codec', 'season', 'series', 'resolution', 'release_group'])) with self.Provider() as provider: subtitles = provider.query(language, video.season, video.episode, series=video.series) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, {language}) + self.assertEqual(set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]), matches) + self.assertEqual(set([subtitle.language for subtitle in subtitles]), (language,)) def test_query_episode_1(self): video = EPISODES[1] language = Language('nld') - matches = {frozenset(['series', 'video_codec', 'resolution', 'episode', 'season']), + matches = (frozenset(['series', 'video_codec', 'resolution', 'episode', 'season']), frozenset(['season', 'video_codec', 'episode', 'series']), frozenset(['series', 'episode', 'season']), frozenset(['season', 'video_codec', 'episode', 'release_group', 'series']), - frozenset(['episode', 'video_codec', 'season', 'series', 'resolution', 'release_group'])} + frozenset(['episode', 'video_codec', 'season', 'series', 'resolution', 'release_group'])) with self.Provider() as provider: subtitles = provider.query(language, video.season, video.episode, series=video.series) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, {language}) + self.assertEqual(set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]), matches) + self.assertEqual(set([subtitle.language for subtitle in subtitles]), (language,)) def test_query_episode_0_tvdb_id(self): video = EPISODES[0] language = Language('eng') - matches = {frozenset(['video_codec', 'tvdb_id', 'episode', 'season', 'series']), + matches = (frozenset(['video_codec', 'tvdb_id', 'episode', 'season', 'series']), frozenset(['episode', 'video_codec', 
'series', 'season', 'tvdb_id', 'resolution', 'release_group']), - frozenset(['episode', 'series', 'video_codec', 'tvdb_id', 'resolution', 'season'])} + frozenset(['episode', 'series', 'video_codec', 'tvdb_id', 'resolution', 'season'])) with self.Provider() as provider: subtitles = provider.query(language, video.season, video.episode, tvdb_id=video.tvdb_id) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, {language}) + self.assertEqual(set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]), matches) + self.assertEqual(set([subtitle.language for subtitle in subtitles]), (language,)) def test_list_subtitles(self): video = EPISODES[1] - languages = {Language('eng'), Language('nld')} - matches = {frozenset(['series', 'video_codec', 'tvdb_id', 'episode', 'season']), + languages = (Language('eng'), Language('nld')) + matches = (frozenset(['series', 'video_codec', 'tvdb_id', 'episode', 'season']), frozenset(['episode', 'video_codec', 'season', 'series', 'tvdb_id', 'resolution', 'release_group']), frozenset(['season', 'tvdb_id', 'episode', 'series']), frozenset(['episode', 'video_codec', 'season', 'series', 'tvdb_id', 'resolution']), - frozenset(['episode', 'video_codec', 'season', 'series', 'tvdb_id', 'release_group'])} + frozenset(['episode', 'video_codec', 'season', 'series', 'tvdb_id', 'release_group'])) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertEqual(set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]), matches) + self.assertEqual(set([subtitle.language for subtitle in subtitles]), languages) def test_download_subtitle(self): video = EPISODES[0] - languages = {Language('eng'), 
Language('nld')} + languages = (Language('eng'), Language('nld')) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) subtitle_text = provider.download_subtitle(subtitles[0]) diff --git a/subliminal/video.py b/subliminal/video.py index 017aa596e..1d5487e80 100644 --- a/subliminal/video.py +++ b/subliminal/video.py @@ -156,7 +156,7 @@ def scan_subtitle_languages(path): :rtype: set """ - language_extensions = tuple('.' + c for c in babelfish.get_language_converter('alpha2').codes) + language_extensions = tuple('.' + c for c in babelfish.language_converters['alpha2'].codes) dirpath, filename = os.path.split(path) subtitles = set() for p in os.listdir(dirpath): @@ -292,12 +292,12 @@ def scan_videos(paths, subtitles=True, embedded_subtitles=True, age=None): for dirpath, dirnames, filenames in os.walk(path): # skip badly encoded directories if isinstance(dirpath, bytes): - logger.error('Skipping badly encoded directory %r', dirpath.decode('utf-8', errors='replace')) + logger.error('Skipping badly encoded directory %r', dirpath.decode('utf-8', 'replace')) continue # skip badly encoded and hidden sub directories for dirname in list(dirnames): if isinstance(dirname, bytes): - logger.error('Skipping badly encoded dirname %r in %r', dirname.decode('utf-8', errors='replace'), + logger.error('Skipping badly encoded dirname %r in %r', dirname.decode('utf-8', 'replace'), dirpath) dirnames.remove(dirname) elif dirname.startswith('.'): @@ -307,7 +307,7 @@ def scan_videos(paths, subtitles=True, embedded_subtitles=True, age=None): for filename in filenames: # skip badly encoded files if isinstance(filename, bytes): - logger.error('Skipping badly encoded filename %r in %r', filename.decode('utf-8', errors='replace'), + logger.error('Skipping badly encoded filename %r in %r', filename.decode('utf-8', 'replace'), dirpath) continue # filter videos From 14c7443635ac81879eb6c07127ffb01b4ac8821e Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Thu, 18 Sep 
2014 09:50:56 -0400 Subject: [PATCH 03/26] applied guessit v0.7 support --- subliminal/cli.py | 2 +- subliminal/video.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/subliminal/cli.py b/subliminal/cli.py index c476269d8..350c1812c 100644 --- a/subliminal/cli.py +++ b/subliminal/cli.py @@ -146,7 +146,7 @@ def subliminal(): embedded_subtitles=not args.force, age=args.age) # guess videos - videos.extend([Video.fromguess(os.path.split(p)[1], guessit.guess_file_info(p, 'autodetect')) for p in args.paths + videos.extend([Video.fromguess(os.path.split(p)[1], guessit.guess_file_info(p, info=['filename'])) for p in args.paths if not os.path.exists(p)]) # download best subtitles diff --git a/subliminal/video.py b/subliminal/video.py index 1d5487e80..a9d00059d 100644 --- a/subliminal/video.py +++ b/subliminal/video.py @@ -182,7 +182,7 @@ def scan_video(path, subtitles=True, embedded_subtitles=True): """ dirpath, filename = os.path.split(path) logger.info('Scanning video %r in %r', filename, dirpath) - video = Video.fromguess(path, guessit.guess_file_info(path, 'autodetect')) + video = Video.fromguess(path, guessit.guess_file_info(path, info=['filename'])) video.size = os.path.getsize(path) if video.size > 10485760: logger.debug('Size is %d', video.size) From 71d206ad464b68ffac163717ebcf793511aeeae6 Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Thu, 18 Sep 2014 09:51:39 -0400 Subject: [PATCH 04/26] allow searching for subtitles by best score; not exclusively hearing impaired (HI) or non-HI --- subliminal/api.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/subliminal/api.py b/subliminal/api.py index 4da52950c..e6fecda3e 100644 --- a/subliminal/api.py +++ b/subliminal/api.py @@ -233,9 +233,10 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= if subtitle.provider_name in discarded_providers: logger.debug('Skipping subtitle from discarded provider %r', subtitle.provider_name) continue - if 
subtitle.hearing_impaired != hearing_impaired: - logger.debug('Skipping subtitle: hearing impaired != %r', hearing_impaired) - continue + if hearing_impaired is not None: + if subtitle.hearing_impaired != hearing_impaired: + logger.debug('Skipping subtitle: hearing impaired != %r', hearing_impaired) + continue if score < min_score: logger.debug('Skipping subtitle: score < %d', min_score) continue From 73a4e5a3afabe56c63e1334edda717d27ef981dd Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Thu, 18 Sep 2014 09:52:48 -0400 Subject: [PATCH 05/26] subliminal bugfix to prevent multiple matched subtitles from different providers being downloaded. Just download 1 (the best matched); bugfix --- subliminal/api.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/subliminal/api.py b/subliminal/api.py index e6fecda3e..edc6ba394 100644 --- a/subliminal/api.py +++ b/subliminal/api.py @@ -102,6 +102,7 @@ def download_subtitles(subtitles, provider_configs=None, single=False): for subtitle in video_subtitles: # filter if subtitle.language in downloaded_languages: + logger.debug('Skipping subtitle: %r already downloaded', subtitle.language) continue if subtitle.provider_name in discarded_providers: logger.debug('Skipping subtitle from discarded provider %r', subtitle.provider_name) @@ -138,8 +139,11 @@ def download_subtitles(subtitles, provider_configs=None, single=False): with io.open(subtitle_path, 'w', encoding='utf-8') as f: f.write(subtitle_text) downloaded_languages.add(subtitle.language) - if single or downloaded_languages == languages: + if single or sorted(downloaded_languages) == sorted(languages): break + # handle outerloop + if single or sorted(downloaded_languages) == sorted(languages): + break finally: # terminate providers for (provider_name, provider) in initialized_providers.items(): try: @@ -201,6 +205,7 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= for video in videos: # search for subtitles 
subtitles = [] + downloaded_languages = set() for provider_name, provider in initialized_providers.items(): if provider.check(video): if provider_name in discarded_providers: @@ -226,7 +231,7 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= subtitles.extend(provider_subtitles) # find the best subtitles and download them - downloaded_languages = video.subtitle_languages.copy() + languages = video.subtitle_languages.copy() for subtitle, score in sorted([(s, s.compute_score(video)) for s in subtitles], key=operator.itemgetter(1), reverse=True): # filter @@ -264,9 +269,12 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= with io.open(subtitle_path, 'w', encoding='utf-8') as f: f.write(subtitle_text) downloaded_languages.add(subtitle.language) - if single or downloaded_languages >= languages: - logger.debug('All languages downloaded') + if single or sorted(downloaded_languages) == sorted(languages): break + # handle outer loop (prevent second iteration if it is unnessisary) + if single or sorted(downloaded_languages) == sorted(languages): + break + finally: # terminate providers for (provider_name, provider) in initialized_providers.items(): try: From 3b52a9385fbe2898254834d1d9f0265b8df8f7a0 Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Thu, 18 Sep 2014 10:00:21 -0400 Subject: [PATCH 06/26] added ability to prioritize multiple matched subtitles; Download Hearing Impaired (HI) first before non-HI or vice versa. This is kind of an extension to the bestscore enhancement already added in a previous commit. 
--- subliminal/api.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/subliminal/api.py b/subliminal/api.py index edc6ba394..fe1cff2d7 100644 --- a/subliminal/api.py +++ b/subliminal/api.py @@ -155,7 +155,7 @@ def download_subtitles(subtitles, provider_configs=None, single=False): def download_best_subtitles(videos, languages, providers=None, provider_configs=None, single=False, min_score=0, - hearing_impaired=False): + hearing_impaired=False, hi_score_adjust=0): """Download the best subtitles for `videos` with the given `languages` using the specified `providers` :param videos: videos to download subtitles for @@ -169,6 +169,7 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= :param bool single: download with .srt extension if `True`, add language identifier otherwise :param int min_score: minimum score for subtitles to download :param bool hearing_impaired: download hearing impaired subtitles + :param int hi_score_adjust: Adjust hearing_impaired_scores if matched. """ provider_configs = provider_configs or {} @@ -242,6 +243,10 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= if subtitle.hearing_impaired != hearing_impaired: logger.debug('Skipping subtitle: hearing impaired != %r', hearing_impaired) continue + elif subtitle.hearing_impaired and hi_score_adjust != 0: + # Priortization (adjust score) + score += hi_score_adjust + if score < min_score: logger.debug('Skipping subtitle: score < %d', min_score) continue From 88a2cb968124155bf48ea2cb3d5944233430c6d3 Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Thu, 18 Sep 2014 10:02:22 -0400 Subject: [PATCH 07/26] added graceful handling of subtitle providers that are simply offline or unavailable. 
--- subliminal/api.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/subliminal/api.py b/subliminal/api.py index fe1cff2d7..dd30a10ea 100644 --- a/subliminal/api.py +++ b/subliminal/api.py @@ -8,7 +8,7 @@ import pkg_resources from .exceptions import ProviderNotAvailable, InvalidSubtitle from .subtitle import get_subtitle_path - +from socket import error as socket_error logger = logging.getLogger(__name__) @@ -119,6 +119,15 @@ def download_subtitles(subtitles, provider_configs=None, single=False): logger.warning('Provider %r is not available, discarding it', subtitle.provider_name) discarded_providers.add(subtitle.provider_name) continue + except socket_error as err: + logger.warning('Provider %r is not responding, discarding it', subtitle.provider_name) + logger.debug('Provider socket error: %r', str(err)) + discarded_providers.add(subtitle.provider_name) + continue + except: + logger.exception('Unexpected error in provider %r', subtitle.provider_name) + discarded_providers.add(subtitle.provider_name) + continue initialized_providers[subtitle.provider_name] = provider # download subtitles @@ -201,6 +210,13 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= except ProviderNotAvailable: logger.warning('Provider %r is not available, discarding it', provider_entry_point.name) continue + except socket_error as err: + logger.warning('Provider %r is not responding, discarding it', provider_entry_point.name) + logger.debug('Provider socket error: %r', str(err)) + continue + except: + logger.exception('Unexpected error in provider %r', provider_entry_point.name) + continue initialized_providers[provider_entry_point.name] = provider try: for video in videos: From 4702284e87eb0f261fbee1d313e734ca6b0c727f Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Thu, 18 Sep 2014 10:02:22 -0400 Subject: [PATCH 08/26] podnapisi website changes made in Aug 2014 broke this provider in subliminal. 
Provider now supports new page layout --- subliminal/providers/podnapisi.py | 56 ++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 8 deletions(-) diff --git a/subliminal/providers/podnapisi.py b/subliminal/providers/podnapisi.py index abbf9b7e7..b550953ef 100644 --- a/subliminal/providers/podnapisi.py +++ b/subliminal/providers/podnapisi.py @@ -19,7 +19,10 @@ logger = logging.getLogger(__name__) - +URL_RE = re.compile( + '^((http[s]?|ftp):\/)?\/?([^:\/\s]+)(:([^\/]*))?((\/\w+)*\/)' + \ + '([\w\-\.]+[^#?\s]+)(\?([^#]*))?(#(.*))?$', +) class PodnapisiSubtitle(Subtitle): provider_name = 'podnapisi' @@ -30,7 +33,7 @@ def __init__(self, language, id, releases, hearing_impaired, link, series=None, self.id = id self.releases = releases self.hearing_impaired = hearing_impaired - self.link = link + self.link = '/ppodnapisi' + link self.series = series self.season = season self.episode = episode @@ -71,7 +74,8 @@ class PodnapisiProvider(Provider): languages = set([babelfish.Language.frompodnapisi(l) for l in babelfish.language_converters['podnapisi'].codes]) video_types = (Episode, Movie) server = 'http://simple.podnapisi.net' - link_re = re.compile('^.*(?P/ppodnapisi/download/i/\d+/k/.*$)') + pre_link_re = re.compile('^.*(?P/ppodnapisi/predownload/i/\d+/k/.*$)') + link_re = re.compile('^.*(?P/[a-zA-Z]{2}/ppodnapisi/download/i/\d+/k/.*$)') def initialize(self): self.session = requests.Session() @@ -80,19 +84,30 @@ def initialize(self): def terminate(self): self.session.close() - def get(self, url, params=None, is_xml=True): + def get(self, url, params=None, headers=None, is_xml=True): """Make a GET request on `url` with the given parameters :param string url: part of the URL to reach with the leading slash :param dict params: params of the request + :param dict headers: headers of the request :param bool xml: whether the response content is XML or not :return: the response :rtype: :class:`xml.etree.ElementTree.Element` or :class:`bs4.BeautifulSoup` :raise: 
:class:`~subliminal.exceptions.ProviderNotAvailable` """ + + prefix_url = '' + url_result = URL_RE.search(url) + if url_result and url_result.group(2) is None: + prefix_url = self.server + try: - r = self.session.get(self.server + '/ppodnapisi' + url, params=params, timeout=10) + r = self.session.get( + prefix_url + url, params=params, + headers=headers, + timeout=10, + ) except requests.Timeout: raise ProviderNotAvailable('Timeout after 10 seconds') if r.status_code != 200: @@ -117,20 +132,30 @@ def query(self, language, series=None, season=None, episode=None, title=None, ye logger.debug('Searching episode %r', params) subtitles = [] while True: - root = self.get('/search', params) + root = self.get('/ppodnapisi/search', params) if not int(root.find('pagination/results').text): logger.debug('No subtitle found') break if series and season and episode: - subtitles.extend([PodnapisiSubtitle(language, int(s.find('id').text), s.find('release').text.split(), + try: + subtitles.extend([PodnapisiSubtitle(language, int(s.find('id').text), s.find('release').text.split(), 'h' in (s.find('flags').text or ''), s.find('url').text[38:], series=series, season=season, episode=episode) for s in root.findall('subtitle')]) + except AttributeError: + # there simply wasn't enough information in the TV Show + # gracefully handle this instead of crashing :) + break elif title: - subtitles.extend([PodnapisiSubtitle(language, int(s.find('id').text), s.find('release').text.split(), + try: + subtitles.extend([PodnapisiSubtitle(language, int(s.find('id').text), s.find('release').text.split(), 'h' in (s.find('flags').text or ''), s.find('url').text[38:], title=title, year=year) for s in root.findall('subtitle')]) + except AttributeError: + # there simply wasn't enough information in the movie + # gracefully handle this instead of crashing :) + break if int(root.find('pagination/current').text) >= int(root.find('pagination/count').text): break params['page'] = 
int(root.find('pagination/current').text) + 1 @@ -145,6 +170,21 @@ def list_subtitles(self, video, languages): def download_subtitle(self, subtitle): soup = self.get(subtitle.link, is_xml=False) + pre_link = soup.find('a', href=self.pre_link_re) + if not pre_link: + raise ProviderError('Cannot find the pre-download link') + pre_link = self.server + \ + self.pre_link_re.match(pre_link['href']).group('link') + + # Continue following the link + soup = self.get( + pre_link, + headers={ + 'Referer': self.server, + }, + is_xml=False, + ) + link = soup.find('a', href=self.link_re) if not link: raise ProviderError('Cannot find the download link') From 1e9588e5b8f4b38abb31f2b76e861e2f1936bc4b Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Sat, 18 Oct 2014 16:18:42 -0400 Subject: [PATCH 09/26] Added support for titles that contain quotes --- subliminal/cache.py | 2 +- subliminal/providers/__init__.py | 3 +++ subliminal/providers/addic7ed.py | 12 ++++++++---- subliminal/providers/opensubtitles.py | 9 +++++++-- subliminal/providers/podnapisi.py | 9 +++++++-- subliminal/providers/tvsubtitles.py | 7 +++++-- 6 files changed, 31 insertions(+), 11 deletions(-) diff --git a/subliminal/cache.py b/subliminal/cache.py index fdacbfb79..4735aa26a 100644 --- a/subliminal/cache.py +++ b/subliminal/cache.py @@ -7,7 +7,7 @@ #: Subliminal's cache version -CACHE_VERSION = 1 +CACHE_VERSION = 2 def subliminal_key_generator(namespace, fn, to_str=string_type): diff --git a/subliminal/providers/__init__.py b/subliminal/providers/__init__.py index 87d67d933..d0429e7ce 100644 --- a/subliminal/providers/__init__.py +++ b/subliminal/providers/__init__.py @@ -3,6 +3,9 @@ import babelfish from ..video import Episode, Movie +import re +#: The following characters are always stripped +IGNORED_CHARACTERS_RE = re.compile('[!@#$\'"]') class Provider(object): """Base class for providers diff --git a/subliminal/providers/addic7ed.py b/subliminal/providers/addic7ed.py index 10ced1463..05ed326b6 100644 --- 
a/subliminal/providers/addic7ed.py +++ b/subliminal/providers/addic7ed.py @@ -6,6 +6,7 @@ import charade import requests from . import Provider +from . import IGNORED_CHARACTERS_RE from .. import __version__ from ..cache import region from ..exceptions import ProviderConfigurationError, ProviderNotAvailable, InvalidSubtitle @@ -126,7 +127,9 @@ def get_show_ids(self): soup = self.get('/shows.php') show_ids = {} for html_show in soup.select('td.version > h3 > a[href^="/show/"]'): - show_ids[html_show.string.lower()] = int(html_show['href'][6:]) + show_ids[ + IGNORED_CHARACTERS_RE.sub('', html_show.string).lower()] = \ + int(html_show['href'][6:]) return show_ids @region.cache_on_arguments() @@ -150,10 +153,11 @@ def find_show_id(self, series): def query(self, series, season): show_ids = self.get_show_ids() - if series.lower() in show_ids: - show_id = show_ids[series.lower()] + _series = IGNORED_CHARACTERS_RE.sub('', series).lower() + if _series in show_ids: + show_id = show_ids[_series] else: - show_id = self.find_show_id(series.lower()) + show_id = self.find_show_id(_series) if show_id is None: return [] params = {'show_id': show_id, 'season': season} diff --git a/subliminal/providers/opensubtitles.py b/subliminal/providers/opensubtitles.py index 969e1ba4c..9f75fe49e 100644 --- a/subliminal/providers/opensubtitles.py +++ b/subliminal/providers/opensubtitles.py @@ -10,6 +10,7 @@ import charade import guessit from . import Provider +from . import IGNORED_CHARACTERS_RE from .. 
import __version__ from ..exceptions import ProviderError, ProviderNotAvailable, InvalidSubtitle from ..subtitle import Subtitle, is_valid_subtitle, compute_guess_matches @@ -50,7 +51,9 @@ def compute_matches(self, video): # episode if isinstance(video, Episode) and self.movie_kind == 'episode': # series - if video.series and self.series_name.lower() == video.series.lower(): + if video.series and \ + IGNORED_CHARACTERS_RE.sub('', self.series_name).lower() == \ + IGNORED_CHARACTERS_RE.sub('', video.series).lower(): matches.add('series') # season if video.season and self.series_season == video.season: @@ -77,7 +80,9 @@ def compute_matches(self, video): if video.imdb_id and self.movie_imdb_id == video.imdb_id: matches.add('imdb_id') # title - if video.title and self.movie_name.lower() == video.title.lower(): + if video.title and \ + IGNORED_CHARACTERS_RE.sub('', self.movie_name).lower() == \ + IGNORED_CHARACTERS_RE.sub('', video.title).lower(): matches.add('title') return matches diff --git a/subliminal/providers/podnapisi.py b/subliminal/providers/podnapisi.py index b550953ef..65d448c7c 100644 --- a/subliminal/providers/podnapisi.py +++ b/subliminal/providers/podnapisi.py @@ -12,6 +12,7 @@ import guessit import requests from . import Provider +from . import IGNORED_CHARACTERS_RE from .. 
import __version__ from ..exceptions import InvalidSubtitle, ProviderNotAvailable, ProviderError from ..subtitle import Subtitle, is_valid_subtitle, compute_guess_matches @@ -45,7 +46,9 @@ def compute_matches(self, video): # episode if isinstance(video, Episode): # series - if video.series and self.series.lower() == video.series.lower(): + if video.series and \ + IGNORED_CHARACTERS_RE.sub('', self.series).lower() == \ + IGNORED_CHARACTERS_RE.sub('', video.series).lower(): matches.add('series') # season if video.season and self.season == video.season: @@ -59,7 +62,9 @@ def compute_matches(self, video): # movie elif isinstance(video, Movie): # title - if video.title and self.title.lower() == video.title.lower(): + if video.title and \ + IGNORED_CHARACTERS_RE.sub('', self.title).lower() == \ + IGNORED_CHARACTERS_RE.sub('', video.title).lower(): matches.add('title') # year if video.year and self.year == video.year: diff --git a/subliminal/providers/tvsubtitles.py b/subliminal/providers/tvsubtitles.py index e070bb290..4f60c84ad 100644 --- a/subliminal/providers/tvsubtitles.py +++ b/subliminal/providers/tvsubtitles.py @@ -10,6 +10,7 @@ import charade import requests from . import Provider +from . import IGNORED_CHARACTERS_RE from .. 
import __version__ from ..cache import region from ..exceptions import InvalidSubtitle, ProviderNotAvailable, ProviderError @@ -114,7 +115,9 @@ def find_show_id(self, series): if not match: logger.warning('Could not parse %r', link.string) continue - if match.group('series').lower().replace('.', ' ').strip() == series.lower(): + if IGNORED_CHARACTERS_RE.sub('', match.group('series'))\ + .strip().replace('.', ' ').lower() == \ + IGNORED_CHARACTERS_RE.sub('', series).lower(): return int(link['href'][8:-5]) return int(links[0]['href'][8:-5]) @@ -140,7 +143,7 @@ def find_episode_ids(self, show_id, season): return episode_ids def query(self, series, season, episode): - show_id = self.find_show_id(series.lower()) + show_id = self.find_show_id(IGNORED_CHARACTERS_RE.sub('', series.lower())) if show_id is None: return [] episode_ids = self.find_episode_ids(show_id, season) From 827c75f092af158eb0d42612a206ab7a8997d09a Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Tue, 11 Nov 2014 13:16:12 -0500 Subject: [PATCH 10/26] better handling of duplicate download prevention --- subliminal/api.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/subliminal/api.py b/subliminal/api.py index dd30a10ea..9be0ad628 100644 --- a/subliminal/api.py +++ b/subliminal/api.py @@ -6,6 +6,7 @@ import operator import babelfish import pkg_resources +from os.path import basename from .exceptions import ProviderNotAvailable, InvalidSubtitle from .subtitle import get_subtitle_path from socket import error as socket_error @@ -95,9 +96,11 @@ def download_subtitles(subtitles, provider_configs=None, single=False): providers_by_name = dict([(ep.name, ep.load()) for ep in pkg_resources.iter_entry_points(PROVIDERS_ENTRY_POINT)]) initialized_providers = {} + downloaded_subtitles = collections.defaultdict(list) + fetched_subtitles = set() try: for video, video_subtitles in subtitles.items(): - languages = dict([subtitle.language for subtitle in 
video_subtitles]) + languages = set([subtitle.language for subtitle in video_subtitles]) downloaded_languages = set() for subtitle in video_subtitles: # filter @@ -132,9 +135,14 @@ def download_subtitles(subtitles, provider_configs=None, single=False): # download subtitles subtitle_path = get_subtitle_path(video.name, None if single else subtitle.language) + if basename(subtitle_path) in fetched_subtitles: + logger.debug('Skipping subtitle already retrieved %r', basename(subtitle_path)) + continue + logger.info('Downloading subtitle %r into %r', subtitle, subtitle_path) try: subtitle_text = provider.download_subtitle(subtitle) + downloaded_subtitles[video].append(subtitle) except ProviderNotAvailable: logger.warning('Provider %r is not available, discarding it', subtitle.provider_name) discarded_providers.add(subtitle.provider_name) @@ -147,12 +155,10 @@ def download_subtitles(subtitles, provider_configs=None, single=False): continue with io.open(subtitle_path, 'w', encoding='utf-8') as f: f.write(subtitle_text) - downloaded_languages.add(subtitle.language) + downloaded_languages.add(subtitle.language) + fetched_subtitles.add(basename(subtitle_path)) if single or sorted(downloaded_languages) == sorted(languages): break - # handle outerloop - if single or sorted(downloaded_languages) == sorted(languages): - break finally: # terminate providers for (provider_name, provider) in initialized_providers.items(): try: @@ -161,6 +167,7 @@ def download_subtitles(subtitles, provider_configs=None, single=False): logger.warning('Provider %r is not available, unable to terminate', provider_name) except: logger.exception('Unexpected error in provider %r', provider_name) + return downloaded_subtitles def download_best_subtitles(videos, languages, providers=None, provider_configs=None, single=False, min_score=0, @@ -184,6 +191,7 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= provider_configs = provider_configs or {} discarded_providers = set() 
downloaded_subtitles = collections.defaultdict(list) + fetched_subtitles = set() # filter videos videos = [v for v in videos if v.subtitle_languages & languages < languages and (not single or babelfish.Language('und') not in v.subtitle_languages)] @@ -228,6 +236,7 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= if provider_name in discarded_providers: logger.debug('Skipping discarded provider %r', provider_name) continue + provider_video_languages = provider.languages & languages - video.subtitle_languages if not provider_video_languages: logger.debug('Skipping provider %r: no language to search for for video %r', provider_name, @@ -248,7 +257,6 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= subtitles.extend(provider_subtitles) # find the best subtitles and download them - languages = video.subtitle_languages.copy() for subtitle, score in sorted([(s, s.compute_score(video)) for s in subtitles], key=operator.itemgetter(1), reverse=True): # filter @@ -273,6 +281,10 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= # download provider = initialized_providers[subtitle.provider_name] subtitle_path = get_subtitle_path(video.name, None if single else subtitle.language) + if basename(subtitle_path) in fetched_subtitles: + logger.debug('Skipping subtitle already retrieved %r', basename(subtitle_path)) + continue + logger.info('Downloading subtitle %r with score %d into %r', subtitle, score, subtitle_path) try: subtitle_text = provider.download_subtitle(subtitle) @@ -289,12 +301,10 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= continue with io.open(subtitle_path, 'w', encoding='utf-8') as f: f.write(subtitle_text) - downloaded_languages.add(subtitle.language) + downloaded_languages.add(subtitle.language) + fetched_subtitles.add(basename(subtitle_path)) if single or sorted(downloaded_languages) == sorted(languages): break - # handle 
outer loop (prevent second iteration if it is unnessisary) - if single or sorted(downloaded_languages) == sorted(languages): - break finally: # terminate providers for (provider_name, provider) in initialized_providers.items(): From 6cdf18961e080b03bd12b3402551cd7cd1a601e5 Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Tue, 11 Nov 2014 13:16:57 -0500 Subject: [PATCH 11/26] Fixed TVSubtitles.net matching --- subliminal/providers/tvsubtitles.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/subliminal/providers/tvsubtitles.py b/subliminal/providers/tvsubtitles.py index 4f60c84ad..a653663b9 100644 --- a/subliminal/providers/tvsubtitles.py +++ b/subliminal/providers/tvsubtitles.py @@ -17,6 +17,7 @@ from ..subtitle import Subtitle, is_valid_subtitle from ..video import Episode +IGNORE_DATEMATCH=re.compile('^(.*)[ \t0-9-._)(]*$') logger = logging.getLogger(__name__) @@ -107,17 +108,35 @@ def find_show_id(self, series): logger.debug('Searching series %r', data) soup = self.request('/search.php', data=data, method='POST') links = soup.select('div.left li div a[href^="/tvshow-"]') + _series = IGNORE_DATEMATCH.match( + IGNORED_CHARACTERS_RE.sub('', series)\ + .replace('.', ' ').strip().lower(), + ) + if not _series: + _series = IGNORED_CHARACTERS_RE.sub('', series)\ + .replace('.', ' ').strip().lower() + else: + _series = _series.group(1) + if not links: logger.info('Series %r not found', series) return None + for link in links: match = self.link_re.match(link.string) if not match: logger.warning('Could not parse %r', link.string) continue - if IGNORED_CHARACTERS_RE.sub('', match.group('series'))\ - .strip().replace('.', ' ').lower() == \ - IGNORED_CHARACTERS_RE.sub('', series).lower(): + show = IGNORE_DATEMATCH.match( + IGNORED_CHARACTERS_RE.sub('', match.group('series'))\ + .replace('.', ' ').strip().lower(), + ) + if not show: + logger.warning('Could not postparse %r', match.group('series')) + continue + show = 
show.group(1) + + if show == _series: return int(link['href'][8:-5]) return int(links[0]['href'][8:-5]) @@ -143,7 +162,7 @@ def find_episode_ids(self, show_id, season): return episode_ids def query(self, series, season, episode): - show_id = self.find_show_id(IGNORED_CHARACTERS_RE.sub('', series.lower())) + show_id = self.find_show_id(series) if show_id is None: return [] episode_ids = self.find_episode_ids(show_id, season) From 87aefa3a4d9dcf3a16469688b2d5c3a19055d3ad Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Tue, 11 Nov 2014 13:17:51 -0500 Subject: [PATCH 12/26] Massive overhaul on testing to make it Python v2.6 compatible --- subliminal/tests/test_providers.py | 282 +++++++++++----------------- subliminal/tests/test_subliminal.py | 47 ++--- 2 files changed, 130 insertions(+), 199 deletions(-) diff --git a/subliminal/tests/test_providers.py b/subliminal/tests/test_providers.py index c47f2634c..27515f564 100644 --- a/subliminal/tests/test_providers.py +++ b/subliminal/tests/test_providers.py @@ -30,131 +30,61 @@ def test_find_show_id(self): def test_find_show_id_error(self): with self.Provider() as provider: show_id = provider.find_show_id('the big how i met your mother') - self.assertIsNone(show_id) + self.assertEqual(show_id, None) def test_get_show_ids(self): with self.Provider() as provider: show_ids = provider.get_show_ids() - self.assertIn('the big bang theory', show_ids) + self.assertTrue('the big bang theory' in show_ids) self.assertEqual(show_ids['the big bang theory'], 126) def test_query_episode_0(self): video = EPISODES[0] - languages = (Language('tur'), Language('rus'), Language('heb'), Language('ita'), Language('fra'), + languages = set([Language('rus'), Language('heb'), Language('ita'), Language('fra'), Language('ron'), Language('nld'), Language('eng'), Language('deu'), Language('ell'), - Language('por', 'BR'), Language('bul')) - matches = (frozenset(['episode', 'release_group', 'title', 'series', 'resolution', 'season']), + Language('por', 
'BR'), Language('bul')]) + matches = set([frozenset(['episode', 'release_group', 'title', 'series', 'resolution', 'season']), frozenset(['series', 'resolution', 'season']), frozenset(['series', 'episode', 'season', 'title']), frozenset(['series', 'release_group', 'season']), frozenset(['series', 'episode', 'season', 'release_group', 'title']), - frozenset(['series', 'season'])) + frozenset(['series', 'season'])]) with self.Provider() as provider: subtitles = provider.query(video.series, video.season) - self.assertEqual(set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]), matches) - self.assertEqual(set([subtitle.language for subtitle in subtitles]), languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_query_episode_1(self): video = EPISODES[1] - languages = (Language('ind'), Language('spa'), Language('hrv'), Language('ita'), Language('fra'), + languages = set([Language('ind'), Language('spa'), Language('hrv'), Language('ita'), Language('fra'), Language('cat'), Language('ell'), Language('nld'), Language('eng'), Language('fas'), Language('por'), Language('nor'), Language('deu'), Language('ron'), Language('por', 'BR'), - Language('bul')) - matches = (frozenset(['series', 'episode', 'resolution', 'season', 'title']), + Language('bul')]) + matches = set([frozenset(['series', 'episode', 'resolution', 'season', 'title']), frozenset(['series', 'resolution', 'season']), frozenset(['series', 'episode', 'season', 'title']), frozenset(['series', 'release_group', 'season']), frozenset(['series', 'resolution', 'release_group', 'season']), frozenset(['series', 'episode', 'season', 'release_group', 'title']), - frozenset(['series', 'season'])) + frozenset(['series', 'season'])]) with self.Provider() as provider: subtitles = provider.query(video.series, video.season) - 
self.assertEqual(set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]), matches) - self.assertEqual(set([subtitle.language for subtitle in subtitles]), languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_list_subtitles(self): video = EPISODES[0] - languages = (Language('eng'), Language('fra')) - matches = (frozenset(['series', 'episode', 'season', 'release_group', 'title']), - frozenset(['series', 'episode', 'season', 'title'])) + languages = set([Language('eng'), Language('fra')]) + matches = set([frozenset(['series', 'episode', 'season', 'release_group', 'title']), + frozenset(['series', 'episode', 'season', 'title'])]) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) - self.assertEqual(set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]), matches) - self.assertEqual(set([subtitle.language for subtitle in subtitles]), languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_download_subtitle(self): video = EPISODES[0] - languages = (Language('eng'), Language('fra')) - with self.Provider() as provider: - subtitles = provider.list_subtitles(video, languages) - subtitle_text = provider.download_subtitle(subtitles[0]) - self.assertTrue(is_valid_subtitle(subtitle_text)) - - -class BierDopjeProviderTestCase(ProviderTestCase): - provider_name = 'bierdopje' - - def test_find_show_id(self): - with self.Provider() as provider: - show_id = provider.find_show_id('The Big Bang') - self.assertEqual(show_id, 9203) - - def test_find_show_id_error(self): - with self.Provider() as provider: - show_id = provider.find_show_id('the big how i met your mother') 
- self.assertIsNone(show_id) - - def test_query_episode_0(self): - video = EPISODES[0] - language = Language('eng') - matches = (frozenset(['series', 'video_codec', 'resolution', 'episode', 'season']), - frozenset(['season', 'video_codec', 'episode', 'series']), - frozenset(['episode', 'video_codec', 'season', 'series', 'resolution', 'release_group'])) - with self.Provider() as provider: - subtitles = provider.query(language, video.season, video.episode, series=video.series) - self.assertEqual(set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]), matches) - self.assertEqual(set([subtitle.language for subtitle in subtitles]), (language,)) - - def test_query_episode_1(self): - video = EPISODES[1] - language = Language('nld') - matches = (frozenset(['series', 'video_codec', 'resolution', 'episode', 'season']), - frozenset(['season', 'video_codec', 'episode', 'series']), - frozenset(['series', 'episode', 'season']), - frozenset(['season', 'video_codec', 'episode', 'release_group', 'series']), - frozenset(['episode', 'video_codec', 'season', 'series', 'resolution', 'release_group'])) - with self.Provider() as provider: - subtitles = provider.query(language, video.season, video.episode, series=video.series) - self.assertEqual(set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]), matches) - self.assertEqual(set([subtitle.language for subtitle in subtitles]), (language,)) - - def test_query_episode_0_tvdb_id(self): - video = EPISODES[0] - language = Language('eng') - matches = (frozenset(['video_codec', 'tvdb_id', 'episode', 'season', 'series']), - frozenset(['episode', 'video_codec', 'series', 'season', 'tvdb_id', 'resolution', 'release_group']), - frozenset(['episode', 'series', 'video_codec', 'tvdb_id', 'resolution', 'season'])) - with self.Provider() as provider: - subtitles = provider.query(language, video.season, video.episode, tvdb_id=video.tvdb_id) - self.assertEqual(set([frozenset(subtitle.compute_matches(video)) for 
subtitle in subtitles]), matches) - self.assertEqual(set([subtitle.language for subtitle in subtitles]), (language,)) - - def test_list_subtitles(self): - video = EPISODES[1] - languages = (Language('eng'), Language('nld')) - matches = (frozenset(['series', 'video_codec', 'tvdb_id', 'episode', 'season']), - frozenset(['episode', 'video_codec', 'season', 'series', 'tvdb_id', 'resolution', 'release_group']), - frozenset(['season', 'tvdb_id', 'episode', 'series']), - frozenset(['episode', 'video_codec', 'season', 'series', 'tvdb_id', 'resolution']), - frozenset(['episode', 'video_codec', 'season', 'series', 'tvdb_id', 'release_group'])) - with self.Provider() as provider: - subtitles = provider.list_subtitles(video, languages) - self.assertEqual(set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]), matches) - self.assertEqual(set([subtitle.language for subtitle in subtitles]), languages) - - def test_download_subtitle(self): - video = EPISODES[0] - languages = (Language('eng'), Language('nld')) + languages = set([Language('eng'), Language('fra')]) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) subtitle_text = provider.download_subtitle(subtitles[0]) @@ -166,97 +96,97 @@ class OpenSubtitlesProviderTestCase(ProviderTestCase): def test_query_movie_0_query(self): video = MOVIES[0] - languages = {Language('eng')} - matches = {frozenset([]), frozenset(['imdb_id', 'resolution', 'title', 'year']), + languages = set([Language('eng'), ]) + matches = set([frozenset([]), frozenset(['imdb_id', 'resolution', 'title', 'year']), frozenset(['imdb_id', 'title', 'year']), frozenset(['imdb_id', 'video_codec', 'title', 'year']), frozenset(['imdb_id', 'resolution', 'title', 'video_codec', 'year']), - frozenset(['imdb_id', 'title', 'year', 'video_codec', 'resolution', 'release_group'])} + frozenset(['imdb_id', 'title', 'year', 'video_codec', 'resolution', 'release_group'])]) with self.Provider() as provider: subtitles = 
provider.query(languages, query=video.title) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_query_episode_0_query(self): video = EPISODES[0] - languages = {Language('eng')} - matches = {frozenset(['series', 'episode', 'season', 'imdb_id']), + languages = set([Language('eng'), ]) + matches = set([frozenset(['series', 'episode', 'season', 'imdb_id']), frozenset(['series', 'imdb_id', 'video_codec', 'episode', 'season']), - frozenset(['episode', 'title', 'series', 'imdb_id', 'video_codec', 'season'])} + frozenset(['episode', 'title', 'series', 'imdb_id', 'video_codec', 'season'])]) with self.Provider() as provider: subtitles = provider.query(languages, query=video.name.split(os.sep)[-1]) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_query_episode_1_query(self): video = EPISODES[1] - languages = {Language('eng'), Language('fra')} - matches = {frozenset(['episode', 'title', 'series', 'imdb_id', 'video_codec', 'season']), + languages = set([Language('eng'), Language('fra')]) + matches = set([frozenset(['episode', 'title', 'series', 'imdb_id', 'video_codec', 'season']), frozenset(['series', 'imdb_id', 'title', 'episode', 'season']), frozenset(['series', 'imdb_id', 'video_codec', 'episode', 'season']), frozenset(['episode', 'video_codec', 'series', 'imdb_id', 'resolution', 'season']), 
frozenset(['series', 'imdb_id', 'resolution', 'episode', 'season']), - frozenset(['series', 'episode', 'season', 'imdb_id'])} + frozenset(['series', 'episode', 'season', 'imdb_id'])]) with self.Provider() as provider: subtitles = provider.query(languages, query=video.name.split(os.sep)[-1]) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_query_movie_0_imdb_id(self): video = MOVIES[0] - languages = {Language('eng'), Language('fra')} - matches = {frozenset(['imdb_id', 'video_codec', 'title', 'year']), + languages = set([Language('eng'), Language('fra')]) + matches = set([frozenset(['imdb_id', 'video_codec', 'title', 'year']), frozenset(['imdb_id', 'resolution', 'title', 'video_codec', 'year']), frozenset(['imdb_id', 'title', 'year', 'video_codec', 'resolution', 'release_group']), frozenset(['imdb_id', 'title', 'year']), - frozenset(['imdb_id', 'resolution', 'title', 'year'])} + frozenset(['imdb_id', 'resolution', 'title', 'year'])]) with self.Provider() as provider: subtitles = provider.query(languages, imdb_id=video.imdb_id) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_query_episode_0_imdb_id(self): video = EPISODES[0] - languages = {Language('eng'), Language('fra')} - matches = {frozenset(['series', 'episode', 'season', 'imdb_id']), + languages = set([Language('eng'), 
Language('fra')]) + matches = set([frozenset(['series', 'episode', 'season', 'imdb_id']), frozenset(['episode', 'release_group', 'video_codec', 'series', 'imdb_id', 'resolution', 'season']), frozenset(['series', 'imdb_id', 'video_codec', 'episode', 'season']), - frozenset(['episode', 'title', 'series', 'imdb_id', 'video_codec', 'season'])} + frozenset(['episode', 'title', 'series', 'imdb_id', 'video_codec', 'season'])]) with self.Provider() as provider: subtitles = provider.query(languages, imdb_id=video.imdb_id) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_query_movie_0_hash(self): video = MOVIES[0] - languages = {Language('eng')} - matches = {frozenset(['hash', 'title', 'video_codec', 'year', 'resolution', 'imdb_id']), + languages = set([Language('eng'), ]) + matches = set([frozenset(['hash', 'title', 'video_codec', 'year', 'resolution', 'imdb_id']), frozenset(['hash', 'title', 'video_codec', 'year', 'resolution', 'release_group', 'imdb_id']), frozenset(['year', 'video_codec', 'imdb_id', 'hash', 'title']), frozenset([]), frozenset(['year', 'resolution', 'imdb_id', 'hash', 'title']), - frozenset(['year', 'imdb_id', 'hash', 'title'])} + frozenset(['year', 'imdb_id', 'hash', 'title'])]) with self.Provider() as provider: subtitles = provider.query(languages, hash=video.hashes['opensubtitles'], size=video.size) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + 
self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_query_episode_0_hash(self): video = EPISODES[0] - languages = {Language('eng')} - matches = {frozenset(['series', 'hash']), + languages = set([Language('eng'), ]) + matches = set([frozenset(['series', 'hash']), frozenset(['episode', 'season', 'series', 'imdb_id', 'video_codec', 'hash']), frozenset(['series', 'episode', 'season', 'hash', 'imdb_id']), - frozenset(['series', 'resolution', 'hash', 'video_codec'])} + frozenset(['series', 'resolution', 'hash', 'video_codec'])]) with self.Provider() as provider: subtitles = provider.query(languages, hash=video.hashes['opensubtitles'], size=video.size) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_list_subtitles(self): video = MOVIES[0] - languages = {Language('eng'), Language('fra')} - matches = {frozenset(['title', 'video_codec', 'year', 'resolution', 'release_group', 'imdb_id']), + languages = set([Language('eng'), Language('fra')]) + matches = set([frozenset(['title', 'video_codec', 'year', 'resolution', 'release_group', 'imdb_id']), frozenset(['imdb_id', 'year', 'title']), frozenset(['year', 'video_codec', 'imdb_id', 'resolution', 'title']), frozenset(['hash', 'title', 'video_codec', 'year', 'resolution', 'release_group', 'imdb_id']), @@ -266,15 +196,15 @@ def test_list_subtitles(self): frozenset(['hash', 'title', 'video_codec', 'year', 'resolution', 'imdb_id']), frozenset(['year', 'imdb_id', 'hash', 'title']), frozenset(['video_codec', 'imdb_id', 'year', 'title']), - frozenset(['year', 'imdb_id', 'resolution', 'title'])} + frozenset(['year', 'imdb_id', 'resolution', 
'title'])]) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_download_subtitle(self): video = MOVIES[0] - languages = {Language('eng'), Language('fra')} + languages = set([Language('eng'), Language('fra')]) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) subtitle_text = provider.download_subtitle(subtitles[0]) @@ -287,44 +217,44 @@ class PodnapisiProviderTestCase(ProviderTestCase): def test_query_movie_0(self): video = MOVIES[0] language = Language('eng') - matches = {frozenset(['video_codec', 'title', 'resolution', 'year']), + matches = set([frozenset(['video_codec', 'title', 'resolution', 'year']), frozenset(['title', 'resolution', 'year']), frozenset(['video_codec', 'title', 'year']), frozenset(['title', 'year']), frozenset(['video_codec', 'title', 'resolution', 'release_group', 'year']), - frozenset(['video_codec', 'title', 'resolution', 'audio_codec', 'year'])} + frozenset(['video_codec', 'title', 'resolution', 'audio_codec', 'year'])]) with self.Provider() as provider: subtitles = provider.query(language, title=video.title, year=video.year) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, {language}) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue([Language, ], [subtitle.language for subtitle in subtitles]) def test_query_episode_0(self): video = EPISODES[0] language = Language('eng') - matches 
= {frozenset(['episode', 'series', 'season', 'video_codec', 'resolution', 'release_group']), - frozenset(['season', 'video_codec', 'episode', 'resolution', 'series'])} + matches = set([frozenset(['episode', 'series', 'season', 'video_codec', 'resolution', 'release_group']), + frozenset(['season', 'video_codec', 'episode', 'resolution', 'series'])]) with self.Provider() as provider: subtitles = provider.query(language, series=video.series, season=video.season, episode=video.episode) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, {language}) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue([Language, ], [subtitle.language for subtitle in subtitles]) def test_list_subtitles(self): video = MOVIES[0] - languages = {Language('eng'), Language('fra')} - matches = {frozenset(['video_codec', 'title', 'resolution', 'year']), + languages = set([Language('eng'), Language('fra')]) + matches = set([frozenset(['video_codec', 'title', 'resolution', 'year']), frozenset(['title', 'resolution', 'year']), frozenset(['video_codec', 'title', 'year']), frozenset(['title', 'year']), frozenset(['video_codec', 'title', 'resolution', 'release_group', 'year']), - frozenset(['video_codec', 'title', 'resolution', 'audio_codec', 'year'])} + frozenset(['video_codec', 'title', 'resolution', 'audio_codec', 'year'])]) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def 
test_download_subtitle(self): video = MOVIES[0] - languages = {Language('eng'), Language('fra')} + languages = set([Language('eng'), Language('fra')]) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) subtitle_text = provider.download_subtitle(subtitles[0]) @@ -336,34 +266,34 @@ class TheSubDBProviderTestCase(ProviderTestCase): def test_query_episode_0(self): video = EPISODES[0] - languages = {Language('eng'), Language('spa'), Language('por')} - matches = {frozenset(['hash'])} + languages = set([Language('eng'), Language('spa'), Language('por')]) + matches = set([frozenset(['hash']), ]) with self.Provider() as provider: subtitles = provider.query(video.hashes['thesubdb']) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_query_episode_1(self): video = EPISODES[1] - languages = {Language('eng'), Language('por')} - matches = {frozenset(['hash'])} + languages = set([Language('eng'), Language('por')]) + matches = set([frozenset(['hash']), ]) with self.Provider() as provider: subtitles = provider.query(video.hashes['thesubdb']) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_list_subtitles(self): video = MOVIES[0] - languages = {Language('eng'), Language('por')} - matches = {frozenset(['hash'])} + languages = set([Language('eng'), Language('por')]) + 
matches = set([frozenset(['hash']), ]) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_download_subtitle(self): video = MOVIES[0] - languages = {Language('eng'), Language('por')} + languages = (Language('eng'), Language('por'), ) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) subtitle_text = provider.download_subtitle(subtitles[0]) @@ -391,7 +321,7 @@ def test_find_show_id_no_dots(self): def test_find_show_id_error(self): with self.Provider() as provider: show_id = provider.find_show_id('the big gaming') - self.assertIsNone(show_id) + self.assertEqual(show_id, None) def test_find_episode_ids(self): with self.Provider() as provider: @@ -400,38 +330,39 @@ def test_find_episode_ids(self): def test_query_episode_0(self): video = EPISODES[0] - languages = {Language('fra'), Language('por'), Language('hun'), Language('ron'), Language('eng')} - matches = {frozenset(['series', 'episode', 'season', 'video_codec']), - frozenset(['series', 'episode', 'season'])} + languages = set([Language('fra'), Language('por'), Language('hun'), Language('ron'), Language('eng')]) + matches = set([frozenset(['series', 'episode', 'resolution', 'season']), + frozenset(['series', 'episode', 'season'])]) with self.Provider() as provider: subtitles = provider.query(video.series, video.season, video.episode) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - 
set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_query_episode_1(self): video = EPISODES[1] - languages = {Language('fra'), Language('ell'), Language('ron'), Language('eng'), Language('hun'), - Language('por'), Language('por', 'BR')} - matches = {frozenset(['series', 'episode', 'resolution', 'season']), + languages = set([Language('fra'), Language('ell'), Language('ron'), Language('eng'), Language('hun'), + Language('por'), Language('por', 'BR')]) + matches = set([frozenset(['series', 'episode', 'resolution', 'season']), frozenset(['series', 'episode', 'season', 'video_codec']), - frozenset(['series', 'episode', 'season'])} + frozenset(['series', 'episode', 'season'])]) with self.Provider() as provider: subtitles = provider.query(video.series, video.season, video.episode) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_list_subtitles(self): video = EPISODES[0] - languages = {Language('eng'), Language('fra')} - matches = {frozenset(['series', 'episode', 'season'])} + languages = set([Language('eng'), Language('fra')]) + matches = set([frozenset(['series', 'episode', 'resolution', 'season']), + frozenset([u'series', u'episode', u'season'])]) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle 
in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_download_subtitle(self): video = EPISODES[0] - languages = {Language('hun')} + languages = (Language('hun'), ) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) subtitle_text = provider.download_subtitle(subtitles[0]) @@ -441,7 +372,6 @@ def test_download_subtitle(self): def suite(): suite = TestSuite() suite.addTest(TestLoader().loadTestsFromTestCase(Addic7edProviderTestCase)) - suite.addTest(TestLoader().loadTestsFromTestCase(BierDopjeProviderTestCase)) suite.addTest(TestLoader().loadTestsFromTestCase(OpenSubtitlesProviderTestCase)) suite.addTest(TestLoader().loadTestsFromTestCase(PodnapisiProviderTestCase)) suite.addTest(TestLoader().loadTestsFromTestCase(TheSubDBProviderTestCase)) diff --git a/subliminal/tests/test_subliminal.py b/subliminal/tests/test_subliminal.py index 98e19a13a..f8316a221 100644 --- a/subliminal/tests/test_subliminal.py +++ b/subliminal/tests/test_subliminal.py @@ -21,30 +21,30 @@ def tearDown(self): def test_list_subtitles_movie_0(self): videos = [MOVIES[0]] - languages = {Language('eng')} + languages = set([ Language('eng'), ]) subtitles = list_subtitles(videos, languages) self.assertEqual(len(subtitles), len(videos)) - self.assertGreater(len(subtitles[videos[0]]), 0) + self.assertTrue(len(subtitles[videos[0]]) > 0) def test_list_subtitles_movie_0_por_br(self): videos = [MOVIES[0]] - languages = {Language('por', 'BR')} + languages = set([Language('por', 'BR'), ]) subtitles = list_subtitles(videos, languages) self.assertEqual(len(subtitles), len(videos)) - self.assertGreater(len(subtitles[videos[0]]), 0) + self.assertTrue(len(subtitles[videos[0]]) > 0) def test_list_subtitles_episodes(self): videos = [EPISODES[0], EPISODES[1]] - languages = {Language('eng'), Language('fra')} + languages = set([Language('eng'), Language('fra')]) subtitles = list_subtitles(videos, languages) 
self.assertEqual(len(subtitles), len(videos)) - self.assertGreater(len(subtitles[videos[0]]), 0) + self.assertTrue(len(subtitles[videos[0]]) > 0) def test_download_subtitles(self): videos = [EPISODES[0], EPISODES[1]] for video in videos: video.name = os.path.join(TEST_DIR, video.name.split(os.sep)[-1]) - languages = {Language('eng'), Language('fra')} + languages = set([Language('eng'), Language('fra')]) subtitles = list_subtitles(videos, languages) download_subtitles(subtitles) for video in videos: @@ -55,7 +55,7 @@ def test_download_subtitles_single(self): videos = [EPISODES[0], EPISODES[1]] for video in videos: video.name = os.path.join(TEST_DIR, video.name.split(os.sep)[-1]) - languages = {Language('eng'), Language('fra')} + languages = set([Language('eng'), Language('fra')]) subtitles = list_subtitles(videos, languages) download_subtitles(subtitles, single=True) for video in videos: @@ -65,10 +65,11 @@ def test_download_best_subtitles(self): videos = [EPISODES[0], EPISODES[1]] for video in videos: video.name = os.path.join(TEST_DIR, video.name.split(os.sep)[-1]) - languages = {Language('eng'), Language('fra')} + languages = set([Language('eng'), Language('fra')]) subtitles = download_best_subtitles(videos, languages) for video in videos: - self.assertEqual(video in subtitles and len(subtitles[video]), 2) + self.assertTrue(video in subtitles) + self.assertTrue(len(subtitles[video]) == 2) self.assertTrue(os.path.exists(os.path.splitext(video.name)[0] + '.en.srt')) self.assertTrue(os.path.exists(os.path.splitext(video.name)[0] + '.fr.srt')) @@ -76,10 +77,10 @@ def test_download_best_subtitles_single(self): videos = [EPISODES[0], EPISODES[1]] for video in videos: video.name = os.path.join(TEST_DIR, video.name.split(os.sep)[-1]) - languages = {Language('eng'), Language('fra')} + languages = set([Language('eng'), Language('fra')]) subtitles = download_best_subtitles(videos, languages, single=True) for video in videos: - self.assertIn(video, subtitles) + 
self.assertTrue(video in subtitles) self.assertEqual(len(subtitles[video]), 1) self.assertTrue(os.path.exists(os.path.splitext(video.name)[0] + '.srt')) @@ -87,7 +88,7 @@ def test_download_best_subtitles_min_score(self): videos = [MOVIES[0]] for video in videos: video.name = os.path.join(TEST_DIR, video.name.split(os.sep)[-1]) - languages = {Language('eng'), Language('fra')} + languages = set([Language('eng'), Language('fra')]) subtitles = download_best_subtitles(videos, languages, min_score=1000) self.assertEqual(len(subtitles), 0) @@ -95,7 +96,7 @@ def test_download_best_subtitles_hearing_impaired(self): videos = [MOVIES[0]] for video in videos: video.name = os.path.join(TEST_DIR, video.name.split(os.sep)[-1]) - languages = {Language('eng')} + languages = set([Language('eng'), ]) subtitles = download_best_subtitles(videos, languages, hearing_impaired=True) self.assertTrue(subtitles[videos[0]][0].hearing_impaired) @@ -120,8 +121,8 @@ def test_scan_video_movie(self): self.assertEqual(scanned_video.release_group, video.release_group) self.assertEqual(scanned_video.subtitle_languages, set()) self.assertEqual(scanned_video.hashes, {}) - self.assertIsNone(scanned_video.audio_codec) - self.assertIsNone(scanned_video.imdb_id) + self.assertEqual(scanned_video.audio_codec, None) + self.assertEqual(scanned_video.imdb_id, None) self.assertEqual(scanned_video.size, 0) def test_scan_video_episode(self): @@ -136,23 +137,23 @@ def test_scan_video_episode(self): self.assertEqual(scanned_video.release_group, video.release_group) self.assertEqual(scanned_video.subtitle_languages, set()) self.assertEqual(scanned_video.hashes, {}) - self.assertIsNone(scanned_video.title) - self.assertIsNone(scanned_video.tvdb_id) - self.assertIsNone(scanned_video.imdb_id) - self.assertIsNone(scanned_video.audio_codec) + self.assertEqual(scanned_video.title, None) + self.assertEqual(scanned_video.tvdb_id, None) + self.assertEqual(scanned_video.imdb_id, None) + 
self.assertEqual(scanned_video.audio_codec, None) self.assertEqual(scanned_video.size, 0) def test_scan_video_subtitle_language_und(self): video = EPISODES[0] open(os.path.join(TEST_DIR, os.path.splitext(os.path.split(video.name)[1])[0]) + '.srt', 'w').close() scanned_video = scan_video(os.path.join(TEST_DIR, os.path.split(video.name)[1])) - self.assertEqual(scanned_video.subtitle_languages, {Language('und')}) + self.assertEqual(scanned_video.subtitle_languages, set([Language('und'), ])) def test_scan_video_subtitles_language_eng(self): video = EPISODES[0] open(os.path.join(TEST_DIR, os.path.splitext(os.path.split(video.name)[1])[0]) + '.en.srt', 'w').close() scanned_video = scan_video(os.path.join(TEST_DIR, os.path.split(video.name)[1])) - self.assertEqual(scanned_video.subtitle_languages, {Language('eng')}) + self.assertEqual(scanned_video.subtitle_languages, set([Language('eng'), ])) def test_scan_video_subtitles_languages(self): video = EPISODES[0] @@ -160,7 +161,7 @@ def test_scan_video_subtitles_languages(self): open(os.path.join(TEST_DIR, os.path.splitext(os.path.split(video.name)[1])[0]) + '.fr.srt', 'w').close() open(os.path.join(TEST_DIR, os.path.splitext(os.path.split(video.name)[1])[0]) + '.srt', 'w').close() scanned_video = scan_video(os.path.join(TEST_DIR, os.path.split(video.name)[1])) - self.assertEqual(scanned_video.subtitle_languages, {Language('eng'), Language('fra'), Language('und')}) + self.assertEqual(scanned_video.subtitle_languages, set([Language('eng'), Language('fra'), Language('und')])) def suite(): From 90d06e2072452f2d42b3d26b6bf2ff330993cc27 Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Tue, 11 Nov 2014 14:06:09 -0500 Subject: [PATCH 13/26] logging output slighly adjusted (added some clarity and removed some unnecessary entries when not debugging) --- subliminal/api.py | 3 +++ subliminal/providers/addic7ed.py | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/subliminal/api.py b/subliminal/api.py index 
9be0ad628..f1879796c 100644 --- a/subliminal/api.py +++ b/subliminal/api.py @@ -312,6 +312,9 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= provider.terminate() except ProviderNotAvailable: logger.warning('Provider %r is not available, unable to terminate', provider_name) + except socket_error as err: + logger.warning('Provider %r is not available, unable to terminate', provider_name) + logger.debug('Provider socket error: %r', str(err)) except: logger.exception('Unexpected error in provider %r', provider_name) return downloaded_subtitles diff --git a/subliminal/providers/addic7ed.py b/subliminal/providers/addic7ed.py index 05ed326b6..7ef8575c6 100644 --- a/subliminal/providers/addic7ed.py +++ b/subliminal/providers/addic7ed.py @@ -74,24 +74,24 @@ def initialize(self): self.session.headers = {'User-Agent': 'Subliminal/%s' % __version__} # login if self.username is not None and self.password is not None: - logger.debug('Logging in') + logger.debug('Logging in to Addic7ed') data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'} try: r = self.session.post(self.server + '/dologin.php', data, timeout=10, allow_redirects=False) except requests.Timeout: raise ProviderNotAvailable('Timeout after 10 seconds') if r.status_code == 302: - logger.info('Logged in') + logger.debug('Successfully logged in to Addic7ed.') self.logged_in = True else: - logger.error('Failed to login') + logger.error('Failed to login to Addic7ed!') def terminate(self): # logout if self.logged_in: try: r = self.session.get(self.server + '/logout.php', timeout=10) - logger.info('Logged out') + logger.debug('Successfully logged out of Addic7ed.') except requests.Timeout: raise ProviderNotAvailable('Timeout after 10 seconds') if r.status_code != 200: From d635269aa03ab1fa62511379fa500dd6e0e96e0f Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Tue, 11 Nov 2014 14:07:49 -0500 Subject: [PATCH 14/26] removed bierdopje provider since it's not 
referenced anymore anyway (tests for it were removed in an earlier commit) --- subliminal/providers/bierdopje.py | 139 ------------------------------ 1 file changed, 139 deletions(-) delete mode 100644 subliminal/providers/bierdopje.py diff --git a/subliminal/providers/bierdopje.py b/subliminal/providers/bierdopje.py deleted file mode 100644 index dd7d06735..000000000 --- a/subliminal/providers/bierdopje.py +++ /dev/null @@ -1,139 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import unicode_literals -import logging -import urllib -import babelfish -import charade -import guessit -import requests -import xml.etree.ElementTree -from . import Provider -from .. import __version__ -from ..cache import region -from ..exceptions import InvalidSubtitle, ProviderNotAvailable, ProviderError -from ..subtitle import Subtitle, is_valid_subtitle, compute_guess_matches -from ..video import Episode - - -logger = logging.getLogger(__name__) - - -class BierDopjeSubtitle(Subtitle): - provider_name = 'bierdopje' - - def __init__(self, language, season, episode, tvdb_id, series, filename, download_link): - super(BierDopjeSubtitle, self).__init__(language) - self.season = season - self.episode = episode - self.tvdb_id = tvdb_id - self.series = series - self.filename = filename - self.download_link = download_link - - def compute_matches(self, video): - matches = set() - # tvdb_id - if video.tvdb_id and self.tvdb_id == video.tvdb_id: - matches.add('tvdb_id') - # series - if video.series and self.series == video.series: - matches.add('series') - # season - if video.season and self.season == video.season: - matches.add('season') - # episode - if video.episode and self.episode == video.episode: - matches.add('episode') - matches |= compute_guess_matches(video, guessit.guess_episode_info(self.filename + '.mkv')) - return matches - - -class BierDopjeProvider(Provider): - languages = set([babelfish.Language(l) for l in ['eng', 'nld']]) - video_types = (Episode,) - - def initialize(self): - 
self.session = requests.Session() - self.session.headers = {'User-Agent': 'Subliminal/%s' % __version__} - - def terminate(self): - self.session.close() - - def get(self, url, **params): - """Make a GET request on the `url` formatted with `**params` - - :param string url: API part of the URL to reach without the leading slash - :param \*\*params: format specs for the `url` - :return: the response - :rtype: :class:`xml.etree.ElementTree.Element` - :raise: :class:`~subliminal.exceptions.ProviderNotAvailable` - - """ - try: - r = self.session.get('http://api.bierdopje.com/A2B638AC5D804C2E/' + url.format(**params), timeout=10) - except requests.Timeout: - raise ProviderNotAvailable('Timeout after 10 seconds') - if r.status_code == 429: - raise ProviderNotAvailable('Too Many Requests') - elif r.status_code != 200: - raise ProviderError('Request failed with status code %d' % r.status_code) - return xml.etree.ElementTree.fromstring(r.content) - - @region.cache_on_arguments() - def find_show_id(self, series): - """Find the show id from series name - - :param string series: series of the episode - :return: show id - :rtype: int - - """ - logger.debug('Searching for series %r', series) - root = self.get('FindShowByName/{series}', series=urllib.quote(series)) - if root.find('response/status').text == 'false': - logger.info('Series %r not found', series) - return None - try: - return int(root.find('response/results/result[1]/showid').text) - except SyntaxError: - # Python < 2.7; fail gracefully - return None - - def query(self, language, season, episode, tvdb_id=None, series=None): - params = {'language': language.alpha2, 'season': season, 'episode': episode} - if tvdb_id is not None: - params['showid'] = tvdb_id - params['istvdbid'] = 'true' - elif series is not None: - show_id = self.find_show_id(series) - if show_id is None: - return [] - params['showid'] = show_id - params['istvdbid'] = 'false' - else: - raise ValueError('Missing parameter tvdb_id or series') - 
logger.debug('Searching subtitles %r', params) - root = self.get('GetAllSubsFor/{showid}/{season}/{episode}/{language}/{istvdbid}', **params) - if root.find('response/status').text == 'false': - logger.debug('No subtitle found') - return [] - logger.debug('Found subtitles %r', root.find('response/results')) - return [BierDopjeSubtitle(language, season, episode, tvdb_id, series, result.find('filename').text, - result.find('downloadlink').text) for result in root.find('response/results')] - - def list_subtitles(self, video, languages): - return [s for l in languages for s in self.query(l, video.season, video.episode, video.tvdb_id, video.series)] - - def download_subtitle(self, subtitle): - try: - r = self.session.get(subtitle.download_link, timeout=10) - except requests.Timeout: - raise ProviderNotAvailable('Timeout after 10 seconds') - if r.status_code == 429: - raise ProviderNotAvailable('Too Many Requests') - elif r.status_code != 200: - raise ProviderError('Request failed with status code %d' % r.status_code) - subtitle_text = r.content.decode(charade.detect(r.content)['encoding'], 'replace') - if not is_valid_subtitle(subtitle_text): - raise InvalidSubtitle - return subtitle_text From 3c7634fef215d284091ffd10ef9164b655484f50 Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Tue, 11 Nov 2014 14:22:17 -0500 Subject: [PATCH 15/26] eliminated bierdopje entry point to fix testing issue --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 06a7c0f24..6840cabae 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,6 @@ entry_points={ 'console_scripts': ['subliminal = subliminal.cli:subliminal'], 'subliminal.providers': ['addic7ed = subliminal.providers.addic7ed:Addic7edProvider', - 'bierdopje = subliminal.providers.bierdopje:BierDopjeProvider', 'opensubtitles = subliminal.providers.opensubtitles:OpenSubtitlesProvider', 'podnapisi = subliminal.providers.podnapisi:PodnapisiProvider', 'thesubdb = 
subliminal.providers.thesubdb:TheSubDBProvider', From bc575ea9b2365b3a90d6630616ec2fc11059532f Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Sat, 29 Nov 2014 20:15:28 -0500 Subject: [PATCH 16/26] comparison functions added to help with 3rd party filtering --- subliminal/video.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/subliminal/video.py b/subliminal/video.py index a9d00059d..6eb8e2489 100644 --- a/subliminal/video.py +++ b/subliminal/video.py @@ -71,6 +71,10 @@ def __repr__(self): def __hash__(self): return hash(self.name) + def __eq__(self, other): + return self.__class__.__name__ == other.__class__.__name__\ + and self.name == other.name + class Episode(Video): """Episode :class:`Video` @@ -112,6 +116,18 @@ def fromguess(cls, name, guess): def __repr__(self): return '<%s [%r, %rx%r]>' % (self.__class__.__name__, self.series, self.season, self.episode) + def __hash__(self): + return hash(( + self.series, + self.season, + self.episode, + )) + + def __eq__(self, other): + return self.__class__.__name__ == other.__class__.__name__\ + and self.series == other.series\ + and self.season == other.season\ + and self.episode == other.episode class Movie(Video): """Movie :class:`Video` @@ -147,6 +163,18 @@ def __repr__(self): return '<%s [%r]>' % (self.__class__.__name__, self.title) return '<%s [%r, %r]>' % (self.__class__.__name__, self.title, self.year) + def __hash__(self): + if self.year is None: + return hash(( + self.title, + self.year, + )) + return hash(self.title) + + def __eq__(self, other): + return self.__class__.__name__ == other.__class__.__name__\ + and self.title == other.title\ + and self.year == other.year def scan_subtitle_languages(path): """Search for subtitles with alpha2 extension from a video `path` and return their language From 7fef3cfc93beab76c122b760a073b73d11488bae Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Sat, 29 Nov 2014 20:16:37 -0500 Subject: [PATCH 17/26] scoring adjustments moved into 
compute_score() and some logging cleanup --- subliminal/api.py | 24 +++++++++++++++--------- subliminal/subtitle.py | 13 +++++++++++-- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/subliminal/api.py b/subliminal/api.py index f1879796c..1c6831d0d 100644 --- a/subliminal/api.py +++ b/subliminal/api.py @@ -74,7 +74,10 @@ def list_subtitles(videos, languages, providers=None, provider_configs=None): except: logger.exception('Unexpected error in provider %r', provider_entry_point.name) continue - logger.info('Found %d subtitles', len(provider_subtitles)) + logger.info('Found %d subtitle(s) on %s' % ( + len(provider_subtitles), + provider_entry_point.name, + )) subtitles[provider_video].extend(provider_subtitles) except ProviderNotAvailable: logger.warning('Provider %r is not available, discarding it', provider_entry_point.name) @@ -207,10 +210,10 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= continue Provider = provider_entry_point.load() if not Provider.languages & languages - subtitle_languages: - logger.info('Skipping provider %r: no language to search for', provider_entry_point.name) + logger.debug('Skipping provider %r: no language to search for', provider_entry_point.name) continue if not [v for v in videos if Provider.check(v)]: - logger.info('Skipping provider %r: no video to search for', provider_entry_point.name) + logger.debug('Skipping provider %r: video type not hosted here.', provider_entry_point.name) continue provider = Provider(**provider_configs.get(provider_entry_point.name, {})) try: @@ -253,27 +256,30 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= except: logger.exception('Unexpected error in provider %r', provider_name) continue - logger.info('Found %d subtitles', len(provider_subtitles)) + logger.info('Found %d subtitle(s) on %s' % ( + len(provider_subtitles), + provider_name, + )) subtitles.extend(provider_subtitles) # find the best subtitles and download 
them - for subtitle, score in sorted([(s, s.compute_score(video)) for s in subtitles], - key=operator.itemgetter(1), reverse=True): + for subtitle, score in sorted([(s, s.compute_score(video, hi_score_adjust)) \ + for s in subtitles], key=operator.itemgetter(1), reverse=True): + # filter if subtitle.provider_name in discarded_providers: logger.debug('Skipping subtitle from discarded provider %r', subtitle.provider_name) continue + if hearing_impaired is not None: if subtitle.hearing_impaired != hearing_impaired: logger.debug('Skipping subtitle: hearing impaired != %r', hearing_impaired) continue - elif subtitle.hearing_impaired and hi_score_adjust != 0: - # Priortization (adjust score) - score += hi_score_adjust if score < min_score: logger.debug('Skipping subtitle: score < %d', min_score) continue + if subtitle.language in downloaded_languages: logger.debug('Skipping subtitle: %r already downloaded', subtitle.language) continue diff --git a/subliminal/subtitle.py b/subliminal/subtitle.py index fe92f41e0..f4a44a1c7 100644 --- a/subliminal/subtitle.py +++ b/subliminal/subtitle.py @@ -33,7 +33,7 @@ def compute_matches(self, video): """ raise NotImplementedError - def compute_score(self, video): + def compute_score(self, video, hi_score_adjust=0): """Compute the score of the subtitle against the `video` There are equivalent matches so that a provider can match one element or its equivalent. 
This is @@ -47,6 +47,7 @@ def compute_score(self, video): :param video: the video to compute the score against :type video: :class:`~subliminal.video.Video` + :param hi_score_adjust: adjust hearing impaired matched videos by this value :return: score of the subtitle :rtype: int @@ -69,7 +70,15 @@ def compute_score(self, video): matches -= set(['season', 'episode']) # add other scores score += sum([video.scores[match] for match in matches]) - logger.info('Computed score %d with matches %r', score, initial_matches) + + # Adjust scoring if hearing impaired subtitles are detected + if self.hearing_impaired and hi_score_adjust != 0: + logger.debug('Hearing impaired subtitle score adjusted ' + \ + 'by %d' % hi_score_adjust) + # Priortization (adjust score) + score += hi_score_adjust + + logger.debug('Computed score %d with matches %r', score, initial_matches) return score def __repr__(self): From e6bd7040e3c3a53b33ebcce039f2ebca64400ff9 Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Sun, 7 Dec 2014 17:12:08 -0500 Subject: [PATCH 18/26] scan_video() now takes pre-guessed video as optional input. This grants the flexibility for 3rd party apps that wrap the subliminal framework to do their own guess management. The default behaviour of this function will remain the same as it always has. This new feature will only kick in if the video is specified to be used instead.
--- subliminal/video.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/subliminal/video.py b/subliminal/video.py index 6eb8e2489..447ac0628 100644 --- a/subliminal/video.py +++ b/subliminal/video.py @@ -197,12 +197,14 @@ def scan_subtitle_languages(path): return subtitles -def scan_video(path, subtitles=True, embedded_subtitles=True): +def scan_video(path, subtitles=True, embedded_subtitles=True, video=None): """Scan a video and its subtitle languages from a video `path` :param string path: absolute path to the video :param bool subtitles: scan for subtitles with the same name :param bool embedded_subtitles: scan for embedded subtitles + :parm :class:`Video`: optionally specify a video if you've already detected on + by other means. :return: the scanned video :rtype: :class:`Video` :raise: ValueError if cannot guess enough information from the path @@ -210,7 +212,12 @@ def scan_video(path, subtitles=True, embedded_subtitles=True): """ dirpath, filename = os.path.split(path) logger.info('Scanning video %r in %r', filename, dirpath) - video = Video.fromguess(path, guessit.guess_file_info(path, info=['filename'])) + if not video: + video = Video.fromguess( + path, + guessit.guess_file_info(path, info=['filename']), + ) + video.size = os.path.getsize(path) if video.size > 10485760: logger.debug('Size is %d', video.size) From d45e3b0941e73715be02e9563d3c200ef2663278 Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Sun, 25 Jan 2015 19:47:53 -0500 Subject: [PATCH 19/26] improved logging for provider debugging --- subliminal/api.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/subliminal/api.py b/subliminal/api.py index 1c6831d0d..606e9f918 100644 --- a/subliminal/api.py +++ b/subliminal/api.py @@ -68,8 +68,9 @@ def list_subtitles(videos, languages, providers=None, provider_configs=None): provider_entry_point.name, provider_video, provider_video_languages) try: provider_subtitles = 
provider.list_subtitles(provider_video, provider_video_languages) - except ProviderNotAvailable: + except ProviderNotAvailable as err: logger.warning('Provider %r is not available, discarding it', provider_entry_point.name) + logger.debug('ProviderNotAvailable error: %r', str(err)) break except: logger.exception('Unexpected error in provider %r', provider_entry_point.name) @@ -79,8 +80,9 @@ def list_subtitles(videos, languages, providers=None, provider_configs=None): provider_entry_point.name, )) subtitles[provider_video].extend(provider_subtitles) - except ProviderNotAvailable: + except ProviderNotAvailable as err: logger.warning('Provider %r is not available, discarding it', provider_entry_point.name) + logger.debug('ProviderNotAvailable error: %r', str(err)) return subtitles @@ -121,8 +123,9 @@ def download_subtitles(subtitles, provider_configs=None, single=False): provider = providers_by_name[subtitle.provider_name](**provider_configs.get(subtitle.provider_name, {})) try: provider.initialize() - except ProviderNotAvailable: + except ProviderNotAvailable as err: logger.warning('Provider %r is not available, discarding it', subtitle.provider_name) + logger.debug('ProviderNotAvailable error: %r', str(err)) discarded_providers.add(subtitle.provider_name) continue except socket_error as err: @@ -146,8 +149,9 @@ def download_subtitles(subtitles, provider_configs=None, single=False): try: subtitle_text = provider.download_subtitle(subtitle) downloaded_subtitles[video].append(subtitle) - except ProviderNotAvailable: + except ProviderNotAvailable as err: logger.warning('Provider %r is not available, discarding it', subtitle.provider_name) + logger.debug('ProviderNotAvailable error: %r', str(err)) discarded_providers.add(subtitle.provider_name) continue except InvalidSubtitle: @@ -218,8 +222,9 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= provider = Provider(**provider_configs.get(provider_entry_point.name, {})) try: 
provider.initialize() - except ProviderNotAvailable: + except ProviderNotAvailable as err: logger.warning('Provider %r is not available, discarding it', provider_entry_point.name) + logger.debug('ProviderNotAvailable error: %r', str(err)) continue except socket_error as err: logger.warning('Provider %r is not responding, discarding it', provider_entry_point.name) @@ -249,8 +254,9 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= provider_name, video, provider_video_languages) try: provider_subtitles = provider.list_subtitles(video, provider_video_languages) - except ProviderNotAvailable: + except ProviderNotAvailable as err: logger.warning('Provider %r is not available, discarding it', provider_name) + logger.debug('ProviderNotAvailable error: %r', str(err)) discarded_providers.add(provider_name) continue except: @@ -295,8 +301,9 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= try: subtitle_text = provider.download_subtitle(subtitle) downloaded_subtitles[video].append(subtitle) - except ProviderNotAvailable: + except ProviderNotAvailable as err: logger.warning('Provider %r is not available, discarding it', subtitle.provider_name) + logger.debug('ProviderNotAvailable error: %r', str(err)) discarded_providers.add(subtitle.provider_name) continue except InvalidSubtitle: @@ -316,8 +323,9 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= for (provider_name, provider) in initialized_providers.items(): try: provider.terminate() - except ProviderNotAvailable: + except ProviderNotAvailable as err: logger.warning('Provider %r is not available, unable to terminate', provider_name) + logger.debug('ProviderNotAvailable error: %r', str(err)) except socket_error as err: logger.warning('Provider %r is not available, unable to terminate', provider_name) logger.debug('Provider socket error: %r', str(err)) From a25640b57f3741a6981308ed5824fa99e3f884fe Mon Sep 17 00:00:00 2001 From: 
Chris Caron Date: Sun, 25 Jan 2015 19:57:26 -0500 Subject: [PATCH 20/26] fixes #425; I'm not proud of this fix; but it resolves the issue. Hopefully the Addic7ed administrator can respond to my email inquiring as to why they are blocking us by the User-Agent string. Automation is the 21st century of the internet; No one wants to click 8 times past banners just to get a 1KB (in size) subtitle. Most people have Ad blocking software and don't even see these banners anyway. There are many other ways to get people on board with helping them out financially (if that's what this is about), and at the same time accommodate those who've automated their service. I will roll back this commit when we can come to a better resolution. --- subliminal/api.py | 21 ++++++++++++++++ subliminal/cli.py | 10 -------- subliminal/providers/addic7ed.py | 43 ++++++-------------------------- 3 files changed, 29 insertions(+), 45 deletions(-) diff --git a/subliminal/api.py b/subliminal/api.py index 606e9f918..902e05d8a 100644 --- a/subliminal/api.py +++ b/subliminal/api.py @@ -10,9 +10,30 @@ from .exceptions import ProviderNotAvailable, InvalidSubtitle from .subtitle import get_subtitle_path from socket import error as socket_error +from random import randint logger = logging.getLogger(__name__) +# Agent List +AGENT_LIST = ( + 'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0', + 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0', + 'Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0', + 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20130401 Firefox/31.0', + 'Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like
Gecko) Chrome/41.0.2227.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36', + 'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko', + 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A', + 'Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25', +) + +# Returns a random agent to use from the list above +RANDOM_USER_AGENT = AGENT_LIST[randint(0, len(AGENT_LIST)-1)] + #: Entry point for the providers PROVIDERS_ENTRY_POINT = 'subliminal.providers' diff --git a/subliminal/cli.py b/subliminal/cli.py index 350c1812c..2d6e94c8e 100644 --- a/subliminal/cli.py +++ b/subliminal/cli.py @@ -52,11 +52,6 @@ def subliminal(): filtering_group.add_argument('-f', '--force', action='store_true', help='force subtitle download for videos with existing subtitles') - # addic7ed - addic7ed_group = parser.add_argument_group('addic7ed') - addic7ed_group.add_argument('--addic7ed-username', metavar='USERNAME', help='username for addic7ed provider') - addic7ed_group.add_argument('--addic7ed-password', metavar='PASSWORD', help='password for addic7ed provider') - # output output_group = parser.add_argument_group('output') output_exclusive_group = output_group.add_mutually_exclusive_group() @@ -100,11 +95,6 @@ def subliminal(): # parse provider configs provider_configs = {} - if (args.addic7ed_username is not None and args.addic7ed_password is None - or args.addic7ed_username is None and args.addic7ed_password is not None): - parser.error('argument --addic7ed-username/--addic7ed-password: both arguments are required or none') - if args.addic7ed_username is not None and args.addic7ed_password is not None: - provider_configs['addic7ed'] = {'username': 
args.addic7ed_username, 'password': args.addic7ed_password} # parse color if args.color and colorlog is None: diff --git a/subliminal/providers/addic7ed.py b/subliminal/providers/addic7ed.py index 7ef8575c6..7922fd1d9 100644 --- a/subliminal/providers/addic7ed.py +++ b/subliminal/providers/addic7ed.py @@ -9,6 +9,7 @@ from . import IGNORED_CHARACTERS_RE from .. import __version__ from ..cache import region +from ..api import RANDOM_USER_AGENT from ..exceptions import ProviderConfigurationError, ProviderNotAvailable, InvalidSubtitle from ..subtitle import Subtitle, is_valid_subtitle from ..video import Episode @@ -62,41 +63,13 @@ class Addic7edProvider(Provider): video_types = (Episode,) server = 'http://www.addic7ed.com' - def __init__(self, username=None, password=None): - if username is not None and password is None or username is None and password is not None: - raise ProviderConfigurationError('Username and password must be specified') - self.username = username - self.password = password - self.logged_in = False - def initialize(self): self.session = requests.Session() - self.session.headers = {'User-Agent': 'Subliminal/%s' % __version__} - # login - if self.username is not None and self.password is not None: - logger.debug('Logging in to Addic7ed') - data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'} - try: - r = self.session.post(self.server + '/dologin.php', data, timeout=10, allow_redirects=False) - except requests.Timeout: - raise ProviderNotAvailable('Timeout after 10 seconds') - if r.status_code == 302: - logger.debug('Successfully logged in to Addic7ed.') - self.logged_in = True - else: - logger.error('Failed to login to Addic7ed!') - - def terminate(self): - # logout - if self.logged_in: - try: - r = self.session.get(self.server + '/logout.php', timeout=10) - logger.debug('Successfully logged out of Addic7ed.') - except requests.Timeout: - raise ProviderNotAvailable('Timeout after 10 seconds') - if r.status_code != 200: - 
raise ProviderNotAvailable('Request failed with status code %d' % r.status_code) - self.session.close() + #self.session.headers = {'User-Agent': 'Subliminal/%s' % __version__} + self.session.headers = { + 'User-Agent': RANDOM_USER_AGENT, + 'Referer': self.server, + } def get(self, url, params=None): """Make a GET request on `url` with the given parameters @@ -109,7 +82,7 @@ def get(self, url, params=None): """ try: - r = self.session.get(self.server + url, params=params, timeout=10) + r = self.session.get(self.server + url, params=params, timeout=30) except requests.Timeout: raise ProviderNotAvailable('Timeout after 10 seconds') if r.status_code != 200: @@ -184,7 +157,7 @@ def list_subtitles(self, video, languages): def download_subtitle(self, subtitle): try: - r = self.session.get(self.server + subtitle.download_link, timeout=10, + r = self.session.get(self.server + subtitle.download_link, timeout=30, headers={'Referer': self.server + subtitle.referer}) except requests.Timeout: raise ProviderNotAvailable('Timeout after 10 seconds') From 3acbefb9c242083d892f46c66cb489c663fb5538 Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Sun, 25 Jan 2015 19:57:26 -0500 Subject: [PATCH 21/26] fixes #428; I'm not proud of this fix; but it resolves the issue. Hopefully the Addic7ed administrator can respond to my email inquiring as to why they are blocking us by the User-Agent string. Automation is the 21st century of the internet; No one wants to click 8 times past banners just to get a 1KB (in size) subtitle. Most people have Ad blocking software and don't even see these banners anyway. There are many other ways to get people on board with helping them out financially (if that's what this is about), and at the same time accommodate those who've automated their service. I will roll back this commit when we can come to a better resolution.
--- subliminal/api.py | 21 ++++++++++++++++ subliminal/cli.py | 10 -------- subliminal/providers/addic7ed.py | 43 ++++++-------------------------- 3 files changed, 29 insertions(+), 45 deletions(-) diff --git a/subliminal/api.py b/subliminal/api.py index 606e9f918..902e05d8a 100644 --- a/subliminal/api.py +++ b/subliminal/api.py @@ -10,9 +10,30 @@ from .exceptions import ProviderNotAvailable, InvalidSubtitle from .subtitle import get_subtitle_path from socket import error as socket_error +from random import randint logger = logging.getLogger(__name__) +# Agent List +AGENT_LIST = ( + 'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0', + 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0', + 'Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0', + 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20130401 Firefox/31.0', + 'Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36', + 'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko', + 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A', + 'Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25', +) + +# Returns a random agent to use from the list above +RANDOM_USER_AGENT = AGENT_LIST[randint(0, len(AGENT_LIST)-1)] + #: Entry point for the providers 
PROVIDERS_ENTRY_POINT = 'subliminal.providers' diff --git a/subliminal/cli.py b/subliminal/cli.py index 350c1812c..2d6e94c8e 100644 --- a/subliminal/cli.py +++ b/subliminal/cli.py @@ -52,11 +52,6 @@ def subliminal(): filtering_group.add_argument('-f', '--force', action='store_true', help='force subtitle download for videos with existing subtitles') - # addic7ed - addic7ed_group = parser.add_argument_group('addic7ed') - addic7ed_group.add_argument('--addic7ed-username', metavar='USERNAME', help='username for addic7ed provider') - addic7ed_group.add_argument('--addic7ed-password', metavar='PASSWORD', help='password for addic7ed provider') - # output output_group = parser.add_argument_group('output') output_exclusive_group = output_group.add_mutually_exclusive_group() @@ -100,11 +95,6 @@ def subliminal(): # parse provider configs provider_configs = {} - if (args.addic7ed_username is not None and args.addic7ed_password is None - or args.addic7ed_username is None and args.addic7ed_password is not None): - parser.error('argument --addic7ed-username/--addic7ed-password: both arguments are required or none') - if args.addic7ed_username is not None and args.addic7ed_password is not None: - provider_configs['addic7ed'] = {'username': args.addic7ed_username, 'password': args.addic7ed_password} # parse color if args.color and colorlog is None: diff --git a/subliminal/providers/addic7ed.py b/subliminal/providers/addic7ed.py index 7ef8575c6..7922fd1d9 100644 --- a/subliminal/providers/addic7ed.py +++ b/subliminal/providers/addic7ed.py @@ -9,6 +9,7 @@ from . import IGNORED_CHARACTERS_RE from .. 
import __version__ from ..cache import region +from ..api import RANDOM_USER_AGENT from ..exceptions import ProviderConfigurationError, ProviderNotAvailable, InvalidSubtitle from ..subtitle import Subtitle, is_valid_subtitle from ..video import Episode @@ -62,41 +63,13 @@ class Addic7edProvider(Provider): video_types = (Episode,) server = 'http://www.addic7ed.com' - def __init__(self, username=None, password=None): - if username is not None and password is None or username is None and password is not None: - raise ProviderConfigurationError('Username and password must be specified') - self.username = username - self.password = password - self.logged_in = False - def initialize(self): self.session = requests.Session() - self.session.headers = {'User-Agent': 'Subliminal/%s' % __version__} - # login - if self.username is not None and self.password is not None: - logger.debug('Logging in to Addic7ed') - data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'} - try: - r = self.session.post(self.server + '/dologin.php', data, timeout=10, allow_redirects=False) - except requests.Timeout: - raise ProviderNotAvailable('Timeout after 10 seconds') - if r.status_code == 302: - logger.debug('Successfully logged in to Addic7ed.') - self.logged_in = True - else: - logger.error('Failed to login to Addic7ed!') - - def terminate(self): - # logout - if self.logged_in: - try: - r = self.session.get(self.server + '/logout.php', timeout=10) - logger.debug('Successfully logged out of Addic7ed.') - except requests.Timeout: - raise ProviderNotAvailable('Timeout after 10 seconds') - if r.status_code != 200: - raise ProviderNotAvailable('Request failed with status code %d' % r.status_code) - self.session.close() + #self.session.headers = {'User-Agent': 'Subliminal/%s' % __version__} + self.session.headers = { + 'User-Agent': RANDOM_USER_AGENT, + 'Referer': self.server, + } def get(self, url, params=None): """Make a GET request on `url` with the given parameters @@ 
-109,7 +82,7 @@ def get(self, url, params=None): """ try: - r = self.session.get(self.server + url, params=params, timeout=10) + r = self.session.get(self.server + url, params=params, timeout=30) except requests.Timeout: raise ProviderNotAvailable('Timeout after 10 seconds') if r.status_code != 200: @@ -184,7 +157,7 @@ def list_subtitles(self, video, languages): def download_subtitle(self, subtitle): try: - r = self.session.get(self.server + subtitle.download_link, timeout=10, + r = self.session.get(self.server + subtitle.download_link, timeout=30, headers={'Referer': self.server + subtitle.referer}) except requests.Timeout: raise ProviderNotAvailable('Timeout after 10 seconds') From e45fe70c95ce9ed4f3fd66921b9d62ef478fe891 Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Wed, 28 Jan 2015 19:29:51 -0500 Subject: [PATCH 22/26] updated requirements --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e54cf28f4..b0d7aea51 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ beautifulsoup4>=4.3.2 -guessit>=0.7 +guessit>=0.7,<0.10 requests>=2.0.1 enzyme>=0.4.0 html5lib>=0.99 From a86694d8d18bb57e9e232694863a7a030e9f780b Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Wed, 28 Jan 2015 19:29:51 -0500 Subject: [PATCH 23/26] updated requirements so that Travis CI doesn't haul in guessit v0.10 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e54cf28f4..b0d7aea51 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ beautifulsoup4>=4.3.2 -guessit>=0.7 +guessit>=0.7,<0.10 requests>=2.0.1 enzyme>=0.4.0 html5lib>=0.99 From c74aa01c094ebd5b7ab1382e9a80bbc8f9f55ff7 Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Sun, 1 Feb 2015 14:46:35 -0500 Subject: [PATCH 24/26] random user agent determined at runtime; default user-agent reference centralized --- subliminal/api.py | 21 --------------------- 
subliminal/providers/__init__.py | 28 +++++++++++++++++++++++++++- subliminal/providers/addic7ed.py | 4 +--- subliminal/providers/podnapisi.py | 3 +-- subliminal/providers/tvsubtitles.py | 3 +-- 5 files changed, 30 insertions(+), 29 deletions(-) diff --git a/subliminal/api.py b/subliminal/api.py index 902e05d8a..606e9f918 100644 --- a/subliminal/api.py +++ b/subliminal/api.py @@ -10,30 +10,9 @@ from .exceptions import ProviderNotAvailable, InvalidSubtitle from .subtitle import get_subtitle_path from socket import error as socket_error -from random import randint logger = logging.getLogger(__name__) -# Agent List -AGENT_LIST = ( - 'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0', - 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0', - 'Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0', - 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20130401 Firefox/31.0', - 'Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36', - 'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko', - 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A', - 'Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25', -) - -# Returns a random agent to use from the list above -RANDOM_USER_AGENT = AGENT_LIST[randint(0, 
len(AGENT_LIST)-1)] - #: Entry point for the providers PROVIDERS_ENTRY_POINT = 'subliminal.providers' diff --git a/subliminal/providers/__init__.py b/subliminal/providers/__init__.py index d0429e7ce..4949428e9 100644 --- a/subliminal/providers/__init__.py +++ b/subliminal/providers/__init__.py @@ -2,11 +2,30 @@ from __future__ import unicode_literals import babelfish from ..video import Episode, Movie - +from .. import __version__ +from random import randint import re + #: The following characters are always stripped IGNORED_CHARACTERS_RE = re.compile('[!@#$\'"]') +# Agent List +AGENT_LIST = ( + 'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0', + 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0', + 'Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0', + 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20130401 Firefox/31.0', + 'Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36', + 'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko', + 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A', + 'Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25', +) + class Provider(object): """Base class for providers @@ -25,6 +44,13 @@ class Provider(object): #: Required hash, if any required_hash = 
None + # Returns a random agent to use from the list above + random_user_agent = AGENT_LIST[randint(0, len(AGENT_LIST)-1)] + + # Defines the ideal user agent to use for all providers otherwise + primary_user_agent = 'Subliminal/%s' % __version__ + + def __init__(self, **kwargs): pass diff --git a/subliminal/providers/addic7ed.py b/subliminal/providers/addic7ed.py index 7922fd1d9..9a0fb37ee 100644 --- a/subliminal/providers/addic7ed.py +++ b/subliminal/providers/addic7ed.py @@ -9,7 +9,6 @@ from . import IGNORED_CHARACTERS_RE from .. import __version__ from ..cache import region -from ..api import RANDOM_USER_AGENT from ..exceptions import ProviderConfigurationError, ProviderNotAvailable, InvalidSubtitle from ..subtitle import Subtitle, is_valid_subtitle from ..video import Episode @@ -65,9 +64,8 @@ class Addic7edProvider(Provider): def initialize(self): self.session = requests.Session() - #self.session.headers = {'User-Agent': 'Subliminal/%s' % __version__} self.session.headers = { - 'User-Agent': RANDOM_USER_AGENT, + 'User-Agent': self.random_user_agent, 'Referer': self.server, } diff --git a/subliminal/providers/podnapisi.py b/subliminal/providers/podnapisi.py index 65d448c7c..2de0b1fd0 100644 --- a/subliminal/providers/podnapisi.py +++ b/subliminal/providers/podnapisi.py @@ -13,7 +13,6 @@ import requests from . import Provider from . import IGNORED_CHARACTERS_RE -from .. 
import __version__ from ..exceptions import InvalidSubtitle, ProviderNotAvailable, ProviderError from ..subtitle import Subtitle, is_valid_subtitle, compute_guess_matches from ..video import Episode, Movie @@ -84,7 +83,7 @@ class PodnapisiProvider(Provider): def initialize(self): self.session = requests.Session() - self.session.headers = {'User-Agent': 'Subliminal/%s' % __version__} + self.session.headers = {'User-Agent': self.primary_user_agent } def terminate(self): self.session.close() diff --git a/subliminal/providers/tvsubtitles.py b/subliminal/providers/tvsubtitles.py index a653663b9..8b0059548 100644 --- a/subliminal/providers/tvsubtitles.py +++ b/subliminal/providers/tvsubtitles.py @@ -11,7 +11,6 @@ import requests from . import Provider from . import IGNORED_CHARACTERS_RE -from .. import __version__ from ..cache import region from ..exceptions import InvalidSubtitle, ProviderNotAvailable, ProviderError from ..subtitle import Subtitle, is_valid_subtitle @@ -70,7 +69,7 @@ class TVsubtitlesProvider(Provider): def initialize(self): self.session = requests.Session() - self.session.headers = {'User-Agent': 'Subliminal/%s' % __version__} + self.session.headers = {'User-Agent': self.primary_user_agent } def terminate(self): self.session.close() From 16ddb8a02e1da3b499b00036e2c3e52e7382c618 Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Sun, 1 Feb 2015 14:48:14 -0500 Subject: [PATCH 25/26] addic7ed timeout reverted back to 10 second timeout --- subliminal/providers/addic7ed.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subliminal/providers/addic7ed.py b/subliminal/providers/addic7ed.py index 9a0fb37ee..658a25d8e 100644 --- a/subliminal/providers/addic7ed.py +++ b/subliminal/providers/addic7ed.py @@ -80,7 +80,7 @@ def get(self, url, params=None): """ try: - r = self.session.get(self.server + url, params=params, timeout=30) + r = self.session.get(self.server + url, params=params, timeout=10) except requests.Timeout: raise 
ProviderNotAvailable('Timeout after 10 seconds') if r.status_code != 200: @@ -155,7 +155,7 @@ def list_subtitles(self, video, languages): def download_subtitle(self, subtitle): try: - r = self.session.get(self.server + subtitle.download_link, timeout=30, + r = self.session.get(self.server + subtitle.download_link, timeout=10, headers={'Referer': self.server + subtitle.referer}) except requests.Timeout: raise ProviderNotAvailable('Timeout after 10 seconds') From 407cbe1679bbf283285b4a6c7b51282a9aa714a4 Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Sun, 1 Feb 2015 15:46:43 -0500 Subject: [PATCH 26/26] series/movie title sanitization refactored --- subliminal/providers/__init__.py | 4 ---- subliminal/providers/addic7ed.py | 15 ++++++--------- subliminal/providers/opensubtitles.py | 10 +++++----- subliminal/providers/podnapisi.py | 10 +++++----- subliminal/providers/tvsubtitles.py | 22 ++++++++++------------ subliminal/subtitle.py | 21 +++++++++++++++++++++ 6 files changed, 47 insertions(+), 35 deletions(-) diff --git a/subliminal/providers/__init__.py b/subliminal/providers/__init__.py index 4949428e9..581799b97 100644 --- a/subliminal/providers/__init__.py +++ b/subliminal/providers/__init__.py @@ -4,10 +4,6 @@ from ..video import Episode, Movie from .. import __version__ from random import randint -import re - -#: The following characters are always stripped -IGNORED_CHARACTERS_RE = re.compile('[!@#$\'"]') # Agent List AGENT_LIST = ( diff --git a/subliminal/providers/addic7ed.py b/subliminal/providers/addic7ed.py index 658a25d8e..d6cfc5df0 100644 --- a/subliminal/providers/addic7ed.py +++ b/subliminal/providers/addic7ed.py @@ -6,11 +6,9 @@ import charade import requests from . import Provider -from . import IGNORED_CHARACTERS_RE -from .. 
import __version__ from ..cache import region from ..exceptions import ProviderConfigurationError, ProviderNotAvailable, InvalidSubtitle -from ..subtitle import Subtitle, is_valid_subtitle +from ..subtitle import Subtitle, is_valid_subtitle, sanitize_string from ..video import Episode @@ -98,8 +96,7 @@ def get_show_ids(self): soup = self.get('/shows.php') show_ids = {} for html_show in soup.select('td.version > h3 > a[href^="/show/"]'): - show_ids[ - IGNORED_CHARACTERS_RE.sub('', html_show.string).lower()] = \ + show_ids[sanitize_string(html_show.string)] = \ int(html_show['href'][6:]) return show_ids @@ -124,11 +121,11 @@ def find_show_id(self, series): def query(self, series, season): show_ids = self.get_show_ids() - _series = IGNORED_CHARACTERS_RE.sub('', series).lower() - if _series in show_ids: - show_id = show_ids[_series] + sanitized_series = sanitize_string(series) + if sanitized_series in show_ids: + show_id = show_ids[sanitized_series] else: - show_id = self.find_show_id(_series) + show_id = self.find_show_id(sanitized_series) if show_id is None: return [] params = {'show_id': show_id, 'season': season} diff --git a/subliminal/providers/opensubtitles.py b/subliminal/providers/opensubtitles.py index 9f75fe49e..1efaf086b 100644 --- a/subliminal/providers/opensubtitles.py +++ b/subliminal/providers/opensubtitles.py @@ -10,10 +10,10 @@ import charade import guessit from . import Provider -from . import IGNORED_CHARACTERS_RE from .. 
import __version__ from ..exceptions import ProviderError, ProviderNotAvailable, InvalidSubtitle from ..subtitle import Subtitle, is_valid_subtitle, compute_guess_matches +from ..subtitle import sanitize_string from ..video import Episode, Movie @@ -52,8 +52,8 @@ def compute_matches(self, video): if isinstance(video, Episode) and self.movie_kind == 'episode': # series if video.series and \ - IGNORED_CHARACTERS_RE.sub('', self.series_name).lower() == \ - IGNORED_CHARACTERS_RE.sub('', video.series).lower(): + sanitize_string(self.series_name) == \ + sanitize_string(video.series): matches.add('series') # season if video.season and self.series_season == video.season: @@ -81,8 +81,8 @@ def compute_matches(self, video): matches.add('imdb_id') # title if video.title and \ - IGNORED_CHARACTERS_RE.sub('', self.movie_name).lower() == \ - IGNORED_CHARACTERS_RE.sub('', video.title).lower(): + sanitize_string(self.movie_name) == \ + sanitize_string(video.title): matches.add('title') return matches diff --git a/subliminal/providers/podnapisi.py b/subliminal/providers/podnapisi.py index 2de0b1fd0..0148020be 100644 --- a/subliminal/providers/podnapisi.py +++ b/subliminal/providers/podnapisi.py @@ -12,9 +12,9 @@ import guessit import requests from . import Provider -from . 
import IGNORED_CHARACTERS_RE from ..exceptions import InvalidSubtitle, ProviderNotAvailable, ProviderError from ..subtitle import Subtitle, is_valid_subtitle, compute_guess_matches +from ..subtitle import sanitize_string from ..video import Episode, Movie @@ -46,8 +46,8 @@ def compute_matches(self, video): if isinstance(video, Episode): # series if video.series and \ - IGNORED_CHARACTERS_RE.sub('', self.series).lower() == \ - IGNORED_CHARACTERS_RE.sub('', video.series).lower(): + sanitize_string(self.series) == \ + sanitize_string(video.series): matches.add('series') # season if video.season and self.season == video.season: @@ -62,8 +62,8 @@ def compute_matches(self, video): elif isinstance(video, Movie): # title if video.title and \ - IGNORED_CHARACTERS_RE.sub('', self.title).lower() == \ - IGNORED_CHARACTERS_RE.sub('', video.title).lower(): + sanitize_string(self.title) == \ + sanitize_string(video.title): matches.add('title') # year if video.year and self.year == video.year: diff --git a/subliminal/providers/tvsubtitles.py b/subliminal/providers/tvsubtitles.py index 8b0059548..615b7aa31 100644 --- a/subliminal/providers/tvsubtitles.py +++ b/subliminal/providers/tvsubtitles.py @@ -10,10 +10,9 @@ import charade import requests from . import Provider -from . 
import IGNORED_CHARACTERS_RE from ..cache import region from ..exceptions import InvalidSubtitle, ProviderNotAvailable, ProviderError -from ..subtitle import Subtitle, is_valid_subtitle +from ..subtitle import Subtitle, is_valid_subtitle, sanitize_string from ..video import Episode IGNORE_DATEMATCH=re.compile('^(.*)[ \t0-9-._)(]*$') @@ -107,15 +106,14 @@ def find_show_id(self, series): logger.debug('Searching series %r', data) soup = self.request('/search.php', data=data, method='POST') links = soup.select('div.left li div a[href^="/tvshow-"]') - _series = IGNORE_DATEMATCH.match( - IGNORED_CHARACTERS_RE.sub('', series)\ - .replace('.', ' ').strip().lower(), + sanitized_series = IGNORE_DATEMATCH.match( + sanitize_string(series).replace('.', ' ').strip(), ) - if not _series: - _series = IGNORED_CHARACTERS_RE.sub('', series)\ - .replace('.', ' ').strip().lower() + if not sanitized_series: + sanitized_series = sanitize_string(series)\ + .replace('.', ' ').strip() else: - _series = _series.group(1) + sanitized_series = sanitized_series.group(1) if not links: logger.info('Series %r not found', series) @@ -127,15 +125,15 @@ def find_show_id(self, series): logger.warning('Could not parse %r', link.string) continue show = IGNORE_DATEMATCH.match( - IGNORED_CHARACTERS_RE.sub('', match.group('series'))\ - .replace('.', ' ').strip().lower(), + sanitize_string(match.group('series'))\ + .replace('.', ' ').strip(), ) if not show: logger.warning('Could not postparse %r', match.group('series')) continue show = show.group(1) - if show == _series: + if show == sanitized_series: return int(link['href'][8:-5]) return int(links[0]['href'][8:-5]) diff --git a/subliminal/subtitle.py b/subliminal/subtitle.py index f4a44a1c7..af9086559 100644 --- a/subliminal/subtitle.py +++ b/subliminal/subtitle.py @@ -4,11 +4,15 @@ import os.path import babelfish import pysrt +import re from .video import Episode, Movie logger = logging.getLogger(__name__) +#: The following characters are always stripped 
+IGNORED_CHARACTERS_RE = re.compile('[!@#$\'"]') + class Subtitle(object): """Base class for subtitle @@ -85,6 +89,23 @@ def __repr__(self): return '<%s [%s]>' % (self.__class__.__name__, self.language) +def sanitize_string(str_in): + """ + Sanitizes a string passed into it by eliminating characters that might + otherwise cause issues when attempting to locate a match on websites by + stripping out any special characters and forcing a consistent string that + can be used for caching too. + + :param string str_in: the string to sanitize + :return: sanitized string + :rtype: string + """ + if not isinstance(str_in, basestring): + # handle int, float, etc + str_in = str(str_in) + + return IGNORED_CHARACTERS_RE.sub('', str_in).lower().strip() + def get_subtitle_path(video_path, language=None): """Create the subtitle path from the given `video_path` and `language`