diff --git a/requirements.txt b/requirements.txt index a83b650ee..b0d7aea51 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ beautifulsoup4>=4.3.2 -guessit>=0.6.2,<0.7 +guessit>=0.7,<0.10 requests>=2.0.1 enzyme>=0.4.0 html5lib>=0.99 dogpile.cache>=0.5.2 -babelfish>=0.4.0,<0.5 +babelfish>=0.5.0 charade>=1.0.3 pysrt>=0.5.0 diff --git a/setup.py b/setup.py index 06a7c0f24..6840cabae 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,6 @@ entry_points={ 'console_scripts': ['subliminal = subliminal.cli:subliminal'], 'subliminal.providers': ['addic7ed = subliminal.providers.addic7ed:Addic7edProvider', - 'bierdopje = subliminal.providers.bierdopje:BierDopjeProvider', 'opensubtitles = subliminal.providers.opensubtitles:OpenSubtitlesProvider', 'podnapisi = subliminal.providers.podnapisi:PodnapisiProvider', 'thesubdb = subliminal.providers.thesubdb:TheSubDBProvider', diff --git a/subliminal/__init__.py b/subliminal/__init__.py index 21e323481..0dcc7dddc 100644 --- a/subliminal/__init__.py +++ b/subliminal/__init__.py @@ -12,5 +12,8 @@ from .subtitle import Subtitle from .video import VIDEO_EXTENSIONS, SUBTITLE_EXTENSIONS, Video, Episode, Movie, scan_videos, scan_video +class NullHandler(logging.Handler): + def emit(self, record): + pass -logging.getLogger(__name__).addHandler(logging.NullHandler()) +logging.getLogger(__name__).addHandler(NullHandler()) diff --git a/subliminal/api.py b/subliminal/api.py index ed840a614..606e9f918 100644 --- a/subliminal/api.py +++ b/subliminal/api.py @@ -6,9 +6,10 @@ import operator import babelfish import pkg_resources +from os.path import basename from .exceptions import ProviderNotAvailable, InvalidSubtitle from .subtitle import get_subtitle_path - +from socket import error as socket_error logger = logging.getLogger(__name__) @@ -67,16 +68,21 @@ def list_subtitles(videos, languages, providers=None, provider_configs=None): provider_entry_point.name, provider_video, provider_video_languages) try: provider_subtitles = 
provider.list_subtitles(provider_video, provider_video_languages) - except ProviderNotAvailable: + except ProviderNotAvailable as err: logger.warning('Provider %r is not available, discarding it', provider_entry_point.name) + logger.debug('ProviderNotAvailable error: %r', str(err)) break except: logger.exception('Unexpected error in provider %r', provider_entry_point.name) continue - logger.info('Found %d subtitles', len(provider_subtitles)) + logger.info('Found %d subtitle(s) on %s' % ( + len(provider_subtitles), + provider_entry_point.name, + )) subtitles[provider_video].extend(provider_subtitles) - except ProviderNotAvailable: + except ProviderNotAvailable as err: logger.warning('Provider %r is not available, discarding it', provider_entry_point.name) + logger.debug('ProviderNotAvailable error: %r', str(err)) return subtitles @@ -92,15 +98,19 @@ def download_subtitles(subtitles, provider_configs=None, single=False): """ provider_configs = provider_configs or {} discarded_providers = set() - providers_by_name = {ep.name: ep.load() for ep in pkg_resources.iter_entry_points(PROVIDERS_ENTRY_POINT)} + providers_by_name = dict([(ep.name, ep.load()) for ep in pkg_resources.iter_entry_points(PROVIDERS_ENTRY_POINT)]) + initialized_providers = {} + downloaded_subtitles = collections.defaultdict(list) + fetched_subtitles = set() try: for video, video_subtitles in subtitles.items(): - languages = {subtitle.language for subtitle in video_subtitles} + languages = set([subtitle.language for subtitle in video_subtitles]) downloaded_languages = set() for subtitle in video_subtitles: # filter if subtitle.language in downloaded_languages: + logger.debug('Skipping subtitle: %r already downloaded', subtitle.language) continue if subtitle.provider_name in discarded_providers: logger.debug('Skipping subtitle from discarded provider %r', subtitle.provider_name) @@ -113,19 +123,35 @@ def download_subtitles(subtitles, provider_configs=None, single=False): provider = 
providers_by_name[subtitle.provider_name](**provider_configs.get(subtitle.provider_name, {})) try: provider.initialize() - except ProviderNotAvailable: + except ProviderNotAvailable as err: logger.warning('Provider %r is not available, discarding it', subtitle.provider_name) + logger.debug('ProviderNotAvailable error: %r', str(err)) + discarded_providers.add(subtitle.provider_name) + continue + except socket_error as err: + logger.warning('Provider %r is not responding, discarding it', subtitle.provider_name) + logger.debug('Provider socket error: %r', str(err)) + discarded_providers.add(subtitle.provider_name) + continue + except: + logger.exception('Unexpected error in provider %r', subtitle.provider_name) discarded_providers.add(subtitle.provider_name) continue initialized_providers[subtitle.provider_name] = provider # download subtitles subtitle_path = get_subtitle_path(video.name, None if single else subtitle.language) + if basename(subtitle_path) in fetched_subtitles: + logger.debug('Skipping subtitle already retrieved %r', basename(subtitle_path)) + continue + logger.info('Downloading subtitle %r into %r', subtitle, subtitle_path) try: subtitle_text = provider.download_subtitle(subtitle) - except ProviderNotAvailable: + downloaded_subtitles[video].append(subtitle) + except ProviderNotAvailable as err: logger.warning('Provider %r is not available, discarding it', subtitle.provider_name) + logger.debug('ProviderNotAvailable error: %r', str(err)) discarded_providers.add(subtitle.provider_name) continue except InvalidSubtitle: @@ -136,8 +162,9 @@ def download_subtitles(subtitles, provider_configs=None, single=False): continue with io.open(subtitle_path, 'w', encoding='utf-8') as f: f.write(subtitle_text) - downloaded_languages.add(subtitle.language) - if single or downloaded_languages == languages: + downloaded_languages.add(subtitle.language) + fetched_subtitles.add(basename(subtitle_path)) + if single or sorted(downloaded_languages) == sorted(languages): break 
finally: # terminate providers for (provider_name, provider) in initialized_providers.items(): @@ -147,10 +174,11 @@ def download_subtitles(subtitles, provider_configs=None, single=False): logger.warning('Provider %r is not available, unable to terminate', provider_name) except: logger.exception('Unexpected error in provider %r', provider_name) + return downloaded_subtitles def download_best_subtitles(videos, languages, providers=None, provider_configs=None, single=False, min_score=0, - hearing_impaired=False): + hearing_impaired=False, hi_score_adjust=0): """Download the best subtitles for `videos` with the given `languages` using the specified `providers` :param videos: videos to download subtitles for @@ -164,11 +192,13 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= :param bool single: download with .srt extension if `True`, add language identifier otherwise :param int min_score: minimum score for subtitles to download :param bool hearing_impaired: download hearing impaired subtitles + :param int hi_score_adjust: Adjust hearing_impaired_scores if matched. 
""" provider_configs = provider_configs or {} discarded_providers = set() downloaded_subtitles = collections.defaultdict(list) + fetched_subtitles = set() # filter videos videos = [v for v in videos if v.subtitle_languages & languages < languages and (not single or babelfish.Language('und') not in v.subtitle_languages)] @@ -184,27 +214,37 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= continue Provider = provider_entry_point.load() if not Provider.languages & languages - subtitle_languages: - logger.info('Skipping provider %r: no language to search for', provider_entry_point.name) + logger.debug('Skipping provider %r: no language to search for', provider_entry_point.name) continue if not [v for v in videos if Provider.check(v)]: - logger.info('Skipping provider %r: no video to search for', provider_entry_point.name) + logger.debug('Skipping provider %r: video type not hosted here.', provider_entry_point.name) continue provider = Provider(**provider_configs.get(provider_entry_point.name, {})) try: provider.initialize() - except ProviderNotAvailable: + except ProviderNotAvailable as err: logger.warning('Provider %r is not available, discarding it', provider_entry_point.name) + logger.debug('ProviderNotAvailable error: %r', str(err)) + continue + except socket_error as err: + logger.warning('Provider %r is not responding, discarding it', provider_entry_point.name) + logger.debug('Provider socket error: %r', str(err)) + continue + except: + logger.exception('Unexpected error in provider %r', provider_entry_point.name) continue initialized_providers[provider_entry_point.name] = provider try: for video in videos: # search for subtitles subtitles = [] + downloaded_languages = set() for provider_name, provider in initialized_providers.items(): if provider.check(video): if provider_name in discarded_providers: logger.debug('Skipping discarded provider %r', provider_name) continue + provider_video_languages = provider.languages & 
languages - video.subtitle_languages if not provider_video_languages: logger.debug('Skipping provider %r: no language to search for for video %r', provider_name, @@ -214,30 +254,38 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= provider_name, video, provider_video_languages) try: provider_subtitles = provider.list_subtitles(video, provider_video_languages) - except ProviderNotAvailable: + except ProviderNotAvailable as err: logger.warning('Provider %r is not available, discarding it', provider_name) + logger.debug('ProviderNotAvailable error: %r', str(err)) discarded_providers.add(provider_name) continue except: logger.exception('Unexpected error in provider %r', provider_name) continue - logger.info('Found %d subtitles', len(provider_subtitles)) + logger.info('Found %d subtitle(s) on %s' % ( + len(provider_subtitles), + provider_name, + )) subtitles.extend(provider_subtitles) # find the best subtitles and download them - downloaded_languages = video.subtitle_languages.copy() - for subtitle, score in sorted([(s, s.compute_score(video)) for s in subtitles], - key=operator.itemgetter(1), reverse=True): + for subtitle, score in sorted([(s, s.compute_score(video, hi_score_adjust)) \ + for s in subtitles], key=operator.itemgetter(1), reverse=True): + # filter if subtitle.provider_name in discarded_providers: logger.debug('Skipping subtitle from discarded provider %r', subtitle.provider_name) continue - if subtitle.hearing_impaired != hearing_impaired: - logger.debug('Skipping subtitle: hearing impaired != %r', hearing_impaired) - continue + + if hearing_impaired is not None: + if subtitle.hearing_impaired != hearing_impaired: + logger.debug('Skipping subtitle: hearing impaired != %r', hearing_impaired) + continue + if score < min_score: logger.debug('Skipping subtitle: score < %d', min_score) continue + if subtitle.language in downloaded_languages: logger.debug('Skipping subtitle: %r already downloaded', subtitle.language) continue 
@@ -245,12 +293,17 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= # download provider = initialized_providers[subtitle.provider_name] subtitle_path = get_subtitle_path(video.name, None if single else subtitle.language) + if basename(subtitle_path) in fetched_subtitles: + logger.debug('Skipping subtitle already retrieved %r', basename(subtitle_path)) + continue + logger.info('Downloading subtitle %r with score %d into %r', subtitle, score, subtitle_path) try: subtitle_text = provider.download_subtitle(subtitle) downloaded_subtitles[video].append(subtitle) - except ProviderNotAvailable: + except ProviderNotAvailable as err: logger.warning('Provider %r is not available, discarding it', subtitle.provider_name) + logger.debug('ProviderNotAvailable error: %r', str(err)) discarded_providers.add(subtitle.provider_name) continue except InvalidSubtitle: @@ -261,16 +314,21 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs= continue with io.open(subtitle_path, 'w', encoding='utf-8') as f: f.write(subtitle_text) - downloaded_languages.add(subtitle.language) - if single or downloaded_languages >= languages: - logger.debug('All languages downloaded') + downloaded_languages.add(subtitle.language) + fetched_subtitles.add(basename(subtitle_path)) + if single or sorted(downloaded_languages) == sorted(languages): break + finally: # terminate providers for (provider_name, provider) in initialized_providers.items(): try: provider.terminate() - except ProviderNotAvailable: + except ProviderNotAvailable as err: + logger.warning('Provider %r is not available, unable to terminate', provider_name) + logger.debug('ProviderNotAvailable error: %r', str(err)) + except socket_error as err: logger.warning('Provider %r is not available, unable to terminate', provider_name) + logger.debug('Provider socket error: %r', str(err)) except: logger.exception('Unexpected error in provider %r', provider_name) return downloaded_subtitles 
diff --git a/subliminal/cache.py b/subliminal/cache.py index fdacbfb79..4735aa26a 100644 --- a/subliminal/cache.py +++ b/subliminal/cache.py @@ -7,7 +7,7 @@ #: Subliminal's cache version -CACHE_VERSION = 1 +CACHE_VERSION = 2 def subliminal_key_generator(namespace, fn, to_str=string_type): diff --git a/subliminal/cli.py b/subliminal/cli.py index 2b5a916f0..2d6e94c8e 100644 --- a/subliminal/cli.py +++ b/subliminal/cli.py @@ -52,11 +52,6 @@ def subliminal(): filtering_group.add_argument('-f', '--force', action='store_true', help='force subtitle download for videos with existing subtitles') - # addic7ed - addic7ed_group = parser.add_argument_group('addic7ed') - addic7ed_group.add_argument('--addic7ed-username', metavar='USERNAME', help='username for addic7ed provider') - addic7ed_group.add_argument('--addic7ed-password', metavar='PASSWORD', help='password for addic7ed provider') - # output output_group = parser.add_argument_group('output') output_exclusive_group = output_group.add_mutually_exclusive_group() @@ -81,7 +76,7 @@ def subliminal(): # parse languages try: - args.languages = {babelfish.Language.fromietf(l) for l in args.languages} + args.languages = set( babelfish.Language.fromietf(l) for l in args.languages ) except babelfish.Error: parser.error('argument -l/--languages: codes are not IETF: %r' % args.languages) @@ -90,7 +85,7 @@ def subliminal(): match = re.match(r'^(?:(?P\d+?)w)?(?:(?P\d+?)d)?(?:(?P\d+?)h)?$', args.age) if not match: parser.error('argument -a/--age: invalid age: %r' % args.age) - args.age = datetime.timedelta(**{k: int(v) for k, v in match.groupdict(0).items()}) + args.age = datetime.timedelta(**dict([(k, int(v)) for k, v in match.groupdict(0).items()])) # parse cache-file args.cache_file = os.path.abspath(os.path.expanduser(args.cache_file)) @@ -100,11 +95,6 @@ def subliminal(): # parse provider configs provider_configs = {} - if (args.addic7ed_username is not None and args.addic7ed_password is None - or args.addic7ed_username is None and 
args.addic7ed_password is not None): - parser.error('argument --addic7ed-username/--addic7ed-password: both arguments are required or none') - if args.addic7ed_username is not None and args.addic7ed_password is not None: - provider_configs['addic7ed'] = {'username': args.addic7ed_username, 'password': args.addic7ed_password} # parse color if args.color and colorlog is None: @@ -146,7 +136,7 @@ def subliminal(): embedded_subtitles=not args.force, age=args.age) # guess videos - videos.extend([Video.fromguess(os.path.split(p)[1], guessit.guess_file_info(p, 'autodetect')) for p in args.paths + videos.extend([Video.fromguess(os.path.split(p)[1], guessit.guess_file_info(p, info=['filename'])) for p in args.paths if not os.path.exists(p)]) # download best subtitles diff --git a/subliminal/converters/addic7ed.py b/subliminal/converters/addic7ed.py index 2915a2b7d..0e862931d 100644 --- a/subliminal/converters/addic7ed.py +++ b/subliminal/converters/addic7ed.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -from babelfish import LanguageReverseConverter, get_language_converter +from babelfish import LanguageReverseConverter, language_converters class Addic7edConverter(LanguageReverseConverter): def __init__(self): - self.name_converter = get_language_converter('name') + self.name_converter = language_converters['name'] self.from_addic7ed = {'CatalĂ ': ('cat',), 'Chinese (Simplified)': ('zho',), 'Chinese (Traditional)': ('zho',), 'Euskera': ('eus',), 'Galego': ('glg',), 'Greek': ('ell',), 'Malay': ('msa',), 'Portuguese (Brazilian)': ('por', 'BR'), 'Serbian (Cyrillic)': ('srp', None, 'Cyrl'), diff --git a/subliminal/converters/podnapisi.py b/subliminal/converters/podnapisi.py index d73cb1c1f..6b909fb5f 100644 --- a/subliminal/converters/podnapisi.py +++ b/subliminal/converters/podnapisi.py @@ -14,7 +14,7 @@ def __init__(self): 11: ('jpn',), 4: ('kor',), 29: ('sqi',), 6: ('isl',), 19: ('lit',), 46: ('ukr',), 44: ('tha',), 53: ('cat',), 56: 
('sin',), 21: ('lav',), 40: ('cmn',), 55: ('msa',), 42: ('hin',), 50: ('bel',)} - self.to_podnapisi = {v: k for k, v in self.from_podnapisi.items()} + self.to_podnapisi = dict([(v, k) for k, v in self.from_podnapisi.items()]) self.codes = set(self.from_podnapisi.keys()) def convert(self, alpha3, country=None, script=None): diff --git a/subliminal/converters/tvsubtitles.py b/subliminal/converters/tvsubtitles.py index d817a2711..196134787 100644 --- a/subliminal/converters/tvsubtitles.py +++ b/subliminal/converters/tvsubtitles.py @@ -1,14 +1,14 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -from babelfish import LanguageReverseConverter, get_language_converter +from babelfish import LanguageReverseConverter, language_converters class TVsubtitlesConverter(LanguageReverseConverter): def __init__(self): - self.alpha2_converter = get_language_converter('alpha2') + self.alpha2_converter = language_converters['alpha2'] self.from_tvsubtitles = {'br': ('por', 'BR'), 'ua': ('ukr',), 'gr': ('ell',), 'cn': ('zho',), 'jp': ('jpn',), 'cz': ('ces',)} - self.to_tvsubtitles = {v: k for k, v in self.from_tvsubtitles} + self.to_tvsubtitles = set([(v, k) for k, v in self.from_tvsubtitles]) self.codes = self.alpha2_converter.codes | set(self.from_tvsubtitles.keys()) def convert(self, alpha3, country=None, script=None): diff --git a/subliminal/providers/__init__.py b/subliminal/providers/__init__.py index 87d67d933..581799b97 100644 --- a/subliminal/providers/__init__.py +++ b/subliminal/providers/__init__.py @@ -2,7 +2,25 @@ from __future__ import unicode_literals import babelfish from ..video import Episode, Movie - +from .. 
import __version__ +from random import randint + +# Agent List +AGENT_LIST = ( + 'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0', + 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0', + 'Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0', + 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20130401 Firefox/31.0', + 'Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36', + 'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko', + 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A', + 'Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25', +) class Provider(object): """Base class for providers @@ -22,6 +40,13 @@ class Provider(object): #: Required hash, if any required_hash = None + # Returns a random agent to use from the list above + random_user_agent = AGENT_LIST[randint(0, len(AGENT_LIST)-1)] + + # Defines the ideal user agent to use for all providers otherwise + primary_user_agent = 'Subliminal/%s' % __version__ + + def __init__(self, **kwargs): pass diff --git a/subliminal/providers/addic7ed.py b/subliminal/providers/addic7ed.py index 79c7622e7..d6cfc5df0 100644 --- a/subliminal/providers/addic7ed.py +++ b/subliminal/providers/addic7ed.py @@ -6,10 +6,9 @@ import charade import requests 
from . import Provider -from .. import __version__ from ..cache import region from ..exceptions import ProviderConfigurationError, ProviderNotAvailable, InvalidSubtitle -from ..subtitle import Subtitle, is_valid_subtitle +from ..subtitle import Subtitle, is_valid_subtitle, sanitize_string from ..video import Episode @@ -53,49 +52,20 @@ def compute_matches(self, video): class Addic7edProvider(Provider): - languages = {babelfish.Language('por', 'BR')} | {babelfish.Language(l) + languages = set([babelfish.Language('por', 'BR')]) | set([babelfish.Language(l) for l in ['ara', 'aze', 'ben', 'bos', 'bul', 'cat', 'ces', 'dan', 'deu', 'ell', 'eng', 'eus', 'fas', 'fin', 'fra', 'glg', 'heb', 'hrv', 'hun', 'hye', 'ind', 'ita', 'jpn', 'kor', 'mkd', 'msa', 'nld', 'nor', 'pol', 'por', 'ron', 'rus', 'slk', 'slv', 'spa', 'sqi', 'srp', 'swe', 'tha', - 'tur', 'ukr', 'vie', 'zho']} + 'tur', 'ukr', 'vie', 'zho']]) video_types = (Episode,) server = 'http://www.addic7ed.com' - def __init__(self, username=None, password=None): - if username is not None and password is None or username is None and password is not None: - raise ProviderConfigurationError('Username and password must be specified') - self.username = username - self.password = password - self.logged_in = False - def initialize(self): self.session = requests.Session() - self.session.headers = {'User-Agent': 'Subliminal/%s' % __version__} - # login - if self.username is not None and self.password is not None: - logger.debug('Logging in') - data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'} - try: - r = self.session.post(self.server + '/dologin.php', data, timeout=10, allow_redirects=False) - except requests.Timeout: - raise ProviderNotAvailable('Timeout after 10 seconds') - if r.status_code == 302: - logger.info('Logged in') - self.logged_in = True - else: - logger.error('Failed to login') - - def terminate(self): - # logout - if self.logged_in: - try: - r = self.session.get(self.server + 
'/logout.php', timeout=10) - logger.info('Logged out') - except requests.Timeout: - raise ProviderNotAvailable('Timeout after 10 seconds') - if r.status_code != 200: - raise ProviderNotAvailable('Request failed with status code %d' % r.status_code) - self.session.close() + self.session.headers = { + 'User-Agent': self.random_user_agent, + 'Referer': self.server, + } def get(self, url, params=None): """Make a GET request on `url` with the given parameters @@ -126,7 +96,8 @@ def get_show_ids(self): soup = self.get('/shows.php') show_ids = {} for html_show in soup.select('td.version > h3 > a[href^="/show/"]'): - show_ids[html_show.string.lower()] = int(html_show['href'][6:]) + show_ids[sanitize_string(html_show.string)] = \ + int(html_show['href'][6:]) return show_ids @region.cache_on_arguments() @@ -150,10 +121,11 @@ def find_show_id(self, series): def query(self, series, season): show_ids = self.get_show_ids() - if series.lower() in show_ids: - show_id = show_ids[series.lower()] + sanitized_series = sanitize_string(series) + if sanitized_series in show_ids: + show_id = show_ids[sanitized_series] else: - show_id = self.find_show_id(series.lower()) + show_id = self.find_show_id(sanitized_series) if show_id is None: return [] params = {'show_id': show_id, 'season': season} diff --git a/subliminal/providers/bierdopje.py b/subliminal/providers/bierdopje.py deleted file mode 100644 index b8f5a5f24..000000000 --- a/subliminal/providers/bierdopje.py +++ /dev/null @@ -1,135 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import unicode_literals -import logging -import urllib -import babelfish -import charade -import guessit -import requests -import xml.etree.ElementTree -from . import Provider -from .. 
import __version__ -from ..cache import region -from ..exceptions import InvalidSubtitle, ProviderNotAvailable, ProviderError -from ..subtitle import Subtitle, is_valid_subtitle, compute_guess_matches -from ..video import Episode - - -logger = logging.getLogger(__name__) - - -class BierDopjeSubtitle(Subtitle): - provider_name = 'bierdopje' - - def __init__(self, language, season, episode, tvdb_id, series, filename, download_link): - super(BierDopjeSubtitle, self).__init__(language) - self.season = season - self.episode = episode - self.tvdb_id = tvdb_id - self.series = series - self.filename = filename - self.download_link = download_link - - def compute_matches(self, video): - matches = set() - # tvdb_id - if video.tvdb_id and self.tvdb_id == video.tvdb_id: - matches.add('tvdb_id') - # series - if video.series and self.series == video.series: - matches.add('series') - # season - if video.season and self.season == video.season: - matches.add('season') - # episode - if video.episode and self.episode == video.episode: - matches.add('episode') - matches |= compute_guess_matches(video, guessit.guess_episode_info(self.filename + '.mkv')) - return matches - - -class BierDopjeProvider(Provider): - languages = {babelfish.Language(l) for l in ['eng', 'nld']} - video_types = (Episode,) - - def initialize(self): - self.session = requests.Session() - self.session.headers = {'User-Agent': 'Subliminal/%s' % __version__} - - def terminate(self): - self.session.close() - - def get(self, url, **params): - """Make a GET request on the `url` formatted with `**params` - - :param string url: API part of the URL to reach without the leading slash - :param \*\*params: format specs for the `url` - :return: the response - :rtype: :class:`xml.etree.ElementTree.Element` - :raise: :class:`~subliminal.exceptions.ProviderNotAvailable` - - """ - try: - r = self.session.get('http://api.bierdopje.com/A2B638AC5D804C2E/' + url.format(**params), timeout=10) - except requests.Timeout: - raise 
ProviderNotAvailable('Timeout after 10 seconds') - if r.status_code == 429: - raise ProviderNotAvailable('Too Many Requests') - elif r.status_code != 200: - raise ProviderError('Request failed with status code %d' % r.status_code) - return xml.etree.ElementTree.fromstring(r.content) - - @region.cache_on_arguments() - def find_show_id(self, series): - """Find the show id from series name - - :param string series: series of the episode - :return: show id - :rtype: int - - """ - logger.debug('Searching for series %r', series) - root = self.get('FindShowByName/{series}', series=urllib.quote(series)) - if root.find('response/status').text == 'false': - logger.info('Series %r not found', series) - return None - return int(root.find('response/results/result[1]/showid').text) - - def query(self, language, season, episode, tvdb_id=None, series=None): - params = {'language': language.alpha2, 'season': season, 'episode': episode} - if tvdb_id is not None: - params['showid'] = tvdb_id - params['istvdbid'] = 'true' - elif series is not None: - show_id = self.find_show_id(series) - if show_id is None: - return [] - params['showid'] = show_id - params['istvdbid'] = 'false' - else: - raise ValueError('Missing parameter tvdb_id or series') - logger.debug('Searching subtitles %r', params) - root = self.get('GetAllSubsFor/{showid}/{season}/{episode}/{language}/{istvdbid}', **params) - if root.find('response/status').text == 'false': - logger.debug('No subtitle found') - return [] - logger.debug('Found subtitles %r', root.find('response/results')) - return [BierDopjeSubtitle(language, season, episode, tvdb_id, series, result.find('filename').text, - result.find('downloadlink').text) for result in root.find('response/results')] - - def list_subtitles(self, video, languages): - return [s for l in languages for s in self.query(l, video.season, video.episode, video.tvdb_id, video.series)] - - def download_subtitle(self, subtitle): - try: - r = self.session.get(subtitle.download_link, 
timeout=10) - except requests.Timeout: - raise ProviderNotAvailable('Timeout after 10 seconds') - if r.status_code == 429: - raise ProviderNotAvailable('Too Many Requests') - elif r.status_code != 200: - raise ProviderError('Request failed with status code %d' % r.status_code) - subtitle_text = r.content.decode(charade.detect(r.content)['encoding'], 'replace') - if not is_valid_subtitle(subtitle_text): - raise InvalidSubtitle - return subtitle_text diff --git a/subliminal/providers/opensubtitles.py b/subliminal/providers/opensubtitles.py index b98a29dbd..1efaf086b 100644 --- a/subliminal/providers/opensubtitles.py +++ b/subliminal/providers/opensubtitles.py @@ -13,6 +13,7 @@ from .. import __version__ from ..exceptions import ProviderError, ProviderNotAvailable, InvalidSubtitle from ..subtitle import Subtitle, is_valid_subtitle, compute_guess_matches +from ..subtitle import sanitize_string from ..video import Episode, Movie @@ -50,7 +51,9 @@ def compute_matches(self, video): # episode if isinstance(video, Episode) and self.movie_kind == 'episode': # series - if video.series and self.series_name.lower() == video.series.lower(): + if video.series and \ + sanitize_string(self.series_name) == \ + sanitize_string(video.series): matches.add('series') # season if video.season and self.series_season == video.season: @@ -77,13 +80,15 @@ def compute_matches(self, video): if video.imdb_id and self.movie_imdb_id == video.imdb_id: matches.add('imdb_id') # title - if video.title and self.movie_name.lower() == video.title.lower(): + if video.title and \ + sanitize_string(self.movie_name) == \ + sanitize_string(video.title): matches.add('title') return matches class OpenSubtitlesProvider(Provider): - languages = {babelfish.Language.fromopensubtitles(l) for l in babelfish.get_language_converter('opensubtitles').codes} + languages = set([babelfish.Language.fromopensubtitles(l) for l in babelfish.language_converters['opensubtitles'].codes]) def __init__(self): self.server = 
xmlrpclib.ServerProxy('http://api.opensubtitles.org/xml-rpc') diff --git a/subliminal/providers/podnapisi.py b/subliminal/providers/podnapisi.py index a3815f5ab..0148020be 100644 --- a/subliminal/providers/podnapisi.py +++ b/subliminal/providers/podnapisi.py @@ -3,6 +3,7 @@ import io import logging import re +import contextlib import xml.etree.ElementTree import zipfile import babelfish @@ -11,14 +12,17 @@ import guessit import requests from . import Provider -from .. import __version__ from ..exceptions import InvalidSubtitle, ProviderNotAvailable, ProviderError from ..subtitle import Subtitle, is_valid_subtitle, compute_guess_matches +from ..subtitle import sanitize_string from ..video import Episode, Movie logger = logging.getLogger(__name__) - +URL_RE = re.compile( + '^((http[s]?|ftp):\/)?\/?([^:\/\s]+)(:([^\/]*))?((\/\w+)*\/)' + \ + '([\w\-\.]+[^#?\s]+)(\?([^#]*))?(#(.*))?$', +) class PodnapisiSubtitle(Subtitle): provider_name = 'podnapisi' @@ -29,7 +33,7 @@ def __init__(self, language, id, releases, hearing_impaired, link, series=None, self.id = id self.releases = releases self.hearing_impaired = hearing_impaired - self.link = link + self.link = '/ppodnapisi' + link self.series = series self.season = season self.episode = episode @@ -41,7 +45,9 @@ def compute_matches(self, video): # episode if isinstance(video, Episode): # series - if video.series and self.series.lower() == video.series.lower(): + if video.series and \ + sanitize_string(self.series) == \ + sanitize_string(video.series): matches.add('series') # season if video.season and self.season == video.season: @@ -55,7 +61,9 @@ def compute_matches(self, video): # movie elif isinstance(video, Movie): # title - if video.title and self.title.lower() == video.title.lower(): + if video.title and \ + sanitize_string(self.title) == \ + sanitize_string(video.title): matches.add('title') # year if video.year and self.year == video.year: @@ -67,31 +75,43 @@ def compute_matches(self, video): class 
PodnapisiProvider(Provider): - languages = {babelfish.Language.frompodnapisi(l) for l in babelfish.get_language_converter('podnapisi').codes} + languages = set([babelfish.Language.frompodnapisi(l) for l in babelfish.language_converters['podnapisi'].codes]) video_types = (Episode, Movie) server = 'http://simple.podnapisi.net' - link_re = re.compile('^.*(?P/ppodnapisi/download/i/\d+/k/.*$)') + pre_link_re = re.compile('^.*(?P/ppodnapisi/predownload/i/\d+/k/.*$)') + link_re = re.compile('^.*(?P/[a-zA-Z]{2}/ppodnapisi/download/i/\d+/k/.*$)') def initialize(self): self.session = requests.Session() - self.session.headers = {'User-Agent': 'Subliminal/%s' % __version__} + self.session.headers = {'User-Agent': self.primary_user_agent } def terminate(self): self.session.close() - def get(self, url, params=None, is_xml=True): + def get(self, url, params=None, headers=None, is_xml=True): """Make a GET request on `url` with the given parameters :param string url: part of the URL to reach with the leading slash :param dict params: params of the request + :param dict headers: headers of the request :param bool xml: whether the response content is XML or not :return: the response :rtype: :class:`xml.etree.ElementTree.Element` or :class:`bs4.BeautifulSoup` :raise: :class:`~subliminal.exceptions.ProviderNotAvailable` """ + + prefix_url = '' + url_result = URL_RE.search(url) + if url_result and url_result.group(2) is None: + prefix_url = self.server + try: - r = self.session.get(self.server + '/ppodnapisi' + url, params=params, timeout=10) + r = self.session.get( + prefix_url + url, params=params, + headers=headers, + timeout=10, + ) except requests.Timeout: raise ProviderNotAvailable('Timeout after 10 seconds') if r.status_code != 200: @@ -116,20 +136,30 @@ def query(self, language, series=None, season=None, episode=None, title=None, ye logger.debug('Searching episode %r', params) subtitles = [] while True: - root = self.get('/search', params) + root = self.get('/ppodnapisi/search', 
params) if not int(root.find('pagination/results').text): logger.debug('No subtitle found') break if series and season and episode: - subtitles.extend([PodnapisiSubtitle(language, int(s.find('id').text), s.find('release').text.split(), + try: + subtitles.extend([PodnapisiSubtitle(language, int(s.find('id').text), s.find('release').text.split(), 'h' in (s.find('flags').text or ''), s.find('url').text[38:], series=series, season=season, episode=episode) for s in root.findall('subtitle')]) + except AttributeError: + # there simply wasn't enough information in the TV Show + # gracefully handle this instead of crashing :) + break elif title: - subtitles.extend([PodnapisiSubtitle(language, int(s.find('id').text), s.find('release').text.split(), + try: + subtitles.extend([PodnapisiSubtitle(language, int(s.find('id').text), s.find('release').text.split(), 'h' in (s.find('flags').text or ''), s.find('url').text[38:], title=title, year=year) for s in root.findall('subtitle')]) + except AttributeError: + # there simply wasn't enough information in the movie + # gracefully handle this instead of crashing :) + break if int(root.find('pagination/current').text) >= int(root.find('pagination/count').text): break params['page'] = int(root.find('pagination/current').text) + 1 @@ -144,6 +174,21 @@ def list_subtitles(self, video, languages): def download_subtitle(self, subtitle): soup = self.get(subtitle.link, is_xml=False) + pre_link = soup.find('a', href=self.pre_link_re) + if not pre_link: + raise ProviderError('Cannot find the pre-download link') + pre_link = self.server + \ + self.pre_link_re.match(pre_link['href']).group('link') + + # Continue following the link + soup = self.get( + pre_link, + headers={ + 'Referer': self.server, + }, + is_xml=False, + ) + link = soup.find('a', href=self.link_re) if not link: raise ProviderError('Cannot find the download link') @@ -153,7 +198,7 @@ def download_subtitle(self, subtitle): raise ProviderNotAvailable('Timeout after 10 seconds') if 
r.status_code != 200: raise ProviderNotAvailable('Request failed with status code %d' % r.status_code) - with zipfile.ZipFile(io.BytesIO(r.content)) as zf: + with contextlib.closing(zipfile.ZipFile(io.BytesIO(r.content))) as zf: if len(zf.namelist()) > 1: raise ProviderError('More than one file to unzip') subtitle_bytes = zf.read(zf.namelist()[0]) diff --git a/subliminal/providers/thesubdb.py b/subliminal/providers/thesubdb.py index e610a9d49..726554f79 100644 --- a/subliminal/providers/thesubdb.py +++ b/subliminal/providers/thesubdb.py @@ -29,7 +29,7 @@ def compute_matches(self, video): class TheSubDBProvider(Provider): - languages = {babelfish.Language.fromalpha2(l) for l in ['en', 'es', 'fr', 'it', 'nl', 'pl', 'pt', 'ro', 'sv', 'tr']} + languages = set([babelfish.Language.fromalpha2(l) for l in ['en', 'es', 'fr', 'it', 'nl', 'pl', 'pt', 'ro', 'sv', 'tr']]) required_hash = 'thesubdb' def initialize(self): @@ -65,7 +65,7 @@ def query(self, hash): # @ReservedAssignment elif r.status_code != 200: raise ProviderError('Request failed with status code %d' % r.status_code) return [TheSubDBSubtitle(language, hash) for language in - {babelfish.Language.fromalpha2(l) for l in r.content.split(',')}] + set([babelfish.Language.fromalpha2(l) for l in r.content.split(',')])] def list_subtitles(self, video, languages): return [s for s in self.query(video.hashes['thesubdb']) if s.language in languages] diff --git a/subliminal/providers/tvsubtitles.py b/subliminal/providers/tvsubtitles.py index e41fda606..615b7aa31 100644 --- a/subliminal/providers/tvsubtitles.py +++ b/subliminal/providers/tvsubtitles.py @@ -3,18 +3,19 @@ import io import logging import re +import contextlib import zipfile import babelfish import bs4 import charade import requests from . import Provider -from .. 
import __version__ from ..cache import region from ..exceptions import InvalidSubtitle, ProviderNotAvailable, ProviderError -from ..subtitle import Subtitle, is_valid_subtitle +from ..subtitle import Subtitle, is_valid_subtitle, sanitize_string from ..video import Episode +IGNORE_DATEMATCH=re.compile('^(.*)[ \t0-9-._)(]*$') logger = logging.getLogger(__name__) @@ -56,9 +57,9 @@ def compute_matches(self, video): class TVsubtitlesProvider(Provider): - languages = {babelfish.Language('por', 'BR')} | {babelfish.Language(l) + languages = set([babelfish.Language('por', 'BR')]) | set([babelfish.Language(l) for l in ['ara', 'bul', 'ces', 'dan', 'deu', 'ell', 'eng', 'fin', 'fra', 'hun', 'ita', 'jpn', 'kor', - 'nld', 'pol', 'por', 'ron', 'rus', 'spa', 'swe', 'tur', 'ukr', 'zho']} + 'nld', 'pol', 'por', 'ron', 'rus', 'spa', 'swe', 'tur', 'ukr', 'zho']]) video_types = (Episode,) server = 'http://www.tvsubtitles.net' episode_id_re = re.compile('^episode-\d+\.html$') @@ -67,7 +68,7 @@ class TVsubtitlesProvider(Provider): def initialize(self): self.session = requests.Session() - self.session.headers = {'User-Agent': 'Subliminal/%s' % __version__} + self.session.headers = {'User-Agent': self.primary_user_agent } def terminate(self): self.session.close() @@ -105,15 +106,34 @@ def find_show_id(self, series): logger.debug('Searching series %r', data) soup = self.request('/search.php', data=data, method='POST') links = soup.select('div.left li div a[href^="/tvshow-"]') + sanitized_series = IGNORE_DATEMATCH.match( + sanitize_string(series).replace('.', ' ').strip(), + ) + if not sanitized_series: + sanitized_series = sanitize_string(series)\ + .replace('.', ' ').strip() + else: + sanitized_series = sanitized_series.group(1) + if not links: logger.info('Series %r not found', series) return None + for link in links: match = self.link_re.match(link.string) if not match: logger.warning('Could not parse %r', link.string) continue - if match.group('series').lower().replace('.', ' ').strip() 
== series.lower(): + show = IGNORE_DATEMATCH.match( + sanitize_string(match.group('series'))\ + .replace('.', ' ').strip(), + ) + if not show: + logger.warning('Could not postparse %r', match.group('series')) + continue + show = show.group(1) + + if show == sanitized_series: return int(link['href'][8:-5]) return int(links[0]['href'][8:-5]) @@ -139,7 +159,7 @@ def find_episode_ids(self, show_id, season): return episode_ids def query(self, series, season, episode): - show_id = self.find_show_id(series.lower()) + show_id = self.find_show_id(series) if show_id is None: return [] episode_ids = self.find_episode_ids(show_id, season) @@ -165,7 +185,7 @@ def download_subtitle(self, subtitle): raise ProviderNotAvailable('Timeout after 10 seconds') if r.status_code != 200: raise ProviderNotAvailable('Request failed with status code %d' % r.status_code) - with zipfile.ZipFile(io.BytesIO(r.content)) as zf: + with contextlib.closing(zipfile.ZipFile(io.BytesIO(r.content))) as zf: if len(zf.namelist()) > 1: raise ProviderError('More than one file to unzip') subtitle_bytes = zf.read(zf.namelist()[0]) diff --git a/subliminal/subtitle.py b/subliminal/subtitle.py index f7d97f42d..af9086559 100644 --- a/subliminal/subtitle.py +++ b/subliminal/subtitle.py @@ -4,11 +4,15 @@ import os.path import babelfish import pysrt +import re from .video import Episode, Movie logger = logging.getLogger(__name__) +#: The following characters are always stripped +IGNORED_CHARACTERS_RE = re.compile('[!@#$\'"]') + class Subtitle(object): """Base class for subtitle @@ -33,7 +37,7 @@ def compute_matches(self, video): """ raise NotImplementedError - def compute_score(self, video): + def compute_score(self, video, hi_score_adjust=0): """Compute the score of the subtitle against the `video` There are equivalent matches so that a provider can match one element or its equivalent. 
This is @@ -47,6 +51,7 @@ def compute_score(self, video): :param video: the video to compute the score against :type video: :class:`~subliminal.video.Video` + :param hi_score_adjust: adjust hearing impaired matched videos by this value :return: score of the subtitle :rtype: int @@ -62,20 +67,45 @@ def compute_score(self, video): # remove equivalences if isinstance(video, Episode): if 'imdb_id' in matches: - matches -= {'series', 'tvdb_id', 'season', 'episode', 'title'} + matches -= set(['series', 'tvdb_id', 'season', 'episode', 'title']) if 'tvdb_id' in matches: - matches -= {'series'} + matches -= set(['series',]) if 'title' in matches: - matches -= {'season', 'episode'} + matches -= set(['season', 'episode']) # add other scores - score += sum((video.scores[match] for match in matches)) - logger.info('Computed score %d with matches %r', score, initial_matches) + score += sum([video.scores[match] for match in matches]) + + # Adjust scoring if hearing impaired subtitles are detected + if self.hearing_impaired and hi_score_adjust != 0: + logger.debug('Hearing impaired subtitle score adjusted ' + \ + 'by %d' % hi_score_adjust) + # Prioritization (adjust score) + score += hi_score_adjust + + logger.debug('Computed score %d with matches %r', score, initial_matches) return score def __repr__(self): return '<%s [%s]>' % (self.__class__.__name__, self.language) +def sanitize_string(str_in): + """ + Sanitizes a string passed into it by eliminating characters that might + otherwise cause issues when attempting to locate a match on websites by + stripping out any special characters and forcing a consistent string that + can be used for caching too. 
+ + :param string str_in: the string to sanitize + :return: sanitized string + :rtype: string + """ + if not isinstance(str_in, basestring): + # handle int, float, etc + str_in = str(str_in) + + return IGNORED_CHARACTERS_RE.sub('', str_in).lower().strip() + def get_subtitle_path(video_path, language=None): """Create the subtitle path from the given `video_path` and `language` diff --git a/subliminal/tests/test_providers.py b/subliminal/tests/test_providers.py index 034e5773f..27515f564 100644 --- a/subliminal/tests/test_providers.py +++ b/subliminal/tests/test_providers.py @@ -30,131 +30,61 @@ def test_find_show_id(self): def test_find_show_id_error(self): with self.Provider() as provider: show_id = provider.find_show_id('the big how i met your mother') - self.assertIsNone(show_id) + self.assertEqual(show_id, None) def test_get_show_ids(self): with self.Provider() as provider: show_ids = provider.get_show_ids() - self.assertIn('the big bang theory', show_ids) + self.assertTrue('the big bang theory' in show_ids) self.assertEqual(show_ids['the big bang theory'], 126) def test_query_episode_0(self): video = EPISODES[0] - languages = {Language('tur'), Language('rus'), Language('heb'), Language('ita'), Language('fra'), + languages = set([Language('rus'), Language('heb'), Language('ita'), Language('fra'), Language('ron'), Language('nld'), Language('eng'), Language('deu'), Language('ell'), - Language('por', 'BR'), Language('bul')} - matches = {frozenset(['episode', 'release_group', 'title', 'series', 'resolution', 'season']), + Language('por', 'BR'), Language('bul')]) + matches = set([frozenset(['episode', 'release_group', 'title', 'series', 'resolution', 'season']), frozenset(['series', 'resolution', 'season']), frozenset(['series', 'episode', 'season', 'title']), frozenset(['series', 'release_group', 'season']), frozenset(['series', 'episode', 'season', 'release_group', 'title']), - frozenset(['series', 'season'])} + frozenset(['series', 'season'])]) with 
self.Provider() as provider: subtitles = provider.query(video.series, video.season) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_query_episode_1(self): video = EPISODES[1] - languages = {Language('ind'), Language('spa'), Language('hrv'), Language('ita'), Language('fra'), + languages = set([Language('ind'), Language('spa'), Language('hrv'), Language('ita'), Language('fra'), Language('cat'), Language('ell'), Language('nld'), Language('eng'), Language('fas'), Language('por'), Language('nor'), Language('deu'), Language('ron'), Language('por', 'BR'), - Language('bul')} - matches = {frozenset(['series', 'episode', 'resolution', 'season', 'title']), + Language('bul')]) + matches = set([frozenset(['series', 'episode', 'resolution', 'season', 'title']), frozenset(['series', 'resolution', 'season']), frozenset(['series', 'episode', 'season', 'title']), frozenset(['series', 'release_group', 'season']), frozenset(['series', 'resolution', 'release_group', 'season']), frozenset(['series', 'episode', 'season', 'release_group', 'title']), - frozenset(['series', 'season'])} + frozenset(['series', 'season'])]) with self.Provider() as provider: subtitles = provider.query(video.series, video.season) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_list_subtitles(self): video = EPISODES[0] - languages = 
{Language('eng'), Language('fra')} - matches = {frozenset(['series', 'episode', 'season', 'release_group', 'title']), - frozenset(['series', 'episode', 'season', 'title'])} + languages = set([Language('eng'), Language('fra')]) + matches = set([frozenset(['series', 'episode', 'season', 'release_group', 'title']), + frozenset(['series', 'episode', 'season', 'title'])]) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_download_subtitle(self): video = EPISODES[0] - languages = {Language('eng'), Language('fra')} - with self.Provider() as provider: - subtitles = provider.list_subtitles(video, languages) - subtitle_text = provider.download_subtitle(subtitles[0]) - self.assertTrue(is_valid_subtitle(subtitle_text)) - - -class BierDopjeProviderTestCase(ProviderTestCase): - provider_name = 'bierdopje' - - def test_find_show_id(self): - with self.Provider() as provider: - show_id = provider.find_show_id('The Big Bang') - self.assertEqual(show_id, 9203) - - def test_find_show_id_error(self): - with self.Provider() as provider: - show_id = provider.find_show_id('the big how i met your mother') - self.assertIsNone(show_id) - - def test_query_episode_0(self): - video = EPISODES[0] - language = Language('eng') - matches = {frozenset(['series', 'video_codec', 'resolution', 'episode', 'season']), - frozenset(['season', 'video_codec', 'episode', 'series']), - frozenset(['episode', 'video_codec', 'season', 'series', 'resolution', 'release_group'])} - with self.Provider() as provider: - subtitles = provider.query(language, video.season, video.episode, 
series=video.series) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, {language}) - - def test_query_episode_1(self): - video = EPISODES[1] - language = Language('nld') - matches = {frozenset(['series', 'video_codec', 'resolution', 'episode', 'season']), - frozenset(['season', 'video_codec', 'episode', 'series']), - frozenset(['series', 'episode', 'season']), - frozenset(['season', 'video_codec', 'episode', 'release_group', 'series']), - frozenset(['episode', 'video_codec', 'season', 'series', 'resolution', 'release_group'])} - with self.Provider() as provider: - subtitles = provider.query(language, video.season, video.episode, series=video.series) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, {language}) - - def test_query_episode_0_tvdb_id(self): - video = EPISODES[0] - language = Language('eng') - matches = {frozenset(['video_codec', 'tvdb_id', 'episode', 'season', 'series']), - frozenset(['episode', 'video_codec', 'series', 'season', 'tvdb_id', 'resolution', 'release_group']), - frozenset(['episode', 'series', 'video_codec', 'tvdb_id', 'resolution', 'season'])} - with self.Provider() as provider: - subtitles = provider.query(language, video.season, video.episode, tvdb_id=video.tvdb_id) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, {language}) - - def test_list_subtitles(self): - video = EPISODES[1] - languages = {Language('eng'), Language('nld')} - matches = {frozenset(['series', 'video_codec', 'tvdb_id', 'episode', 'season']), - frozenset(['episode', 'video_codec', 'season', 'series', 'tvdb_id', 'resolution', 'release_group']), - frozenset(['season', 'tvdb_id', 'episode', 'series']), - frozenset(['episode', 
'video_codec', 'season', 'series', 'tvdb_id', 'resolution']), - frozenset(['episode', 'video_codec', 'season', 'series', 'tvdb_id', 'release_group'])} - with self.Provider() as provider: - subtitles = provider.list_subtitles(video, languages) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) - - def test_download_subtitle(self): - video = EPISODES[0] - languages = {Language('eng'), Language('nld')} + languages = set([Language('eng'), Language('fra')]) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) subtitle_text = provider.download_subtitle(subtitles[0]) @@ -166,97 +96,97 @@ class OpenSubtitlesProviderTestCase(ProviderTestCase): def test_query_movie_0_query(self): video = MOVIES[0] - languages = {Language('eng')} - matches = {frozenset([]), frozenset(['imdb_id', 'resolution', 'title', 'year']), + languages = set([Language('eng'), ]) + matches = set([frozenset([]), frozenset(['imdb_id', 'resolution', 'title', 'year']), frozenset(['imdb_id', 'title', 'year']), frozenset(['imdb_id', 'video_codec', 'title', 'year']), frozenset(['imdb_id', 'resolution', 'title', 'video_codec', 'year']), - frozenset(['imdb_id', 'title', 'year', 'video_codec', 'resolution', 'release_group'])} + frozenset(['imdb_id', 'title', 'year', 'video_codec', 'resolution', 'release_group'])]) with self.Provider() as provider: subtitles = provider.query(languages, query=video.title) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_query_episode_0_query(self): video = EPISODES[0] - languages = 
{Language('eng')} - matches = {frozenset(['series', 'episode', 'season', 'imdb_id']), + languages = set([Language('eng'), ]) + matches = set([frozenset(['series', 'episode', 'season', 'imdb_id']), frozenset(['series', 'imdb_id', 'video_codec', 'episode', 'season']), - frozenset(['episode', 'title', 'series', 'imdb_id', 'video_codec', 'season'])} + frozenset(['episode', 'title', 'series', 'imdb_id', 'video_codec', 'season'])]) with self.Provider() as provider: subtitles = provider.query(languages, query=video.name.split(os.sep)[-1]) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_query_episode_1_query(self): video = EPISODES[1] - languages = {Language('eng'), Language('fra')} - matches = {frozenset(['episode', 'title', 'series', 'imdb_id', 'video_codec', 'season']), + languages = set([Language('eng'), Language('fra')]) + matches = set([frozenset(['episode', 'title', 'series', 'imdb_id', 'video_codec', 'season']), frozenset(['series', 'imdb_id', 'title', 'episode', 'season']), frozenset(['series', 'imdb_id', 'video_codec', 'episode', 'season']), frozenset(['episode', 'video_codec', 'series', 'imdb_id', 'resolution', 'season']), frozenset(['series', 'imdb_id', 'resolution', 'episode', 'season']), - frozenset(['series', 'episode', 'season', 'imdb_id'])} + frozenset(['series', 'episode', 'season', 'imdb_id'])]) with self.Provider() as provider: subtitles = provider.query(languages, query=video.name.split(os.sep)[-1]) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - 
set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_query_movie_0_imdb_id(self): video = MOVIES[0] - languages = {Language('eng'), Language('fra')} - matches = {frozenset(['imdb_id', 'video_codec', 'title', 'year']), + languages = set([Language('eng'), Language('fra')]) + matches = set([frozenset(['imdb_id', 'video_codec', 'title', 'year']), frozenset(['imdb_id', 'resolution', 'title', 'video_codec', 'year']), frozenset(['imdb_id', 'title', 'year', 'video_codec', 'resolution', 'release_group']), frozenset(['imdb_id', 'title', 'year']), - frozenset(['imdb_id', 'resolution', 'title', 'year'])} + frozenset(['imdb_id', 'resolution', 'title', 'year'])]) with self.Provider() as provider: subtitles = provider.query(languages, imdb_id=video.imdb_id) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_query_episode_0_imdb_id(self): video = EPISODES[0] - languages = {Language('eng'), Language('fra')} - matches = {frozenset(['series', 'episode', 'season', 'imdb_id']), + languages = set([Language('eng'), Language('fra')]) + matches = set([frozenset(['series', 'episode', 'season', 'imdb_id']), frozenset(['episode', 'release_group', 'video_codec', 'series', 'imdb_id', 'resolution', 'season']), frozenset(['series', 'imdb_id', 'video_codec', 'episode', 'season']), - frozenset(['episode', 'title', 'series', 'imdb_id', 'video_codec', 'season'])} + frozenset(['episode', 'title', 'series', 'imdb_id', 'video_codec', 'season'])]) with self.Provider() as provider: subtitles = provider.query(languages, 
imdb_id=video.imdb_id) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_query_movie_0_hash(self): video = MOVIES[0] - languages = {Language('eng')} - matches = {frozenset(['hash', 'title', 'video_codec', 'year', 'resolution', 'imdb_id']), + languages = set([Language('eng'), ]) + matches = set([frozenset(['hash', 'title', 'video_codec', 'year', 'resolution', 'imdb_id']), frozenset(['hash', 'title', 'video_codec', 'year', 'resolution', 'release_group', 'imdb_id']), frozenset(['year', 'video_codec', 'imdb_id', 'hash', 'title']), frozenset([]), frozenset(['year', 'resolution', 'imdb_id', 'hash', 'title']), - frozenset(['year', 'imdb_id', 'hash', 'title'])} + frozenset(['year', 'imdb_id', 'hash', 'title'])]) with self.Provider() as provider: subtitles = provider.query(languages, hash=video.hashes['opensubtitles'], size=video.size) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_query_episode_0_hash(self): video = EPISODES[0] - languages = {Language('eng')} - matches = {frozenset(['series', 'hash']), + languages = set([Language('eng'), ]) + matches = set([frozenset(['series', 'hash']), frozenset(['episode', 'season', 'series', 'imdb_id', 'video_codec', 'hash']), frozenset(['series', 'episode', 'season', 'hash', 'imdb_id']), - frozenset(['series', 'resolution', 'hash', 'video_codec'])} + 
frozenset(['series', 'resolution', 'hash', 'video_codec'])]) with self.Provider() as provider: subtitles = provider.query(languages, hash=video.hashes['opensubtitles'], size=video.size) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_list_subtitles(self): video = MOVIES[0] - languages = {Language('eng'), Language('fra')} - matches = {frozenset(['title', 'video_codec', 'year', 'resolution', 'release_group', 'imdb_id']), + languages = set([Language('eng'), Language('fra')]) + matches = set([frozenset(['title', 'video_codec', 'year', 'resolution', 'release_group', 'imdb_id']), frozenset(['imdb_id', 'year', 'title']), frozenset(['year', 'video_codec', 'imdb_id', 'resolution', 'title']), frozenset(['hash', 'title', 'video_codec', 'year', 'resolution', 'release_group', 'imdb_id']), @@ -266,15 +196,15 @@ def test_list_subtitles(self): frozenset(['hash', 'title', 'video_codec', 'year', 'resolution', 'imdb_id']), frozenset(['year', 'imdb_id', 'hash', 'title']), frozenset(['video_codec', 'imdb_id', 'year', 'title']), - frozenset(['year', 'imdb_id', 'resolution', 'title'])} + frozenset(['year', 'imdb_id', 'resolution', 'title'])]) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_download_subtitle(self): video 
= MOVIES[0] - languages = {Language('eng'), Language('fra')} + languages = set([Language('eng'), Language('fra')]) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) subtitle_text = provider.download_subtitle(subtitles[0]) @@ -287,44 +217,44 @@ class PodnapisiProviderTestCase(ProviderTestCase): def test_query_movie_0(self): video = MOVIES[0] language = Language('eng') - matches = {frozenset(['video_codec', 'title', 'resolution', 'year']), + matches = set([frozenset(['video_codec', 'title', 'resolution', 'year']), frozenset(['title', 'resolution', 'year']), frozenset(['video_codec', 'title', 'year']), frozenset(['title', 'year']), frozenset(['video_codec', 'title', 'resolution', 'release_group', 'year']), - frozenset(['video_codec', 'title', 'resolution', 'audio_codec', 'year'])} + frozenset(['video_codec', 'title', 'resolution', 'audio_codec', 'year'])]) with self.Provider() as provider: subtitles = provider.query(language, title=video.title, year=video.year) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, {language}) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue([Language, ], [subtitle.language for subtitle in subtitles]) def test_query_episode_0(self): video = EPISODES[0] language = Language('eng') - matches = {frozenset(['episode', 'series', 'season', 'video_codec', 'resolution', 'release_group']), - frozenset(['season', 'video_codec', 'episode', 'resolution', 'series'])} + matches = set([frozenset(['episode', 'series', 'season', 'video_codec', 'resolution', 'release_group']), + frozenset(['season', 'video_codec', 'episode', 'resolution', 'series'])]) with self.Provider() as provider: subtitles = provider.query(language, series=video.series, season=video.season, episode=video.episode) - 
self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, {language}) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue([Language, ], [subtitle.language for subtitle in subtitles]) def test_list_subtitles(self): video = MOVIES[0] - languages = {Language('eng'), Language('fra')} - matches = {frozenset(['video_codec', 'title', 'resolution', 'year']), + languages = set([Language('eng'), Language('fra')]) + matches = set([frozenset(['video_codec', 'title', 'resolution', 'year']), frozenset(['title', 'resolution', 'year']), frozenset(['video_codec', 'title', 'year']), frozenset(['title', 'year']), frozenset(['video_codec', 'title', 'resolution', 'release_group', 'year']), - frozenset(['video_codec', 'title', 'resolution', 'audio_codec', 'year'])} + frozenset(['video_codec', 'title', 'resolution', 'audio_codec', 'year'])]) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_download_subtitle(self): video = MOVIES[0] - languages = {Language('eng'), Language('fra')} + languages = set([Language('eng'), Language('fra')]) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) subtitle_text = provider.download_subtitle(subtitles[0]) @@ -336,34 +266,34 @@ class TheSubDBProviderTestCase(ProviderTestCase): def test_query_episode_0(self): video = EPISODES[0] - languages = {Language('eng'), Language('spa'), Language('por')} - matches = 
{frozenset(['hash'])} + languages = set([Language('eng'), Language('spa'), Language('por')]) + matches = set([frozenset(['hash']), ]) with self.Provider() as provider: subtitles = provider.query(video.hashes['thesubdb']) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_query_episode_1(self): video = EPISODES[1] - languages = {Language('eng'), Language('por')} - matches = {frozenset(['hash'])} + languages = set([Language('eng'), Language('por')]) + matches = set([frozenset(['hash']), ]) with self.Provider() as provider: subtitles = provider.query(video.hashes['thesubdb']) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_list_subtitles(self): video = MOVIES[0] - languages = {Language('eng'), Language('por')} - matches = {frozenset(['hash'])} + languages = set([Language('eng'), Language('por')]) + matches = set([frozenset(['hash']), ]) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) 
== set([])) def test_download_subtitle(self): video = MOVIES[0] - languages = {Language('eng'), Language('por')} + languages = (Language('eng'), Language('por'), ) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) subtitle_text = provider.download_subtitle(subtitles[0]) @@ -391,7 +321,7 @@ def test_find_show_id_no_dots(self): def test_find_show_id_error(self): with self.Provider() as provider: show_id = provider.find_show_id('the big gaming') - self.assertIsNone(show_id) + self.assertEqual(show_id, None) def test_find_episode_ids(self): with self.Provider() as provider: @@ -400,38 +330,39 @@ def test_find_episode_ids(self): def test_query_episode_0(self): video = EPISODES[0] - languages = {Language('fra'), Language('por'), Language('hun'), Language('ron'), Language('eng')} - matches = {frozenset(['series', 'episode', 'season', 'video_codec']), - frozenset(['series', 'episode', 'season'])} + languages = set([Language('fra'), Language('por'), Language('hun'), Language('ron'), Language('eng')]) + matches = set([frozenset(['series', 'episode', 'resolution', 'season']), + frozenset(['series', 'episode', 'season'])]) with self.Provider() as provider: subtitles = provider.query(video.series, video.season, video.episode) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_query_episode_1(self): video = EPISODES[1] - languages = {Language('fra'), Language('ell'), Language('ron'), Language('eng'), Language('hun'), - Language('por'), Language('por', 'BR')} - matches = {frozenset(['series', 'episode', 'resolution', 'season']), + languages = set([Language('fra'), Language('ell'), Language('ron'), 
Language('eng'), Language('hun'), + Language('por'), Language('por', 'BR')]) + matches = set([frozenset(['series', 'episode', 'resolution', 'season']), frozenset(['series', 'episode', 'season', 'video_codec']), - frozenset(['series', 'episode', 'season'])} + frozenset(['series', 'episode', 'season'])]) with self.Provider() as provider: subtitles = provider.query(video.series, video.season, video.episode) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_list_subtitles(self): video = EPISODES[0] - languages = {Language('eng'), Language('fra')} - matches = {frozenset(['series', 'episode', 'season'])} + languages = set([Language('eng'), Language('fra')]) + matches = set([frozenset(['series', 'episode', 'resolution', 'season']), + frozenset([u'series', u'episode', u'season'])]) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) - self.assertEqual({frozenset(subtitle.compute_matches(video)) for subtitle in subtitles}, matches) - self.assertEqual({subtitle.language for subtitle in subtitles}, languages) + self.assertTrue(matches - set([frozenset(subtitle.compute_matches(video)) for subtitle in subtitles]) == set([])) + self.assertTrue(languages - set([subtitle.language for subtitle in subtitles]) == set([])) def test_download_subtitle(self): video = EPISODES[0] - languages = {Language('hun')} + languages = (Language('hun'), ) with self.Provider() as provider: subtitles = provider.list_subtitles(video, languages) subtitle_text = provider.download_subtitle(subtitles[0]) @@ -441,7 +372,6 @@ def test_download_subtitle(self): def suite(): suite = TestSuite() 
suite.addTest(TestLoader().loadTestsFromTestCase(Addic7edProviderTestCase)) - suite.addTest(TestLoader().loadTestsFromTestCase(BierDopjeProviderTestCase)) suite.addTest(TestLoader().loadTestsFromTestCase(OpenSubtitlesProviderTestCase)) suite.addTest(TestLoader().loadTestsFromTestCase(PodnapisiProviderTestCase)) suite.addTest(TestLoader().loadTestsFromTestCase(TheSubDBProviderTestCase)) diff --git a/subliminal/tests/test_subliminal.py b/subliminal/tests/test_subliminal.py index 98e19a13a..f8316a221 100644 --- a/subliminal/tests/test_subliminal.py +++ b/subliminal/tests/test_subliminal.py @@ -21,30 +21,30 @@ def tearDown(self): def test_list_subtitles_movie_0(self): videos = [MOVIES[0]] - languages = {Language('eng')} + languages = set([ Language('eng'), ]) subtitles = list_subtitles(videos, languages) self.assertEqual(len(subtitles), len(videos)) - self.assertGreater(len(subtitles[videos[0]]), 0) + self.assertTrue(len(subtitles[videos[0]]) > 0) def test_list_subtitles_movie_0_por_br(self): videos = [MOVIES[0]] - languages = {Language('por', 'BR')} + languages = set([Language('por', 'BR'), ]) subtitles = list_subtitles(videos, languages) self.assertEqual(len(subtitles), len(videos)) - self.assertGreater(len(subtitles[videos[0]]), 0) + self.assertTrue(len(subtitles[videos[0]]) > 0) def test_list_subtitles_episodes(self): videos = [EPISODES[0], EPISODES[1]] - languages = {Language('eng'), Language('fra')} + languages = set([Language('eng'), Language('fra')]) subtitles = list_subtitles(videos, languages) self.assertEqual(len(subtitles), len(videos)) - self.assertGreater(len(subtitles[videos[0]]), 0) + self.assertTrue(len(subtitles[videos[0]]) > 0) def test_download_subtitles(self): videos = [EPISODES[0], EPISODES[1]] for video in videos: video.name = os.path.join(TEST_DIR, video.name.split(os.sep)[-1]) - languages = {Language('eng'), Language('fra')} + languages = set([Language('eng'), Language('fra')]) subtitles = list_subtitles(videos, languages) 
download_subtitles(subtitles) for video in videos: @@ -55,7 +55,7 @@ def test_download_subtitles_single(self): videos = [EPISODES[0], EPISODES[1]] for video in videos: video.name = os.path.join(TEST_DIR, video.name.split(os.sep)[-1]) - languages = {Language('eng'), Language('fra')} + languages = set([Language('eng'), Language('fra')]) subtitles = list_subtitles(videos, languages) download_subtitles(subtitles, single=True) for video in videos: @@ -65,10 +65,11 @@ def test_download_best_subtitles(self): videos = [EPISODES[0], EPISODES[1]] for video in videos: video.name = os.path.join(TEST_DIR, video.name.split(os.sep)[-1]) - languages = {Language('eng'), Language('fra')} + languages = set([Language('eng'), Language('fra')]) subtitles = download_best_subtitles(videos, languages) for video in videos: - self.assertEqual(video in subtitles and len(subtitles[video]), 2) + self.assertTrue(video in subtitles) + self.assertTrue(len(subtitles[video]) == 2) self.assertTrue(os.path.exists(os.path.splitext(video.name)[0] + '.en.srt')) self.assertTrue(os.path.exists(os.path.splitext(video.name)[0] + '.fr.srt')) @@ -76,10 +77,10 @@ def test_download_best_subtitles_single(self): videos = [EPISODES[0], EPISODES[1]] for video in videos: video.name = os.path.join(TEST_DIR, video.name.split(os.sep)[-1]) - languages = {Language('eng'), Language('fra')} + languages = set([Language('eng'), Language('fra')]) subtitles = download_best_subtitles(videos, languages, single=True) for video in videos: - self.assertIn(video, subtitles) + self.assertTrue(video in subtitles) self.assertEqual(len(subtitles[video]), 1) self.assertTrue(os.path.exists(os.path.splitext(video.name)[0] + '.srt')) @@ -87,7 +88,7 @@ def test_download_best_subtitles_min_score(self): videos = [MOVIES[0]] for video in videos: video.name = os.path.join(TEST_DIR, video.name.split(os.sep)[-1]) - languages = {Language('eng'), Language('fra')} + languages = set([Language('eng'), Language('fra')]) subtitles = 
download_best_subtitles(videos, languages, min_score=1000) self.assertEqual(len(subtitles), 0) @@ -95,7 +96,7 @@ def test_download_best_subtitles_hearing_impaired(self): videos = [MOVIES[0]] for video in videos: video.name = os.path.join(TEST_DIR, video.name.split(os.sep)[-1]) - languages = {Language('eng')} + languages = set([Language('eng'), ]) subtitles = download_best_subtitles(videos, languages, hearing_impaired=True) self.assertTrue(subtitles[videos[0]][0].hearing_impaired) @@ -120,8 +121,8 @@ def test_scan_video_movie(self): self.assertEqual(scanned_video.release_group, video.release_group) self.assertEqual(scanned_video.subtitle_languages, set()) self.assertEqual(scanned_video.hashes, {}) - self.assertIsNone(scanned_video.audio_codec) - self.assertIsNone(scanned_video.imdb_id) + self.assertEqual(scanned_video.audio_codec, None) + self.assertEqual(scanned_video.imdb_id, None) self.assertEqual(scanned_video.size, 0) def test_scan_video_episode(self): @@ -136,23 +137,23 @@ def test_scan_video_episode(self): self.assertEqual(scanned_video.release_group, video.release_group) self.assertEqual(scanned_video.subtitle_languages, set()) self.assertEqual(scanned_video.hashes, {}) - self.assertIsNone(scanned_video.title) - self.assertIsNone(scanned_video.tvdb_id) - self.assertIsNone(scanned_video.imdb_id) - self.assertIsNone(scanned_video.audio_codec) + self.assertEqual(scanned_video.title, None) + self.assertEqual(scanned_video.tvdb_id, None) + self.assertEqual(scanned_video.imdb_id, None) + self.assertEqual(scanned_video.audio_codec, None) self.assertEqual(scanned_video.size, 0) def test_scan_video_subtitle_language_und(self): video = EPISODES[0] open(os.path.join(TEST_DIR, os.path.splitext(os.path.split(video.name)[1])[0]) + '.srt', 'w').close() scanned_video = scan_video(os.path.join(TEST_DIR, os.path.split(video.name)[1])) - self.assertEqual(scanned_video.subtitle_languages, {Language('und')}) + self.assertEqual(scanned_video.subtitle_languages, 
set([Language('und'), ])) def test_scan_video_subtitles_language_eng(self): video = EPISODES[0] open(os.path.join(TEST_DIR, os.path.splitext(os.path.split(video.name)[1])[0]) + '.en.srt', 'w').close() scanned_video = scan_video(os.path.join(TEST_DIR, os.path.split(video.name)[1])) - self.assertEqual(scanned_video.subtitle_languages, {Language('eng')}) + self.assertEqual(scanned_video.subtitle_languages, set([Language('eng'), ])) def test_scan_video_subtitles_languages(self): video = EPISODES[0] @@ -160,7 +161,7 @@ def test_scan_video_subtitles_languages(self): open(os.path.join(TEST_DIR, os.path.splitext(os.path.split(video.name)[1])[0]) + '.fr.srt', 'w').close() open(os.path.join(TEST_DIR, os.path.splitext(os.path.split(video.name)[1])[0]) + '.srt', 'w').close() scanned_video = scan_video(os.path.join(TEST_DIR, os.path.split(video.name)[1])) - self.assertEqual(scanned_video.subtitle_languages, {Language('eng'), Language('fra'), Language('und')}) + self.assertEqual(scanned_video.subtitle_languages, set([Language('eng'), Language('fra'), Language('und')])) def suite(): diff --git a/subliminal/video.py b/subliminal/video.py index 017aa596e..447ac0628 100644 --- a/subliminal/video.py +++ b/subliminal/video.py @@ -71,6 +71,10 @@ def __repr__(self): def __hash__(self): return hash(self.name) + def __eq__(self, other): + return self.__class__.__name__ == other.__class__.__name__\ + and self.name == other.name + class Episode(Video): """Episode :class:`Video` @@ -112,6 +116,18 @@ def fromguess(cls, name, guess): def __repr__(self): return '<%s [%r, %rx%r]>' % (self.__class__.__name__, self.series, self.season, self.episode) + def __hash__(self): + return hash(( + self.series, + self.season, + self.episode, + )) + + def __eq__(self, other): + return self.__class__.__name__ == other.__class__.__name__\ + and self.series == other.series\ + and self.season == other.season\ + and self.episode == other.episode class Movie(Video): """Movie :class:`Video` @@ -147,6 +163,18 @@ def 
__repr__(self): return '<%s [%r]>' % (self.__class__.__name__, self.title) return '<%s [%r, %r]>' % (self.__class__.__name__, self.title, self.year) + def __hash__(self): + if self.year is None: + return hash(( + self.title, + self.year, + )) + return hash(self.title) + + def __eq__(self, other): + return self.__class__.__name__ == other.__class__.__name__\ + and self.title == other.title\ + and self.year == other.year def scan_subtitle_languages(path): """Search for subtitles with alpha2 extension from a video `path` and return their language @@ -156,7 +184,7 @@ def scan_subtitle_languages(path): :rtype: set """ - language_extensions = tuple('.' + c for c in babelfish.get_language_converter('alpha2').codes) + language_extensions = tuple('.' + c for c in babelfish.language_converters['alpha2'].codes) dirpath, filename = os.path.split(path) subtitles = set() for p in os.listdir(dirpath): @@ -169,12 +197,14 @@ def scan_subtitle_languages(path): return subtitles -def scan_video(path, subtitles=True, embedded_subtitles=True): +def scan_video(path, subtitles=True, embedded_subtitles=True, video=None): """Scan a video and its subtitle languages from a video `path` :param string path: absolute path to the video :param bool subtitles: scan for subtitles with the same name :param bool embedded_subtitles: scan for embedded subtitles + :param :class:`Video` video: optionally specify a video if you've already detected one + by other means.
:return: the scanned video :rtype: :class:`Video` :raise: ValueError if cannot guess enough information from the path @@ -182,7 +212,12 @@ def scan_video(path, subtitles=True, embedded_subtitles=True): """ dirpath, filename = os.path.split(path) logger.info('Scanning video %r in %r', filename, dirpath) - video = Video.fromguess(path, guessit.guess_file_info(path, 'autodetect')) + if not video: + video = Video.fromguess( + path, + guessit.guess_file_info(path, info=['filename']), + ) + video.size = os.path.getsize(path) if video.size > 10485760: logger.debug('Size is %d', video.size) @@ -292,12 +327,12 @@ def scan_videos(paths, subtitles=True, embedded_subtitles=True, age=None): for dirpath, dirnames, filenames in os.walk(path): # skip badly encoded directories if isinstance(dirpath, bytes): - logger.error('Skipping badly encoded directory %r', dirpath.decode('utf-8', errors='replace')) + logger.error('Skipping badly encoded directory %r', dirpath.decode('utf-8', 'replace')) continue # skip badly encoded and hidden sub directories for dirname in list(dirnames): if isinstance(dirname, bytes): - logger.error('Skipping badly encoded dirname %r in %r', dirname.decode('utf-8', errors='replace'), + logger.error('Skipping badly encoded dirname %r in %r', dirname.decode('utf-8', 'replace'), dirpath) dirnames.remove(dirname) elif dirname.startswith('.'): @@ -307,7 +342,7 @@ def scan_videos(paths, subtitles=True, embedded_subtitles=True, age=None): for filename in filenames: # skip badly encoded files if isinstance(filename, bytes): - logger.error('Skipping badly encoded filename %r in %r', filename.decode('utf-8', errors='replace'), + logger.error('Skipping badly encoded filename %r in %r', filename.decode('utf-8', 'replace'), dirpath) continue # filter videos