Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

0.7.x #404

Merged
merged 28 commits into from
Mar 4, 2015
Merged

0.7.x #404

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
3b832a4
updated guessit and babelfish minimum requirements
caronc Sep 18, 2014
e6dc714
Eliminated Dict Comprehensions (PEP 274) references to allow sublimin…
caronc Sep 18, 2014
14c7443
applied guessit v0.7 support
caronc Sep 18, 2014
71d206a
allow searching for subtitles by best score; not exclusively hearing …
caronc Sep 18, 2014
73a4e5a
subliminal bugfix to prevent multiple matched subtitles from differen…
caronc Sep 18, 2014
3b52a93
added ability to prioritize multiple matched subtitles; Download Hear…
caronc Sep 18, 2014
88a2cb9
added graceful handling of subtitle providers that are simply offline…
caronc Sep 18, 2014
4702284
podnapisi website changes made in Aug 2014 broke this provider in su…
caronc Sep 18, 2014
1e9588e
Added support for titles that contain quotes
caronc Oct 18, 2014
827c75f
better handling of duplicate download prevention
caronc Nov 11, 2014
6cdf189
Fixed TVSubtitles.net matching
caronc Nov 11, 2014
87aefa3
Massive overhaul on testing to make it Python v2.6 compatible
caronc Nov 11, 2014
90d06e2
logging output slightly adjusted (added some clarity and removed some …
caronc Nov 11, 2014
d635269
removed bierdopje provider since it's not referenced anymore anyway (…
caronc Nov 11, 2014
3c7634f
eliminated bierdopje entry point to fix testing issue
caronc Nov 11, 2014
bc575ea
comparison functions added to help with 3rd party filtering
caronc Nov 30, 2014
7fef3cf
scoring adjustments moved into compute_score() and some logging cleanup
caronc Nov 30, 2014
e6bd704
scan_video() now takes pre-guessed video as optional input. This grants
caronc Dec 7, 2014
d45e3b0
improved logging for provider debugging
caronc Jan 26, 2015
a25640b
fixes #425; I'm not proud of this fix; but it resolves the issue.
caronc Jan 26, 2015
3acbefb
fixes #428; I'm not proud of this fix; but it resolves the issue.
caronc Jan 26, 2015
6c28336
Merge branch '0.7.x' of github:caronc/subliminal into 0.7.x
caronc Jan 28, 2015
e45fe70
updated requirements
caronc Jan 29, 2015
a86694d
updated requirements so that Travis CI doesn't haul in guessit v0.10
caronc Jan 29, 2015
1b7291e
Merge branch '0.7.x' of github:caronc/subliminal into 0.7.x
caronc Jan 30, 2015
c74aa01
random user agent determined at runtime; default user-agent reference…
caronc Feb 1, 2015
16ddb8a
addic7ed timeout reverted back to 10 second timeout
caronc Feb 1, 2015
407cbe1
series/movie title sanitization refactored
caronc Feb 1, 2015
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
beautifulsoup4>=4.3.2
guessit>=0.6.2,<0.7
guessit>=0.7,<0.10
requests>=2.0.1
enzyme>=0.4.0
html5lib>=0.99
dogpile.cache>=0.5.2
babelfish>=0.4.0,<0.5
babelfish>=0.5.0
charade>=1.0.3
pysrt>=0.5.0
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
entry_points={
'console_scripts': ['subliminal = subliminal.cli:subliminal'],
'subliminal.providers': ['addic7ed = subliminal.providers.addic7ed:Addic7edProvider',
'bierdopje = subliminal.providers.bierdopje:BierDopjeProvider',
'opensubtitles = subliminal.providers.opensubtitles:OpenSubtitlesProvider',
'podnapisi = subliminal.providers.podnapisi:PodnapisiProvider',
'thesubdb = subliminal.providers.thesubdb:TheSubDBProvider',
Expand Down
5 changes: 4 additions & 1 deletion subliminal/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,8 @@
from .subtitle import Subtitle
from .video import VIDEO_EXTENSIONS, SUBTITLE_EXTENSIONS, Video, Episode, Movie, scan_videos, scan_video

class NullHandler(logging.Handler):
    """Logging handler that silently discards every record.

    Local stand-in for ``logging.NullHandler`` so the package logger can be
    given a no-op handler without relying on the stdlib class.
    NOTE(review): presumably added for Python 2.6 support, where
    ``logging.NullHandler`` does not exist -- confirm against the
    supported-version list.
    """

    def emit(self, record):
        """Drop *record* without formatting or writing it anywhere."""
        pass

logging.getLogger(__name__).addHandler(logging.NullHandler())
logging.getLogger(__name__).addHandler(NullHandler())
112 changes: 85 additions & 27 deletions subliminal/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
import operator
import babelfish
import pkg_resources
from os.path import basename
from .exceptions import ProviderNotAvailable, InvalidSubtitle
from .subtitle import get_subtitle_path

from socket import error as socket_error

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -67,16 +68,21 @@ def list_subtitles(videos, languages, providers=None, provider_configs=None):
provider_entry_point.name, provider_video, provider_video_languages)
try:
provider_subtitles = provider.list_subtitles(provider_video, provider_video_languages)
except ProviderNotAvailable:
except ProviderNotAvailable as err:
logger.warning('Provider %r is not available, discarding it', provider_entry_point.name)
logger.debug('ProviderNotAvailable error: %r', str(err))
break
except:
logger.exception('Unexpected error in provider %r', provider_entry_point.name)
continue
logger.info('Found %d subtitles', len(provider_subtitles))
logger.info('Found %d subtitle(s) on %s' % (
len(provider_subtitles),
provider_entry_point.name,
))
subtitles[provider_video].extend(provider_subtitles)
except ProviderNotAvailable:
except ProviderNotAvailable as err:
logger.warning('Provider %r is not available, discarding it', provider_entry_point.name)
logger.debug('ProviderNotAvailable error: %r', str(err))
return subtitles


Expand All @@ -92,15 +98,19 @@ def download_subtitles(subtitles, provider_configs=None, single=False):
"""
provider_configs = provider_configs or {}
discarded_providers = set()
providers_by_name = {ep.name: ep.load() for ep in pkg_resources.iter_entry_points(PROVIDERS_ENTRY_POINT)}
providers_by_name = dict([(ep.name, ep.load()) for ep in pkg_resources.iter_entry_points(PROVIDERS_ENTRY_POINT)])

initialized_providers = {}
downloaded_subtitles = collections.defaultdict(list)
fetched_subtitles = set()
try:
for video, video_subtitles in subtitles.items():
languages = {subtitle.language for subtitle in video_subtitles}
languages = set([subtitle.language for subtitle in video_subtitles])
downloaded_languages = set()
for subtitle in video_subtitles:
# filter
if subtitle.language in downloaded_languages:
logger.debug('Skipping subtitle: %r already downloaded', subtitle.language)
continue
if subtitle.provider_name in discarded_providers:
logger.debug('Skipping subtitle from discarded provider %r', subtitle.provider_name)
Expand All @@ -113,19 +123,35 @@ def download_subtitles(subtitles, provider_configs=None, single=False):
provider = providers_by_name[subtitle.provider_name](**provider_configs.get(subtitle.provider_name, {}))
try:
provider.initialize()
except ProviderNotAvailable:
except ProviderNotAvailable as err:
logger.warning('Provider %r is not available, discarding it', subtitle.provider_name)
logger.debug('ProviderNotAvailable error: %r', str(err))
discarded_providers.add(subtitle.provider_name)
continue
except socket_error as err:
logger.warning('Provider %r is not responding, discarding it', subtitle.provider_name)
logger.debug('Provider socket error: %r', str(err))
discarded_providers.add(subtitle.provider_name)
continue
except:
logger.exception('Unexpected error in provider %r', subtitle.provider_name)
discarded_providers.add(subtitle.provider_name)
continue
initialized_providers[subtitle.provider_name] = provider

# download subtitles
subtitle_path = get_subtitle_path(video.name, None if single else subtitle.language)
if basename(subtitle_path) in fetched_subtitles:
logger.debug('Skipping subtitle already retrieved %r', basename(subtitle_path))
continue

logger.info('Downloading subtitle %r into %r', subtitle, subtitle_path)
try:
subtitle_text = provider.download_subtitle(subtitle)
except ProviderNotAvailable:
downloaded_subtitles[video].append(subtitle)
except ProviderNotAvailable as err:
logger.warning('Provider %r is not available, discarding it', subtitle.provider_name)
logger.debug('ProviderNotAvailable error: %r', str(err))
discarded_providers.add(subtitle.provider_name)
continue
except InvalidSubtitle:
Expand All @@ -136,8 +162,9 @@ def download_subtitles(subtitles, provider_configs=None, single=False):
continue
with io.open(subtitle_path, 'w', encoding='utf-8') as f:
f.write(subtitle_text)
downloaded_languages.add(subtitle.language)
if single or downloaded_languages == languages:
downloaded_languages.add(subtitle.language)
fetched_subtitles.add(basename(subtitle_path))
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The language check should be enough here; why is the fetched_subtitles check also needed?

if single or sorted(downloaded_languages) == sorted(languages):
break
finally: # terminate providers
for (provider_name, provider) in initialized_providers.items():
Expand All @@ -147,10 +174,11 @@ def download_subtitles(subtitles, provider_configs=None, single=False):
logger.warning('Provider %r is not available, unable to terminate', provider_name)
except:
logger.exception('Unexpected error in provider %r', provider_name)
return downloaded_subtitles


def download_best_subtitles(videos, languages, providers=None, provider_configs=None, single=False, min_score=0,
hearing_impaired=False):
hearing_impaired=False, hi_score_adjust=0):
"""Download the best subtitles for `videos` with the given `languages` using the specified `providers`

:param videos: videos to download subtitles for
Expand All @@ -164,11 +192,13 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs=
:param bool single: download with .srt extension if `True`, add language identifier otherwise
:param int min_score: minimum score for subtitles to download
:param bool hearing_impaired: download hearing impaired subtitles
:param int hi_score_adjust: Adjust hearing_impaired_scores if matched.

"""
provider_configs = provider_configs or {}
discarded_providers = set()
downloaded_subtitles = collections.defaultdict(list)
fetched_subtitles = set()
# filter videos
videos = [v for v in videos if v.subtitle_languages & languages < languages
and (not single or babelfish.Language('und') not in v.subtitle_languages)]
Expand All @@ -184,27 +214,37 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs=
continue
Provider = provider_entry_point.load()
if not Provider.languages & languages - subtitle_languages:
logger.info('Skipping provider %r: no language to search for', provider_entry_point.name)
logger.debug('Skipping provider %r: no language to search for', provider_entry_point.name)
continue
if not [v for v in videos if Provider.check(v)]:
logger.info('Skipping provider %r: no video to search for', provider_entry_point.name)
logger.debug('Skipping provider %r: video type not hosted here.', provider_entry_point.name)
continue
provider = Provider(**provider_configs.get(provider_entry_point.name, {}))
try:
provider.initialize()
except ProviderNotAvailable:
except ProviderNotAvailable as err:
logger.warning('Provider %r is not available, discarding it', provider_entry_point.name)
logger.debug('ProviderNotAvailable error: %r', str(err))
continue
except socket_error as err:
logger.warning('Provider %r is not responding, discarding it', provider_entry_point.name)
logger.debug('Provider socket error: %r', str(err))
continue
except:
logger.exception('Unexpected error in provider %r', provider_entry_point.name)
continue
initialized_providers[provider_entry_point.name] = provider
try:
for video in videos:
# search for subtitles
subtitles = []
downloaded_languages = set()
for provider_name, provider in initialized_providers.items():
if provider.check(video):
if provider_name in discarded_providers:
logger.debug('Skipping discarded provider %r', provider_name)
continue

provider_video_languages = provider.languages & languages - video.subtitle_languages
if not provider_video_languages:
logger.debug('Skipping provider %r: no language to search for for video %r', provider_name,
Expand All @@ -214,43 +254,56 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs=
provider_name, video, provider_video_languages)
try:
provider_subtitles = provider.list_subtitles(video, provider_video_languages)
except ProviderNotAvailable:
except ProviderNotAvailable as err:
logger.warning('Provider %r is not available, discarding it', provider_name)
logger.debug('ProviderNotAvailable error: %r', str(err))
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can pass exc_info=True as a keyword argument to logger.debug to include the traceback information.

discarded_providers.add(provider_name)
continue
except:
logger.exception('Unexpected error in provider %r', provider_name)
continue
logger.info('Found %d subtitles', len(provider_subtitles))
logger.info('Found %d subtitle(s) on %s' % (
len(provider_subtitles),
provider_name,
))
subtitles.extend(provider_subtitles)

# find the best subtitles and download them
downloaded_languages = video.subtitle_languages.copy()
for subtitle, score in sorted([(s, s.compute_score(video)) for s in subtitles],
key=operator.itemgetter(1), reverse=True):
for subtitle, score in sorted([(s, s.compute_score(video, hi_score_adjust)) \
for s in subtitles], key=operator.itemgetter(1), reverse=True):

# filter
if subtitle.provider_name in discarded_providers:
logger.debug('Skipping subtitle from discarded provider %r', subtitle.provider_name)
continue
if subtitle.hearing_impaired != hearing_impaired:
logger.debug('Skipping subtitle: hearing impaired != %r', hearing_impaired)
continue

if hearing_impaired is not None:
if subtitle.hearing_impaired != hearing_impaired:
logger.debug('Skipping subtitle: hearing impaired != %r', hearing_impaired)
continue

if score < min_score:
logger.debug('Skipping subtitle: score < %d', min_score)
continue

if subtitle.language in downloaded_languages:
logger.debug('Skipping subtitle: %r already downloaded', subtitle.language)
continue

# download
provider = initialized_providers[subtitle.provider_name]
subtitle_path = get_subtitle_path(video.name, None if single else subtitle.language)
if basename(subtitle_path) in fetched_subtitles:
logger.debug('Skipping subtitle already retrieved %r', basename(subtitle_path))
continue

logger.info('Downloading subtitle %r with score %d into %r', subtitle, score, subtitle_path)
try:
subtitle_text = provider.download_subtitle(subtitle)
downloaded_subtitles[video].append(subtitle)
except ProviderNotAvailable:
except ProviderNotAvailable as err:
logger.warning('Provider %r is not available, discarding it', subtitle.provider_name)
logger.debug('ProviderNotAvailable error: %r', str(err))
discarded_providers.add(subtitle.provider_name)
continue
except InvalidSubtitle:
Expand All @@ -261,16 +314,21 @@ def download_best_subtitles(videos, languages, providers=None, provider_configs=
continue
with io.open(subtitle_path, 'w', encoding='utf-8') as f:
f.write(subtitle_text)
downloaded_languages.add(subtitle.language)
if single or downloaded_languages >= languages:
logger.debug('All languages downloaded')
downloaded_languages.add(subtitle.language)
fetched_subtitles.add(basename(subtitle_path))
if single or sorted(downloaded_languages) == sorted(languages):
break

finally: # terminate providers
for (provider_name, provider) in initialized_providers.items():
try:
provider.terminate()
except ProviderNotAvailable:
except ProviderNotAvailable as err:
logger.warning('Provider %r is not available, unable to terminate', provider_name)
logger.debug('ProviderNotAvailable error: %r', str(err))
except socket_error as err:
logger.warning('Provider %r is not available, unable to terminate', provider_name)
logger.debug('Provider socket error: %r', str(err))
except:
logger.exception('Unexpected error in provider %r', provider_name)
return downloaded_subtitles
2 changes: 1 addition & 1 deletion subliminal/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


#: Subliminal's cache version
CACHE_VERSION = 1
CACHE_VERSION = 2


def subliminal_key_generator(namespace, fn, to_str=string_type):
Expand Down
16 changes: 3 additions & 13 deletions subliminal/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,6 @@ def subliminal():
filtering_group.add_argument('-f', '--force', action='store_true',
help='force subtitle download for videos with existing subtitles')

# addic7ed
addic7ed_group = parser.add_argument_group('addic7ed')
addic7ed_group.add_argument('--addic7ed-username', metavar='USERNAME', help='username for addic7ed provider')
addic7ed_group.add_argument('--addic7ed-password', metavar='PASSWORD', help='password for addic7ed provider')

# output
output_group = parser.add_argument_group('output')
output_exclusive_group = output_group.add_mutually_exclusive_group()
Expand All @@ -81,7 +76,7 @@ def subliminal():

# parse languages
try:
args.languages = {babelfish.Language.fromietf(l) for l in args.languages}
args.languages = set( babelfish.Language.fromietf(l) for l in args.languages )
except babelfish.Error:
parser.error('argument -l/--languages: codes are not IETF: %r' % args.languages)

Expand All @@ -90,7 +85,7 @@ def subliminal():
match = re.match(r'^(?:(?P<weeks>\d+?)w)?(?:(?P<days>\d+?)d)?(?:(?P<hours>\d+?)h)?$', args.age)
if not match:
parser.error('argument -a/--age: invalid age: %r' % args.age)
args.age = datetime.timedelta(**{k: int(v) for k, v in match.groupdict(0).items()})
args.age = datetime.timedelta(**dict([(k, int(v)) for k, v in match.groupdict(0).items()]))

# parse cache-file
args.cache_file = os.path.abspath(os.path.expanduser(args.cache_file))
Expand All @@ -100,11 +95,6 @@ def subliminal():

# parse provider configs
provider_configs = {}
if (args.addic7ed_username is not None and args.addic7ed_password is None
or args.addic7ed_username is None and args.addic7ed_password is not None):
parser.error('argument --addic7ed-username/--addic7ed-password: both arguments are required or none')
if args.addic7ed_username is not None and args.addic7ed_password is not None:
provider_configs['addic7ed'] = {'username': args.addic7ed_username, 'password': args.addic7ed_password}

# parse color
if args.color and colorlog is None:
Expand Down Expand Up @@ -146,7 +136,7 @@ def subliminal():
embedded_subtitles=not args.force, age=args.age)

# guess videos
videos.extend([Video.fromguess(os.path.split(p)[1], guessit.guess_file_info(p, 'autodetect')) for p in args.paths
videos.extend([Video.fromguess(os.path.split(p)[1], guessit.guess_file_info(p, info=['filename'])) for p in args.paths
if not os.path.exists(p)])

# download best subtitles
Expand Down
4 changes: 2 additions & 2 deletions subliminal/converters/addic7ed.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from babelfish import LanguageReverseConverter, get_language_converter
from babelfish import LanguageReverseConverter, language_converters


class Addic7edConverter(LanguageReverseConverter):
def __init__(self):
self.name_converter = get_language_converter('name')
self.name_converter = language_converters['name']
self.from_addic7ed = {'Català': ('cat',), 'Chinese (Simplified)': ('zho',), 'Chinese (Traditional)': ('zho',),
'Euskera': ('eus',), 'Galego': ('glg',), 'Greek': ('ell',), 'Malay': ('msa',),
'Portuguese (Brazilian)': ('por', 'BR'), 'Serbian (Cyrillic)': ('srp', None, 'Cyrl'),
Expand Down
Loading