Skip to content

Commit

Permalink
[pornhub] Add support for authentication (closes #18797, closes #21416,
Browse files Browse the repository at this point in the history
closes #24294)
  • Loading branch information
dstftw committed Feb 3, 2021
1 parent 83031d7 commit e22ff4e
Showing 1 changed file with 76 additions and 30 deletions.
106 changes: 76 additions & 30 deletions youtube_dl/extractor/pornhub.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
remove_quotes,
str_to_int,
update_url_query,
urlencode_postdata,
url_or_none,
)

Expand Down Expand Up @@ -53,6 +54,66 @@ def dl(*args, **kwargs):

return webpage, urlh

def _real_initialize(self):
self._logged_in = False

def _login(self, host):
if self._logged_in:
return

site = host.split('.')[0]

# Both sites pornhub and pornhubpremium have separate accounts
# so there should be an option to provide credentials for both.
# At the same time some videos are available under the same video id
# on both sites so that we have to identify them as the same video.
# For that purpose we have to keep both in the same extractor
# but under different netrc machines.
username, password = self._get_login_info(netrc_machine=site)
if username is None:
return

login_url = 'https://www.%s/%slogin' % (host, 'premium/' if 'premium' in host else '')
login_page = self._download_webpage(
login_url, None, 'Downloading %s login page' % site)

def is_logged(webpage):
return any(re.search(p, webpage) for p in (
r'class=["\']signOut',
r'>Sign\s+[Oo]ut\s*<'))

if is_logged(login_page):
self._logged_in = True
return

login_form = self._hidden_inputs(login_page)

login_form.update({
'username': username,
'password': password,
})

response = self._download_json(
'https://www.%s/front/authenticate' % host, None,
'Logging in to %s' % site,
data=urlencode_postdata(login_form),
headers={
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
'Referer': login_url,
'X-Requested-With': 'XMLHttpRequest',
})

if response.get('success') == '1':
self._logged_in = True
return

message = response.get('message')
if message is not None:
raise ExtractorError(
'Unable to login: %s' % message, expected=True)

raise ExtractorError('Unable to log in')


class PornHubIE(PornHubBaseIE):
IE_DESC = 'PornHub and Thumbzilla'
Expand Down Expand Up @@ -164,12 +225,20 @@ class PornHubIE(PornHubBaseIE):
}, {
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
'only_matching': True,
}, {
# Some videos are available with the same id on both premium
# and non-premium sites (e.g. this and the following test)
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5f75b0f4b18e3',
'only_matching': True,
}, {
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
'only_matching': True,
}]

@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net|org)/embed/[\da-z]+)',
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)',
webpage)

def _extract_count(self, pattern, webpage, name):
Expand All @@ -181,12 +250,7 @@ def _real_extract(self, url):
host = mobj.group('host') or 'pornhub.com'
video_id = mobj.group('id')

if 'premium' in host:
if not self._downloader.params.get('cookiefile'):
raise ExtractorError(
'PornHub Premium requires authentication.'
' You may want to use --cookies.',
expected=True)
self._login(host)

self._set_cookie(host, 'age_verified', '1')

Expand Down Expand Up @@ -427,26 +491,6 @@ def _extract_entries(self, webpage, host):
container))
]

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
host = mobj.group('host')
playlist_id = mobj.group('id')

webpage = self._download_webpage(url, playlist_id)

entries = self._extract_entries(webpage, host)

playlist = self._parse_json(
self._search_regex(
r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage,
'playlist', default='{}'),
playlist_id, fatal=False)
title = playlist.get('title') or self._search_regex(
r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False)

return self.playlist_result(
entries, playlist_id, title, playlist.get('description'))


class PornHubUserIE(PornHubPlaylistBaseIE):
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
Expand Down Expand Up @@ -506,12 +550,14 @@ def _real_extract(self, url):
host = mobj.group('host')
item_id = mobj.group('id')

self._login(host)

page = self._extract_page(url)

VIDEOS = '/videos'

def download_page(base_url, num, fallback=False):
    # Fetch page ``num`` of the listing at ``base_url``.  ``fallback`` only
    # changes the progress note; it is set when the request is retried
    # against the URL with the '/videos' suffix removed.
    note = 'Downloading page %d%s' % (num, ' (switch to fallback)' if fallback else '')
    return self._download_webpage(
        base_url, item_id, note, query={'page': num})

Expand All @@ -532,7 +578,7 @@ def is_404(e):
# 1. https://github.com/ytdl-org/youtube-dl/issues/27853
if is_404(e) and page_num == first_page and VIDEOS in base_url:
base_url = base_url.replace(VIDEOS, '')
webpage = download_page(base_url, page_num)
webpage = download_page(base_url, page_num, fallback=True)
else:
raise
except ExtractorError as e:
Expand Down

0 comments on commit e22ff4e

Please sign in to comment.