Skip to content

Commit

Permalink
fix premium videos length and view count scraping failure
Browse files Browse the repository at this point in the history
  • Loading branch information
filyp committed Aug 3, 2021
1 parent d707e21 commit aa0d103
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 11 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "yourtube"
version = "0.5.3"
version = "0.5.4"
description = "Better youtube recommendations"
authors = ["Filip Sondej <filipsondej@protonmail.com>"]
license = "LGPL-2.1"
Expand Down
2 changes: 1 addition & 1 deletion yourtube/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.5.3"
__version__ = "0.5.4"

import os
import subprocess
Expand Down
14 changes: 13 additions & 1 deletion yourtube/debug.ipynb
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"try:\n",
" %load_ext lab_black\n",
"except ModuleNotFoundError:\n",
" print(\"nb_black not installed\")"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -13,7 +25,7 @@
"sys.path.append(os.path.abspath(\"..\"))\n",
"import matplotlib.pyplot as plt\n",
"\n",
"from yourtube.scraping import get_content, scrape_content"
"from yourtube.scraping import *"
]
},
{
Expand Down
3 changes: 2 additions & 1 deletion yourtube/file_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
it is absent if the video hasn't been watched
view_count:
number of views on youtube
can be None if the video is premium (so the views are hidden)
can be None if the video is premium (so the views are ambiguous)
like_count:
number of likes on youtube
can be None if likes are disabled
Expand All @@ -54,6 +54,7 @@
category of the video
length:
video length in seconds
can be None if the video is premium (so the length is ambiguous)
keywords:
keywords of the video as a list of strings
can be an empty list if there are no keywords
Expand Down
16 changes: 9 additions & 7 deletions yourtube/scraping.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,12 @@ def get_title(content):
def get_view_count(content):
    """Extract the view count from a scraped video page.

    Args:
        content: response-like object whose ``text`` attribute holds the
            page source.

    Returns:
        The view count as an int, or None when the page carries two
        different ``viewCount`` values (premium videos list 2 different
        video versions, so the count is ambiguous).

    Raises:
        AssertionError: if the page contains no ``viewCount`` value, or
            more than two distinct ones.
    """
    candidates = set(re.findall(r'"viewCount":"([0-9]+)"', content.text))
    # Raised explicitly rather than via ``assert`` so the sanity check is
    # not stripped under ``python -O``; the exception type stays the same.
    if not 1 <= len(candidates) <= 2:
        raise AssertionError(
            f"expected 1 or 2 distinct viewCount values, found {len(candidates)}"
        )
    if len(candidates) == 2:
        # premium videos list 2 different video versions
        return None
    return int(candidates.pop())


def get_like_count(content):
Expand Down Expand Up @@ -99,7 +98,10 @@ def get_category(content):
def get_length(content):
    """Extract the video length in seconds from a scraped video page.

    Args:
        content: response-like object whose ``text`` attribute holds the
            page source.

    Returns:
        The length in seconds as an int, or None when the page carries two
        different ``lengthSeconds`` values (premium videos list 2 different
        lengths, so the length is ambiguous).

    Raises:
        AssertionError: if the page contains no ``lengthSeconds`` value
            following a ``videoDetails`` key, or more than two distinct ones.
        ValueError: if the single captured value is not an integer literal.
    """
    candidates = set(
        re.findall(r'"videoDetails":.*?"lengthSeconds":"(.*?)"', content.text)
    )
    # Raised explicitly rather than via ``assert`` so the sanity check is
    # not stripped under ``python -O``; the exception type stays the same.
    if not 1 <= len(candidates) <= 2:
        raise AssertionError(
            f"expected 1 or 2 distinct lengthSeconds values, found {len(candidates)}"
        )
    if len(candidates) == 2:
        # premium videos list 2 different lengths
        return None
    return int(candidates.pop())

Expand Down

0 comments on commit aa0d103

Please sign in to comment.