diff --git a/pyproject.toml b/pyproject.toml index b1f8091..a6da013 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "yourtube" -version = "0.5.3" +version = "0.5.4" description = "Better youtube recommendations" authors = ["Filip Sondej "] license = "LGPL-2.1" diff --git a/yourtube/__init__.py b/yourtube/__init__.py index c7d485c..b3ff452 100644 --- a/yourtube/__init__.py +++ b/yourtube/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.5.3" +__version__ = "0.5.4" import os import subprocess diff --git a/yourtube/debug.ipynb b/yourtube/debug.ipynb index 434e934..112871a 100644 --- a/yourtube/debug.ipynb +++ b/yourtube/debug.ipynb @@ -1,5 +1,17 @@ { "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " %load_ext lab_black\n", + "except ModuleNotFoundError:\n", + " print(\"nb_black not installed\")" + ] + }, { "cell_type": "code", "execution_count": null, @@ -13,7 +25,7 @@ "sys.path.append(os.path.abspath(\"..\"))\n", "import matplotlib.pyplot as plt\n", "\n", - "from yourtube.scraping import get_content, scrape_content" + "from yourtube.scraping import *" ] }, { diff --git a/yourtube/file_operations.py b/yourtube/file_operations.py index a3fd6e9..f9620f7 100644 --- a/yourtube/file_operations.py +++ b/yourtube/file_operations.py @@ -43,7 +43,7 @@ it is absent if the video hasn't been watched view_count: number of views on youtube - can be None if the video is premium (so the views are hidden) + can be None if the video is premium (so the views are ambiguous) like_count: number of likes on youtube can be None if likes are disabled @@ -54,6 +54,7 @@ category of the video length: video length in seconds + can be None if the video is premium (so the length is ambiguous) keywords: keywords of the video as a list of strings can be an empty list if there are no keywords diff --git a/yourtube/scraping.py b/yourtube/scraping.py index 4102e7a..2addb3a 100644 --- 
a/yourtube/scraping.py +++ b/yourtube/scraping.py @@ -46,13 +46,12 @@ def get_title(content): def get_view_count(content): candidates = re.findall(r'"viewCount":"([0-9]+)"', content.text) candidates = set(candidates) - assert len(candidates) <= 1 - if candidates: - view_count = candidates.pop() - return int(view_count) - else: - # view count is absent for example in premium videos + assert 1 <= len(candidates) <= 2 + if len(candidates) == 2: + # premium videos list 2 different video versions return None + view_count = candidates.pop() + return int(view_count) def get_like_count(content): @@ -99,7 +98,10 @@ def get_category(content): def get_length(content): candidates = re.findall(r'"videoDetails":.*?"lengthSeconds":"(.*?)"', content.text) candidates = set(candidates) - assert len(candidates) == 1 + assert 1 <= len(candidates) <= 2 + if len(candidates) == 2: + # premium videos list 2 different lengths + return None length = candidates.pop() return int(length)