Skip to content

Commit

Permalink
feat: implement filesize validation for movies and episodes (#869)
Browse files Browse the repository at this point in the history
* feat: implement filesize validation for movies and episodes

* feat: enhance filesize validation for movies and episodes, improve logging and delete invalid torrents

* fix: fix invalid log string property accessor

* feat: reintroduce and refactor filesize validation with media type enums

- Added `ShowMediaType`, `MovieMediaType`, and `MediaType` enums to categorize media types.
- Refactored `filesize_is_acceptable` function to use media type enums for validation.
- Updated `Downloader` class to utilize the new media type enums for file validation.
- Consolidated movie and show filesize validation into a single function.
- Enhanced code readability and maintainability by using enums and reducing redundancy.

* fix: ordering of assignment after super call

* fix: ordering of assignment before super call

* fix: remove duplicate request logging
Caller should log on exception.

* fix: remove possible duplicate hashes caused by scrapers returning the same infohash in uppercase and lowercase.

* feat: enhance downloader with improved filesize validation and logging

- Introduced `InvalidFileSizeException` for better error handling.
- Added `DownloadCachedStreamResult` class to encapsulate download results.
- Refactored `validate_filesize` to raise exceptions and log invalid file sizes.
- Enhanced logging with `get_invalid_filesize_log_string` for clearer messages.
- Improved structure and readability of the downloader service.

* fix: less cpu cycles

* fix: ensure item attributes are updated after filesize validation

---------

Co-authored-by: PromKnight <promknight@proton.me>
  • Loading branch information
davidemarcoli and iPromKnight authored Nov 13, 2024
1 parent f2636e4 commit d1041db
Show file tree
Hide file tree
Showing 7 changed files with 161 additions and 87 deletions.
2 changes: 1 addition & 1 deletion src/program/media/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from .item import Episode, MediaItem, Movie, Season, Show # noqa
from .item import Episode, MediaItem, Movie, Season, Show, ShowMediaType, MovieMediaType, MediaType # noqa
from .state import States # noqa
30 changes: 23 additions & 7 deletions src/program/media/item.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""MediaItem class"""
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import List, Optional, Self

Expand All @@ -17,6 +18,22 @@
from ..db.db_functions import blacklist_stream, reset_streams
from .stream import Stream

class ShowMediaType(Enum):
    """Media item kinds that belong to a TV show hierarchy.

    Each member's value is the string assigned to an item's ``type``.
    """

    Show = "show"
    Season = "season"
    Episode = "episode"

class MovieMediaType(Enum):
    """Media item kinds for movies (a single member)."""

    Movie = "movie"

class MediaType(Enum):
    """Every show-related and movie media type in a single enum.

    Member values are taken from ``ShowMediaType`` and ``MovieMediaType``
    rather than repeated as string literals.
    """

    Show = ShowMediaType.Show.value
    Season = ShowMediaType.Season.value
    Episode = ShowMediaType.Episode.value
    Movie = MovieMediaType.Movie.value

class MediaItem(db.Model):
"""MediaItem class"""
Expand Down Expand Up @@ -129,7 +146,7 @@ def __init__(self, item: dict | None) -> None:
# Overseerr related
self.overseerr_id = item.get("overseerr_id")

#Post processing
# Post-processing
self.subtitles = item.get("subtitles", [])

@staticmethod
Expand Down Expand Up @@ -406,7 +423,7 @@ def copy(self, other):
return self

def __init__(self, item):
self.type = "movie"
self.type = MovieMediaType.Movie.value
self.file = item.get("file", None)
super().__init__(item)

Expand All @@ -428,11 +445,11 @@ class Show(MediaItem):
}

def __init__(self, item):
super().__init__(item)
self.type = "show"
self.type = ShowMediaType.Show.value
self.locations = item.get("locations", [])
self.seasons: list[Season] = item.get("seasons", [])
self.propagate_attributes_to_childs()
super().__init__(item)

def get_season_index_by_id(self, item_id):
"""Find the index of an season by its _id."""
Expand Down Expand Up @@ -543,10 +560,9 @@ def store_state(self, given_state: States = None) -> None:
super().store_state(given_state)

def __init__(self, item):
self.type = "season"
self.type = ShowMediaType.Season.value
self.number = item.get("number", None)
self.episodes: list[Episode] = item.get("episodes", [])
self.parent = item.get("parent", None)
super().__init__(item)
if self.parent and isinstance(self.parent, Show):
self.is_anime = self.parent.is_anime
Expand Down Expand Up @@ -643,7 +659,7 @@ class Episode(MediaItem):
}

def __init__(self, item):
self.type = "episode"
self.type = ShowMediaType.Episode.value
self.number = item.get("number", None)
self.file = item.get("file", None)
super().__init__(item)
Expand Down
87 changes: 57 additions & 30 deletions src/program/services/downloaders/__init__.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,24 @@
from concurrent.futures import CancelledError, ThreadPoolExecutor, as_completed

from loguru import logger

from program.media.item import MediaItem
from program.media.item import MediaItem, MovieMediaType, ShowMediaType
from program.media.state import States
from program.media.stream import Stream
from program.settings.manager import settings_manager
from program.services.downloaders.shared import filesize_is_acceptable, get_invalid_filesize_log_string

from .alldebrid import AllDebridDownloader
from .realdebrid import RealDebridDownloader

# from .torbox import TorBoxDownloader

class InvalidFileSizeException(Exception):
    """Raised when a downloaded file's size is rejected by the filesize check."""

class DownloadCachedStreamResult:
    """Plain holder for the values produced while downloading a cached stream.

    All four attributes are stored exactly as passed in and default to
    ``None`` when omitted.
    """

    def __init__(self, container=None, torrent_id=None, info=None, info_hash=None):
        # Cached container chosen for the stream and the id of the added torrent.
        self.container, self.torrent_id = container, torrent_id
        # Torrent info returned by the service and the stream's infohash.
        self.info, self.info_hash = info, info_hash

class Downloader:
def __init__(self):
Expand Down Expand Up @@ -41,33 +48,33 @@ def validate(self):

def run(self, item: MediaItem):
logger.debug(f"Running downloader for {item.log_string}")

for stream in item.streams:
torrent_id = None
download_result = None
try:
torrent_id = self.download_cached_stream(item, stream)
if torrent_id:
download_result = self.download_cached_stream(item, stream)
if download_result:
self.validate_filesize(item, download_result)
if not self.update_item_attributes(item, download_result):
raise Exception("No matching files found!")
break
except Exception as e:
if torrent_id:
self.service.delete_torrent(torrent_id)
logger.debug(f"Blacklisting {stream.raw_title} for {item.log_string}, reason: {e}")
if download_result and download_result.torrent_id:
self.service.delete_torrent(download_result.torrent_id)
logger.debug(f"Invalid stream: {stream.infohash} - reason: {e}")
item.blacklist_stream(stream)
yield item

def download_cached_stream(self, item: MediaItem, stream: Stream) -> bool:
torrent_id = None

def download_cached_stream(self, item: MediaItem, stream: Stream) -> DownloadCachedStreamResult:
cached_containers = self.get_instant_availability([stream.infohash]).get(stream.infohash, None)
if not cached_containers:
raise Exception("Not cached!")
the_container = cached_containers[0]
torrent_id = self.add_torrent(stream.infohash)
info = self.get_torrent_info(torrent_id)
self.select_files(torrent_id, the_container.keys())
if not self.update_item_attributes(item, info, the_container):
raise Exception("No matching files found!")
logger.log("DEBRID", f"Downloaded {item.log_string} from '{stream.raw_title}' [{stream.infohash}]")
return torrent_id
return DownloadCachedStreamResult(the_container, torrent_id, info, stream.infohash)

def get_instant_availability(self, infohashes: list[str]) -> dict[str, list[dict]]:
return self.service.get_instant_availability(infohashes)
Expand All @@ -84,24 +91,30 @@ def select_files(self, torrent_id, container):
def delete_torrent(self, torrent_id):
self.service.delete_torrent(torrent_id)

def update_item_attributes(self, item: MediaItem, info, container) -> bool:
def update_item_attributes(self, item: MediaItem, download_result: DownloadCachedStreamResult) -> bool:
"""Update the item attributes with the downloaded files and active stream"""
found = False
item = item
container = container
info_hash = download_result.info.get("hash", None)
id = download_result.info.get("id", None)
original_filename = download_result.info.get("original_filename", None)
filename = download_result.info.get("filename", None)
if not info_hash or not id or not original_filename or not filename:
return False
container = download_result.container
for file in container.values():
if item.type == "movie" and self.service.file_finder.container_file_matches_movie(file):
if item.type == MovieMediaType.Movie.value and self.service.file_finder.container_file_matches_movie(file):
item.file = file[self.service.file_finder.filename_attr]
item.folder = info["filename"]
item.alternative_folder = info["original_filename"]
item.active_stream = {"infohash": info["hash"], "id": info["id"]}
item.folder = filename
item.alternative_folder = original_filename
item.active_stream = {"infohash": info_hash, "id": id}
found = True
break
if item.type in ["show", "season", "episode"]:
if item.type in (ShowMediaType.Show.value, ShowMediaType.Season.value, ShowMediaType.Episode.value):
show = item
if item.type == "season":
if item.type == ShowMediaType.Season.value:
show = item.parent
elif item.type == "episode":
elif item.type == ShowMediaType.Episode.value:
show = item.parent.parent
file_season, file_episodes = self.service.file_finder.container_file_matches_episode(file)
if file_season and file_episodes:
Expand All @@ -110,10 +123,24 @@ def update_item_attributes(self, item: MediaItem, info, container) -> bool:
episode = next((episode for episode in season.episodes if episode.number == file_episode), None)
if episode and episode.state not in [States.Completed, States.Symlinked, States.Downloaded]:
episode.file = file[self.service.file_finder.filename_attr]
episode.folder = info["filename"]
episode.alternative_folder = info["original_filename"]
episode.active_stream = {"infohash": info["hash"], "id": info["id"]}
episode.folder = filename
episode.alternative_folder = original_filename
episode.active_stream = {"infohash": info_hash, "id": id}
# We have to make sure the episode is correct if item is an episode
if item.type != "episode" or (item.type == "episode" and episode.number == item.number):
if item.type != ShowMediaType.Episode.value or (item.type == ShowMediaType.Episode.value and episode.number == item.number):
found = True
return found
return found

def validate_filesize(self, item: MediaItem, download_result: DownloadCachedStreamResult):
    """Check every file in the downloaded container against the size limits.

    Args:
        item: Media item being downloaded; its type selects which limits apply.
        download_result: Download result whose ``container`` values are the
            file entries to check.

    Raises:
        InvalidFileSizeException: For the first file whose size is rejected by
            ``filesize_is_acceptable``.
    """
    # Neither the media type nor the attribute names change between files,
    # so resolve them once instead of on every iteration.
    item_media_type = self._get_item_media_type(item)
    file_finder = self.service.file_finder
    name_attr = file_finder.filename_attr
    size_attr = file_finder.filesize_attr

    for file in download_result.container.values():
        filesize = file[size_attr]
        if not filesize_is_acceptable(filesize, item_media_type):
            raise InvalidFileSizeException(
                f"File '{file[name_attr]}' is invalid: "
                f"{get_invalid_filesize_log_string(filesize, item_media_type)}"
            )
    logger.debug(f"All files for {download_result.info_hash} are of an acceptable size")

@staticmethod
def _get_item_media_type(item):
    """Collapse an item's type into the category used for filesize limits.

    Returns the show media-type value when ``item.type`` equals any
    ``ShowMediaType`` value (show, season, or episode), and the movie
    media-type value for anything else.
    """
    is_show_like = any(item.type == member.value for member in ShowMediaType)
    return ShowMediaType.Show.value if is_show_like else MovieMediaType.Movie.value
117 changes: 72 additions & 45 deletions src/program/services/downloaders/shared.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from abc import ABC, abstractmethod
from datetime import datetime
from typing import Tuple

from loguru import logger
from RTN import parse

from program.media import MovieMediaType, ShowMediaType
from program.settings.manager import settings_manager

DEFAULT_VIDEO_EXTENSIONS = ["mp4", "mkv", "avi"]
Expand Down Expand Up @@ -73,31 +75,9 @@ class FileFinder:
filename_attr (str): The name of the file attribute.
"""

min_movie_filesize = settings_manager.settings.downloaders.movie_filesize_mb_min
max_movie_filesize = settings_manager.settings.downloaders.movie_filesize_mb_max
min_episode_filesize = settings_manager.settings.downloaders.episode_filesize_mb_min
max_episode_filesize = settings_manager.settings.downloaders.episode_filesize_mb_max
are_filesizes_valid = False

def __init__(self, name, size):
self.filename_attr = name
self.filesize_attr = size
self.are_filesizes_valid = self._validate_filesizes()

def _validate_filesizes(self) -> bool:
if not isinstance(settings_manager.settings.downloaders.movie_filesize_mb_min, int) or settings_manager.settings.downloaders.movie_filesize_mb_min < -1:
logger.error("Movie filesize min is not set or invalid.")
return False
if not isinstance(settings_manager.settings.downloaders.movie_filesize_mb_max, int) or settings_manager.settings.downloaders.movie_filesize_mb_max < -1:
logger.error("Movie filesize max is not set or invalid.")
return False
if not isinstance(settings_manager.settings.downloaders.episode_filesize_mb_min, int) or settings_manager.settings.downloaders.episode_filesize_mb_min < -1:
logger.error("Episode filesize min is not set or invalid.")
return False
if not isinstance(settings_manager.settings.downloaders.episode_filesize_mb_max, int) or settings_manager.settings.downloaders.episode_filesize_mb_max < -1:
logger.error("Episode filesize max is not set or invalid.")
return False
return True

def container_file_matches_episode(self, file):
filename = file[self.filename_attr]
Expand All @@ -115,29 +95,6 @@ def container_file_matches_movie(self, file):
except Exception:
return None

def filesize_is_acceptable_movie(self, filesize):
if not self.are_filesizes_valid:
logger.error("Filesize settings are invalid, movie file sizes will not be checked.")
return True
min_size = settings_manager.settings.downloaders.movie_filesize_mb_min * 1_000_000
max_size = settings_manager.settings.downloaders.movie_filesize_mb_max * 1_000_000 if settings_manager.settings.downloaders.movie_filesize_mb_max != -1 else float("inf")
is_acceptable = min_size <= filesize <= max_size
if not is_acceptable:
logger.debug(f"Filesize {filesize} is not within acceptable range {min_size} - {max_size}")
return is_acceptable

def filesize_is_acceptable_show(self, filesize):
if not self.are_filesizes_valid:
logger.error("Filesize settings are invalid, episode file sizes will not be checked.")
return True
min_size = settings_manager.settings.downloaders.episode_filesize_mb_min * 1_000_000
max_size = settings_manager.settings.downloaders.episode_filesize_mb_max * 1_000_000 if settings_manager.settings.downloaders.episode_filesize_mb_max != -1 else float("inf")
is_acceptable = min_size <= filesize <= max_size
if not is_acceptable:
logger.debug(f"Filesize {filesize} is not within acceptable range {min_size} - {max_size}")
return is_acceptable


def premium_days_left(expiration: datetime) -> str:
"""Convert an expiration date into a message showing days remaining on the user's premium account"""
time_left = expiration - datetime.utcnow()
Expand All @@ -162,3 +119,73 @@ def hash_from_uri(magnet_uri: str) -> str:
return magnet_uri
start = magnet_uri.index("urn:btih:") + len("urn:btih:")
return magnet_uri[start : start + 40]

# Snapshot of the configured filesize limits, read once when this module is
# imported (the setting names indicate the unit is megabytes).
# NOTE(review): the helper functions in this section read settings_manager
# directly instead of these names - confirm whether anything still uses them.
min_movie_filesize = settings_manager.settings.downloaders.movie_filesize_mb_min
max_movie_filesize = settings_manager.settings.downloaders.movie_filesize_mb_max
min_episode_filesize = settings_manager.settings.downloaders.episode_filesize_mb_min
max_episode_filesize = settings_manager.settings.downloaders.episode_filesize_mb_max

def _validate_filesize_setting(value: int, setting_name: str) -> bool:
"""Validate a single filesize setting."""
if not isinstance(value, int) or value < -1:
logger.error(f"{setting_name} is not valid. Got {value}, expected integer >= -1")
return False
return True

def _validate_filesizes() -> bool:
    """Validate the four movie/episode filesize settings.

    Returns True only when every setting passes
    ``_validate_filesize_setting``; False if any of them fails.
    """
    downloader_settings = settings_manager.settings.downloaders
    labelled_values = (
        ("Movie filesize min", downloader_settings.movie_filesize_mb_min),
        ("Movie filesize max", downloader_settings.movie_filesize_mb_max),
        ("Episode filesize min", downloader_settings.episode_filesize_mb_min),
        ("Episode filesize max", downloader_settings.episode_filesize_mb_max),
    )
    # Collect every outcome before reducing so that each invalid setting is
    # logged, not only the first one encountered.
    outcomes = [_validate_filesize_setting(value, label) for label, value in labelled_values]
    return all(outcomes)

# Computed once when the module is imported; _validate_filesize skips the
# size check (and accepts the file) whenever this flag is False.
are_filesizes_valid = _validate_filesizes()

BYTES_PER_MB = 1_000_000

def _convert_to_bytes(size_mb: int) -> int:
"""Convert size from megabytes to bytes."""
return size_mb * BYTES_PER_MB

def _get_size_limits(media_type: str) -> Tuple[int, int]:
    """Look up the configured (min, max) filesize limits in MB.

    The movie limits are returned when *media_type* is the movie media-type
    value; every other value gets the episode limits.
    """
    downloader_settings = settings_manager.settings.downloaders
    if media_type != MovieMediaType.Movie.value:
        return (
            downloader_settings.episode_filesize_mb_min,
            downloader_settings.episode_filesize_mb_max,
        )
    return (
        downloader_settings.movie_filesize_mb_min,
        downloader_settings.movie_filesize_mb_max,
    )

def _validate_filesize(filesize: int, media_type: str) -> bool:
    """Decide whether a file size falls inside the configured limits.

    Args:
        filesize: Size in bytes to validate.
        media_type: Media type whose limits should be applied.

    Returns:
        bool: True if the size is within the configured range. Also True,
        after logging an error, when the filesize settings were found
        invalid, in which case no check is performed.
    """
    if are_filesizes_valid:
        lower_mb, upper_mb = _get_size_limits(media_type)
        # A limit of -1 means "no bound" on that side.
        lower_bound = _convert_to_bytes(lower_mb) if lower_mb != -1 else 0
        upper_bound = _convert_to_bytes(upper_mb) if upper_mb != -1 else float("inf")
        return lower_bound <= filesize <= upper_bound

    logger.error(f"Filesize settings are invalid, {media_type} file sizes will not be checked.")
    return True


def filesize_is_acceptable(filesize: int, media_type: str) -> bool:
    """Report whether *filesize* (in bytes) is allowed for *media_type*.

    Thin public wrapper that returns the result of ``_validate_filesize``.
    """
    verdict = _validate_filesize(filesize, media_type)
    return verdict

def get_invalid_filesize_log_string(filesize: int, media_type: str) -> str:
    """Build the message describing a filesize that is outside the limits.

    The size is converted from bytes to MB (rounded to two decimals) and shown
    next to the configured min/max for *media_type*, which are printed exactly
    as stored in the settings.
    """
    lower_mb, upper_mb = _get_size_limits(media_type)
    size_in_mb = round(filesize / BYTES_PER_MB, 2)
    return f"{size_in_mb} MB is not within acceptable range of [{lower_mb}MB] to [{upper_mb}MB]"
6 changes: 6 additions & 0 deletions src/program/services/scrapers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,12 @@ def run_service(service, item,):
logger.error(f"Service {service.__class__.__name__} returned invalid results: {service_results}")
return

# ensure that info hash is lower case in each result
if isinstance(service_results, dict):
for infohash in list(service_results.keys()):
if infohash.lower() != infohash:
service_results[infohash.lower()] = service_results.pop(infohash)

with results_lock:
results.update(service_results)
total_results += len(service_results)
Expand Down
Loading

0 comments on commit d1041db

Please sign in to comment.