diff --git a/src/program/services/scrapers/__init__.py b/src/program/services/scrapers/__init__.py index 6a7f297e..d1f8d995 100644 --- a/src/program/services/scrapers/__init__.py +++ b/src/program/services/scrapers/__init__.py @@ -25,17 +25,23 @@ def __init__(self): self.key = "scraping" self.initialized = False self.settings = settings_manager.settings.scraping - self.services = { + self.imdb_services = { # If we are missing imdb_id then we cant scrape here Torrentio: Torrentio(), Knightcrawler: Knightcrawler(), Orionoid: Orionoid(), - Jackett: Jackett(), TorBoxScraper: TorBoxScraper(), Mediafusion: Mediafusion(), - Prowlarr: Prowlarr(), - Zilean: Zilean(), Comet: Comet() } + self.keyword_services = { + Jackett: Jackett(), + Prowlarr: Prowlarr(), + Zilean: Zilean() + } + self.services = { + **self.imdb_services, + **self.keyword_services + } self.initialized = self.validate() if not self.initialized: return @@ -65,6 +71,9 @@ def scrape(self, item: MediaItem, log = True) -> Dict[str, Stream]: total_results = 0 results_lock = threading.RLock() + imdb_id = item.get_top_imdb_id() + available_services = self.services if imdb_id else self.keyword_services + def run_service(service, item,): nonlocal total_results service_results = service.run(item) @@ -77,7 +86,7 @@ def run_service(service, item,): results.update(service_results) total_results += len(service_results) - for service_name, service in self.services.items(): + for service_name, service in available_services.items(): if service.initialized: thread = threading.Thread(target=run_service, args=(service, item), name=service_name.__name__) threads.append(thread) diff --git a/src/program/services/scrapers/comet.py b/src/program/services/scrapers/comet.py index c659b397..b68be53c 100644 --- a/src/program/services/scrapers/comet.py +++ b/src/program/services/scrapers/comet.py @@ -88,18 +88,14 @@ def run(self, item: MediaItem) -> Dict[str, str]: logger.error(f"Comet exception thrown: {str(e)}") return {} - def 
scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]: """Wrapper for `Comet` scrape method""" identifier, scrape_type, imdb_id = _get_stremio_identifier(item) - if not imdb_id: - return {} - url = f"{self.settings.url}/{self.encoded_string}/stream/{scrape_type}/{imdb_id}{identifier or ''}.json" response = self.request_handler.execute(HttpMethod.GET, url, timeout=self.timeout) if not response.is_ok or not getattr(response.data, "streams", None): + logger.log("NOT_FOUND", f"No streams found for {item.log_string}") return {} torrents: Dict[str, str] = {} diff --git a/src/program/services/scrapers/jackett.py b/src/program/services/scrapers/jackett.py index d7977015..1f6e8a81 100644 --- a/src/program/services/scrapers/jackett.py +++ b/src/program/services/scrapers/jackett.py @@ -98,15 +98,6 @@ def run(self, item: MediaItem) -> Generator[MediaItem, None, None]: def scrape(self, item: MediaItem) -> Dict[str, str]: """Scrape the given media item""" - data, stream_count = self.api_scrape(item) - if data: - logger.log("SCRAPER", f"Found {len(data)} streams out of {stream_count} for {item.log_string}") - else: - logger.log("NOT_FOUND", f"No streams found for {item.log_string}") - return data - - def api_scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]: - """Wrapper for `Jackett` scrape method""" results_queue = queue.Queue() threads = [ threading.Thread(target=self._thread_target, args=(item, indexer, results_queue)) @@ -118,8 +109,22 @@ def api_scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]: for thread in threads: thread.join() - results = self._collect_results(results_queue) - return self._process_results(results) + results = [] + while not results_queue.empty(): + results.extend(results_queue.get()) + + torrents: Dict[str, str] = {} + for result in results: + if result[1] is None: + continue + # infohash: raw_title + torrents[result[1]] = result[0] + + if torrents: + logger.log("SCRAPER", f"Found {len(torrents)} streams for 
{item.log_string}") + else: + logger.log("NOT_FOUND", f"No streams found for {item.log_string}") + return torrents def _thread_target(self, item: MediaItem, indexer: JackettIndexer, results_queue: queue.Queue): """Thread target for searching indexers""" @@ -144,23 +149,6 @@ def _search_indexer(self, item: MediaItem, indexer: JackettIndexer) -> List[Tupl else: raise TypeError("Only Movie and Series is allowed!") - def _collect_results(self, results_queue: queue.Queue) -> List[Tuple[str, str]]: - """Collect results from the queue""" - results = [] - while not results_queue.empty(): - results.extend(results_queue.get()) - return results - - def _process_results(self, results: List[Tuple[str, str]]) -> Tuple[Dict[str, str], int]: - """Process the results and return the torrents""" - torrents: Dict[str, str] = {} - for result in results: - if result[1] is None: - continue - # infohash: raw_title - torrents[result[1]] = result[0] - return torrents, len(results) - def _search_movie_indexer(self, item: MediaItem, indexer: JackettIndexer) -> List[Tuple[str, str]]: """Search for movies on the given indexer""" if indexer.movie_search_capabilities is None: @@ -205,13 +193,14 @@ def _search_series_indexer(self, item: MediaItem, indexer: JackettIndexer) -> Li def _get_series_search_params(self, item: MediaItem) -> Tuple[str, int, Optional[int]]: """Get search parameters for series""" + title = item.get_top_title() if isinstance(item, Show): - return item.get_top_title(), None, None + return title, None, None elif isinstance(item, Season): - return item.get_top_title(), item.number, None + return title, item.number, None elif isinstance(item, Episode): - return item.get_top_title(), item.parent.number, item.number - return "", 0, None + return title, item.parent.number, item.number + return title, None, None def _get_indexers(self) -> List[JackettIndexer]: """Get the indexers from Jackett""" diff --git a/src/program/services/scrapers/knightcrawler.py 
b/src/program/services/scrapers/knightcrawler.py index 5fbb396c..ee47b98d 100644 --- a/src/program/services/scrapers/knightcrawler.py +++ b/src/program/services/scrapers/knightcrawler.py @@ -89,8 +89,9 @@ def scrape(self, item: MediaItem) -> Dict[str, str]: url += identifier response = self.request_handler.execute(HttpMethod.GET, f"{url}.json", timeout=self.timeout) if not response.is_ok or len(response.data.streams) <= 0: + logger.log("NOT_FOUND", f"No streams found for {item.log_string}") return {} torrents = { diff --git a/src/program/services/scrapers/mediafusion.py b/src/program/services/scrapers/mediafusion.py index 1a16d74a..8bded437 100644 --- a/src/program/services/scrapers/mediafusion.py +++ b/src/program/services/scrapers/mediafusion.py @@ -133,6 +133,7 @@ def scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]: response = self.request_handler.execute(HttpMethod.GET, f"{url}.json", timeout=self.timeout) if not response.is_ok or len(response.data.streams) <= 0: + logger.log("NOT_FOUND", f"No streams found for {item.log_string}") return {} torrents: Dict[str, str] = {} diff --git a/src/program/services/scrapers/orionoid.py b/src/program/services/scrapers/orionoid.py index ccab4e41..962b2eb7 100644 --- a/src/program/services/scrapers/orionoid.py +++ b/src/program/services/scrapers/orionoid.py @@ -153,6 +153,7 @@ def scrape(self, item: MediaItem) -> Dict[str, str]: params = self._build_query_params(item) response = self.request_handler.execute(HttpMethod.GET, self.base_url, params=params, timeout=self.timeout) if not response.is_ok or not hasattr(response.data, "data"): + logger.log("NOT_FOUND", f"No streams found for {item.log_string}") return {} torrents = {} diff --git a/src/program/services/scrapers/prowlarr.py b/src/program/services/scrapers/prowlarr.py index 9315f83a..5d3c7719 100644 --- a/src/program/services/scrapers/prowlarr.py +++ b/src/program/services/scrapers/prowlarr.py @@ -100,16 +100,7 @@ def run(self, item: MediaItem) -> Dict[str, 
str]: return {} def scrape(self, item: MediaItem) -> Dict[str, str]: - """Scrape the given media item""" - data, stream_count = self.api_scrape(item) - if data: - logger.log("SCRAPER", f"Found {len(data)} streams out of {stream_count} for {item.log_string}") - else: - logger.log("NOT_FOUND", f"No streams found for {item.log_string}") - return data - - def api_scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]: - """Wrapper for `Prowlarr` scrape method""" + """Scrape the given media item using Prowlarr indexers""" results_queue = queue.Queue() threads = [ threading.Thread(target=self._thread_target, args=(item, indexer, results_queue)) @@ -121,8 +112,22 @@ def api_scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]: for thread in threads: thread.join() - results = self._collect_results(results_queue) - return self._process_results(results) + results = [] + while not results_queue.empty(): + results.extend(results_queue.get()) + + torrents: Dict[str, str] = {} + for result in results: + if result[1] is None: + continue + torrents[result[1]] = result[0] + + if torrents: + logger.log("SCRAPER", f"Found {len(torrents)} streams for {item.log_string}") + else: + logger.log("NOT_FOUND", f"No streams found for {item.log_string}") + + return torrents def _thread_target(self, item: MediaItem, indexer: ProwlarrIndexer, results_queue: queue.Queue): try: @@ -146,26 +151,6 @@ def _search_indexer(self, item: MediaItem, indexer: ProwlarrIndexer) -> List[Tup else: raise TypeError("Only Movie and Series is allowed!") - def _collect_results(self, results_queue: queue.Queue) -> List[Tuple[str, str]]: - """Collect results from the queue""" - results = [] - while not results_queue.empty(): - results.extend(results_queue.get()) - return results - - def _process_results(self, results: List[Tuple[str, str]]) -> Tuple[Dict[str, str], int]: - """Process the results and return the torrents""" - torrents: Dict[str, str] = {} - - for result in results: - if result[1] is None: - 
continue - - # infohash: raw_title - torrents[result[1]] = result[0] - - return torrents, len(results) - def _search_movie_indexer(self, item: MediaItem, indexer: ProwlarrIndexer) -> List[Tuple[str, str]]: """Search for movies on the given indexer""" if indexer.movie_search_capabilities is None: @@ -209,13 +194,14 @@ def _search_series_indexer(self, item: MediaItem, indexer: ProwlarrIndexer) -> L def _get_series_search_params(self, item: MediaItem) -> Tuple[str, int, Optional[int]]: """Get search parameters for series""" + title = item.get_top_title() if isinstance(item, Show): - return item.get_top_title(), None, None + return title, None, None elif isinstance(item, Season): - return item.get_top_title(), item.number, None + return title, item.number, None elif isinstance(item, Episode): - return item.get_top_title(), item.parent.number, item.number - return "", 0, None + return title, item.parent.number, item.number + return title, None, None def _get_indexers(self) -> List[ProwlarrIndexer]: """Get the indexers from Prowlarr""" diff --git a/src/program/services/scrapers/shared.py b/src/program/services/scrapers/shared.py index e0565603..15175aab 100644 --- a/src/program/services/scrapers/shared.py +++ b/src/program/services/scrapers/shared.py @@ -132,7 +132,7 @@ def _parse_results(item: MediaItem, results: Dict[str, str], log_msg: bool = Tru continue if torrents: - logger.log("SCRAPER", f"Processed {len(torrents)} matches for {item.log_string}") + logger.log("SCRAPER", f"Found {len(torrents)} streams for {item.log_string}") torrents = sort_torrents(torrents) torrents_dict = {} for torrent in torrents.values(): diff --git a/src/program/services/scrapers/torbox.py b/src/program/services/scrapers/torbox.py index 01f9fbb9..7d687439 100644 --- a/src/program/services/scrapers/torbox.py +++ b/src/program/services/scrapers/torbox.py @@ -15,7 +15,7 @@ def __init__(self): self.key = "torbox" self.settings = settings_manager.settings.scraping.torbox_scraper self.base_url = 
"http://search-api.torbox.app" - self.user_plan = None + self.headers = {"Authorization": f"Bearer {self.settings.api_key}"} self.timeout = self.settings.timeout session = create_service_session() self.request_handler = ScraperRequestHandler(session) @@ -28,11 +28,14 @@ def validate(self) -> bool: """Validate the TorBox Scraper as a service""" if not self.settings.enabled: return False + if not self.settings.api_key: + logger.error("TorBox API key is not set.") + return False if not isinstance(self.timeout, int) or self.timeout <= 0: logger.error("TorBox timeout is not set or invalid.") return False try: - response = self.request_handler.execute(HttpMethod.GET, f"{self.base_url}/torrents/imdb:tt0944947?metadata=false&season=1&episode=1", timeout=self.timeout) + response = self.request_handler.execute(HttpMethod.GET, f"{self.base_url}/torrents/imdb:tt0944947?metadata=false&season=1&episode=1", headers=self.headers, timeout=self.timeout) return response.is_ok except Exception as e: logger.exception(f"Error validating TorBox Scraper: {e}") @@ -57,22 +60,25 @@ def run(self, item: MediaItem) -> Dict[str, str]: def _build_query_params(self, item: MediaItem) -> str: """Build the query params for the TorBox API""" - params = [f"imdb:{item.imdb_id}"] - if item.type == "show": - params.append("season=1") + imdb_id = item.get_top_imdb_id() + if item.type == "movie": + return f"torrents/imdb:{imdb_id}" + elif item.type == "show": + return f"torrents/imdb:{imdb_id}?season=1&episode=1" elif item.type == "season": - params.append(f"season={item.number}") + return f"torrents/imdb:{imdb_id}?season={item.number}&episode=1" elif item.type == "episode": - params.append(f"season={item.parent.number}&episode={item.number}") - return "&".join(params) + return f"torrents/imdb:{imdb_id}?season={item.parent.number}&episode={item.number}" + return "" - def scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]: + def scrape(self, item: MediaItem) -> Dict[str, str]: """Wrapper for 
`Torbox` scrape method""" query_params = self._build_query_params(item) - url = f"{self.base_url}/torrents/{query_params}?metadata=false" + url = f"{self.base_url}/{query_params}{'&' if '?' in query_params else '?'}metadata=false" - response = self.request_handler.execute(HttpMethod.GET, url, timeout=self.timeout) + response = self.request_handler.execute(HttpMethod.GET, url, headers=self.headers, timeout=self.timeout) if not response.is_ok or not response.data.data.torrents: + logger.log("NOT_FOUND", f"No streams found for {item.log_string}") return {} torrents = {} diff --git a/src/program/services/scrapers/torrentio.py b/src/program/services/scrapers/torrentio.py index 4e298ed2..2f100a94 100644 --- a/src/program/services/scrapers/torrentio.py +++ b/src/program/services/scrapers/torrentio.py @@ -74,10 +74,8 @@ def scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]: url += identifier response = self.request_handler.execute(HttpMethod.GET, f"{url}.json", timeout=self.timeout) - if not response.is_ok: - return {} - - if not hasattr(response.data, 'streams') or not response.data.streams: + if not response.is_ok or not hasattr(response.data, 'streams') or not response.data.streams: + logger.log("NOT_FOUND", f"No streams found for {item.log_string}") return {} torrents: Dict[str, str] = {} diff --git a/src/program/services/scrapers/zilean.py b/src/program/services/scrapers/zilean.py index 5c337929..32ca7d84 100644 --- a/src/program/services/scrapers/zilean.py +++ b/src/program/services/scrapers/zilean.py @@ -77,6 +77,7 @@ def scrape(self, item: MediaItem) -> Dict[str, str]: response = self.request_handler.execute(HttpMethod.GET, url, params=params, timeout=self.timeout) if not response.is_ok or not response.data: + logger.log("NOT_FOUND", f"No streams found for {item.log_string}") return {} torrents: Dict[str, str] = {} diff --git a/src/program/settings/models.py b/src/program/settings/models.py index 8773da40..941cda93 100644 --- a/src/program/settings/models.py +++ 
b/src/program/settings/models.py @@ -271,6 +271,7 @@ class ProwlarrConfig(Observable): class TorBoxScraperConfig(Observable): enabled: bool = False + api_key: str = "" timeout: int = 30