diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index eca409c..14184bb 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -1,6 +1,12 @@ # Change Log +## v3.13.2 + +### Bug Fixes +* Fix extraction errors for erome hosted content. + + ## v3.13.1 ### Bug Fixes diff --git a/DownloaderForReddit/core/downloader.py b/DownloaderForReddit/core/downloader.py index 55cd544..9f755bc 100644 --- a/DownloaderForReddit/core/downloader.py +++ b/DownloaderForReddit/core/downloader.py @@ -78,7 +78,7 @@ def download(self, content_id: int): try: with self.db.get_scoped_session() as session: content = session.query(Content).get(content_id) - response = requests.get(content.url, stream=True, timeout=10) + response = requests.get(content.url, stream=True, timeout=10, headers=self.check_headers(content.url)) if response.status_code == 200: file_size = int(response.headers['Content-Length']) content.download_title = general_utils.check_file_path(content) @@ -103,6 +103,17 @@ def download(self, content_id: int): except: self.handle_unknown_error(content) + def check_headers(self, url): + """ + This is a helper method to add a necessary header entry for erome downloads. It is just a patch for a problem + at the moment. This can be expanded as further need arises, or replaced by a different, better system. + :param url: The url on which a download is about to be performed. + :return: A dict to be used as a request header where applicable, None otherwise. + """ + if 'erome' in url: + return {"Referer": "https://www.erome.com/"} + return None + def finish_download(self, content: Content): """ Wraps up loose ends from the download process. 
Takes care of updating the user about the download status, diff --git a/DownloaderForReddit/extractors/erome_extractor.py b/DownloaderForReddit/extractors/erome_extractor.py index 48ded97..cb9916c 100644 --- a/DownloaderForReddit/extractors/erome_extractor.py +++ b/DownloaderForReddit/extractors/erome_extractor.py @@ -41,7 +41,7 @@ def get_content(tag): if video_tags: return video_tags[0].find_all('source')[0].get('src') else: - img_tags = tag.find_all(class_filter('img')) + img_tags = tag.find_all(class_filter('img-back')) return img_tags[0].get("data-src") @@ -66,10 +66,6 @@ def extract_content(self): message = 'Failed to locate content' self.handle_failed_extract(error=Error.FAILED_TO_LOCATE, message=message, extractor_error_message=message) - def get_soup(self): - soup = BeautifulSoup(self.get_text(self.url), 'html.parser') - return soup - def extract_single(self): # Singles are just ablums containing 1 item pass @@ -84,5 +80,8 @@ def extract_album(self): for url in urls: _, hosted_id = url.rsplit('/', 1) base, extension = hosted_id.rsplit('.', 1) + # Image urls have an identifier param after the file name; this removes it to get a clean extension + if '?' in extension: + extension = extension.split('?')[0] self.make_content(url, extension, count=count if count > 0 else None, media_id=base) count += 1 diff --git a/DownloaderForReddit/version.py b/DownloaderForReddit/version.py index d0bd60a..ad47893 100644 --- a/DownloaderForReddit/version.py +++ b/DownloaderForReddit/version.py @@ -1,4 +1,4 @@ -__version__ = 'v3.13.1' +__version__ = 'v3.13.2' def get_major_version():