Merge pull request #270 from MalloyDelacroix/erome_fix

Fix erome extraction and download
MalloyDelacroix · Jan 18, 2022 · d8e45c8 · d8e45c8
2 parents e2547a6 + a6f19da
commit d8e45c8
Show file tree

Hide file tree

Showing 4 changed files with 23 additions and 7 deletions.
diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md
@@ -1,6 +1,12 @@
 # Change Log
 
 
+## v3.13.2
+
+### Bug Fixes
+* Fix extraction errors for erome-hosted content.
+
+
 ## v3.13.1
 
 ### Bug Fixes

diff --git a/DownloaderForReddit/core/downloader.py b/DownloaderForReddit/core/downloader.py
@@ -78,7 +78,7 @@ def download(self, content_id: int):
         try:
             with self.db.get_scoped_session() as session:
                 content = session.query(Content).get(content_id)
-                response = requests.get(content.url, stream=True, timeout=10)
+                response = requests.get(content.url, stream=True, timeout=10, headers=self.check_headers(content.url))
                 if response.status_code == 200:
                     file_size = int(response.headers['Content-Length'])
                     content.download_title = general_utils.check_file_path(content)
@@ -103,6 +103,17 @@ def download(self, content_id: int):
         except:
             self.handle_unknown_error(content)
 
+    def check_headers(self, url):
+        """
+        This is a helper method that adds the Referer header required for erome downloads.  It is a stopgap patch
+        for now.  It can be expanded as further need arises, or replaced by a different, better system.
+        :param url: The url on which a download is about to be performed.
+        :return: A dict to be used as a request header where applicable, None if not.
+        """
+        if 'erome' in url:
+            return {"Referer": "https://www.erome.com/"}
+        return None
+
     def finish_download(self, content: Content):
         """
         Wraps up loose ends from the download process.  Takes care of updating the user about the download status,

diff --git a/DownloaderForReddit/extractors/erome_extractor.py b/DownloaderForReddit/extractors/erome_extractor.py
@@ -41,7 +41,7 @@ def get_content(tag):
     if video_tags:
         return video_tags[0].find_all('source')[0].get('src')
     else:
-        img_tags = tag.find_all(class_filter('img'))
+        img_tags = tag.find_all(class_filter('img-back'))
         return img_tags[0].get("data-src")
 
 
@@ -66,10 +66,6 @@ def extract_content(self):
             message = 'Failed to locate content'
             self.handle_failed_extract(error=Error.FAILED_TO_LOCATE, message=message, extractor_error_message=message)
 
-    def get_soup(self):
-        soup = BeautifulSoup(self.get_text(self.url), 'html.parser')
-        return soup
-
     def extract_single(self):
         # Singles are just ablums containing 1 item
         pass
@@ -84,5 +80,8 @@ def extract_album(self):
         for url in urls:
             _, hosted_id = url.rsplit('/', 1)
             base, extension = hosted_id.rsplit('.', 1)
+            # Image urls carry a query string after the file name; strip it to get a clean extension
+            if '?' in extension:
+                extension = extension.split('?')[0]
             self.make_content(url, extension, count=count if count > 0 else None, media_id=base)
             count += 1
diff --git a/DownloaderForReddit/version.py b/DownloaderForReddit/version.py
@@ -1,4 +1,4 @@
-__version__ = 'v3.13.1'
+__version__ = 'v3.13.2'
 
 
 def get_major_version():