[pull] master from mikf:master #111

Merged · 6 commits · Jan 6, 2025
2 changes: 2 additions & 0 deletions docs/configuration.rst
@@ -405,6 +405,8 @@ Default
       ``flickr``,
       ``weibo``,
       ``[wikimedia]``
+    * ``"1.4"``
+      ``wallhaven``
     * ``"2.0-4.0"``
       ``behance``,
       ``imagefap``,
2 changes: 2 additions & 0 deletions docs/gallery-dl.conf
@@ -691,6 +691,8 @@
     "wallhaven":
     {
         "api-key" : null,
+        "sleep-request": "1.4",
+
         "include" : ["uploads"],
         "metadata": false
     },
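For context: `sleep-request` values accept either a fixed number of seconds or a `"min-max"` range drawn per request, as the `"2.0-4.0"` default in the configuration.rst hunk above shows. A minimal sketch of how such a value could be interpreted (illustrative only, not gallery-dl's actual parsing code):

import random

def parse_sleep(value):
    """Interpret a fixed duration ("1.4") or a range ("2.0-4.0")."""
    value = str(value)
    if "-" in value:
        lo, _, hi = value.partition("-")
        return lambda: random.uniform(float(lo), float(hi))
    seconds = float(value)
    return lambda: seconds

sleep = parse_sleep("1.4")
print(sleep())  # always 1.4 seconds between wallhaven API requests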
9 changes: 5 additions & 4 deletions docs/options.md
@@ -48,13 +48,13 @@
   -K, --list-keywords             Print a list of available keywords and example
                                   values for the given URLs
   -e, --error-file FILE           Add input URLs which returned an error to FILE
-  -N, --print [EVENT:]FORMAT      Write FORMAT during EVENT (default 'prepare') to
-                                  standard output. Examples: 'id' or
+  -N, --print [EVENT:]FORMAT      Write FORMAT during EVENT (default 'prepare')
+                                  to standard output. Examples: 'id' or
                                   'post:{md5[:8]}'
   --print-to-file [EVENT:]FORMAT FILE
                                   Append FORMAT during EVENT to FILE
   --list-modules                  Print a list of available extractor modules
-  --list-extractors CATEGORIES
+  --list-extractors [CATEGORIES]
                                   Print a list of extractor classes with
                                   description, (sub)category and example URL
   --write-log FILE                Write logging output to FILE
@@ -67,7 +67,8 @@

 ## Networking Options:
   -R, --retries N                 Maximum number of retries for failed HTTP
-                                  requests or -1 for infinite retries (default: 4)
+                                  requests or -1 for infinite retries (default:
+                                  4)
   --http-timeout SECONDS          Timeout for HTTP connections (default: 30.0)
   --proxy URL                     Use the specified proxy
   --source-address IP             Client-side IP address to bind to
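docs/options.md presumably mirrors the program's regenerated `--help` output: the `[CATEGORIES]` brackets come from the `metavar` change in gallery_dl/option.py below, and the remaining hunks are plain line re-wrapping.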
21 changes: 20 additions & 1 deletion gallery_dl/extractor/pixiv.py
@@ -111,6 +111,7 @@ def _extract_files(self, work):
                 {
                     "url"   : img["image_urls"]["original"],
                     "suffix": "_p{:02}".format(num),
+                    "_fallback": self._fallback_image(img),
                 }
                 for num, img in enumerate(meta_pages)
             ]
@@ -128,7 +129,7 @@ def _extract_files(self, work):
             self.log.warning("%s: 'My pixiv' locked", work["id"])

         elif work["type"] != "ugoira":
-            return ({"url": url},)
+            return ({"url": url, "_fallback": self._fallback_image(url)},)

         elif self.load_ugoira:
             try:
@@ -269,6 +270,24 @@ def _extract_ajax_url(self, body):
         except exception.HttpError:
             pass

+    def _fallback_image(self, src):
+        if isinstance(src, str):
+            urls = None
+            orig = src
+        else:
+            urls = src["image_urls"]
+            orig = urls["original"]
+
+        base = orig.rpartition(".")[0]
+        yield base.replace("-original/", "-master/", 1) + "_master1200.jpg"
+
+        if urls is None:
+            return
+
+        for fmt in ("large", "medium", "square_medium"):
+            if fmt in urls:
+                yield urls[fmt]
+
     @staticmethod
     def _date_from_url(url, offset=timedelta(hours=9)):
         try:
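The new `_fallback_image()` generator first derives the 1200px "master" thumbnail from the original URL, then yields the API-provided size variants. A minimal sketch of the URL rewrite (the sample URL is hypothetical but follows pixiv's i.pximg.net layout):

# Hypothetical original-image URL in pixiv's i.pximg.net layout.
orig = ("https://i.pximg.net/img-original/img/"
        "2016/04/26/00/00/00/56360615_p0.png")

# Same transformation as _fallback_image(): drop the extension, switch
# the "-original/" path segment to "-master/", and append the fixed
# thumbnail suffix.
base = orig.rpartition(".")[0]
fallback = base.replace("-original/", "-master/", 1) + "_master1200.jpg"

print(fallback)
# https://i.pximg.net/img-master/img/2016/04/26/00/00/00/56360615_p0_master1200.jpg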
16 changes: 8 additions & 8 deletions gallery_dl/extractor/plurk.py
@@ -104,16 +104,16 @@ class PlurkPostExtractor(PlurkExtractor):
     pattern = r"(?:https?://)?(?:www\.)?plurk\.com/p/(\w+)"
     example = "https://www.plurk.com/p/12345"

-    def __init__(self, match):
-        PlurkExtractor.__init__(self, match)
-        self.plurk_id = match.group(1)
-
     def plurks(self):
-        url = "{}/p/{}".format(self.root, self.plurk_id)
+        url = "{}/p/{}".format(self.root, self.groups[0])
         page = self.request(url).text
-        user, pos = text.extract(page, " GLOBAL = ", "\n")
-        data, pos = text.extract(page, "plurk = ", ";\n", pos)
+        user, pos = text.extract(page, " GLOBAL=", "\n")
+        data, pos = text.extract(page, "plurk =", ";\n", pos)

         data = self._load(data)
-        data["user"] = self._load(user)["page_user"]
+        try:
+            data["user"] = self._load(user)["page_user"]
+        except Exception:
+            self.log.warning("%s: Failed to extract 'user' data",
+                             self.groups[0])
         return (data,)
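The removed `__init__` is redundant because the base extractor already keeps the regex capture groups around (assumption: stored as `match.groups()` on `self.groups`), so `self.groups[0]` replaces the old `self.plurk_id`. A quick sketch of the equivalence:

import re

pattern = r"(?:https?://)?(?:www\.)?plurk\.com/p/(\w+)"
match = re.match(pattern, "https://www.plurk.com/p/12345")

groups = match.groups()             # what the base class is assumed to store
assert groups[0] == match.group(1)  # same value the old __init__ saved
print(groups[0])                    # 12345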
23 changes: 19 additions & 4 deletions gallery_dl/extractor/wallhaven.py
@@ -54,7 +54,7 @@ def _transform(wp):
 class WallhavenSearchExtractor(WallhavenExtractor):
     """Extractor for search results on wallhaven.cc"""
     subcategory = "search"
-    directory_fmt = ("{category}", "{search[q]}")
+    directory_fmt = ("{category}", "{search[tags]}")
     archive_fmt = "s_{search[q]}_{id}"
     pattern = r"(?:https?://)?wallhaven\.cc/search(?:/?\?([^#]+))?"
     example = "https://wallhaven.cc/search?q=QUERY"
@@ -64,7 +64,7 @@ def __init__(self, match):
         self.params = text.parse_query(match.group(1))

     def wallpapers(self):
-        return self.api.search(self.params.copy())
+        return self.api.search(self.params)

     def metadata(self):
         return {"search": self.params}
@@ -141,7 +141,7 @@ def __init__(self, match):

     def wallpapers(self):
         params = {"q": "@" + self.username}
-        return self.api.search(params.copy())
+        return self.api.search(params)

     def metadata(self):
         return {"username": self.username}
@@ -215,20 +215,35 @@ def _call(self, endpoint, params=None):

     def _pagination(self, endpoint, params=None, metadata=None):
         if params is None:
+            params_ptr = None
             params = {}
+        else:
+            params_ptr = params
+            params = params.copy()
         if metadata is None:
             metadata = self.extractor.config("metadata")

         while True:
             data = self._call(endpoint, params)

+            meta = data.get("meta")
+            if params_ptr is not None:
+                if meta and "query" in meta:
+                    query = meta["query"]
+                    if isinstance(query, dict):
+                        params_ptr["tags"] = query.get("tag")
+                        params_ptr["tag_id"] = query.get("id")
+                    else:
+                        params_ptr["tags"] = query
+                        params_ptr["tag_id"] = 0
+                params_ptr = None
+
             if metadata:
                 for wp in data["data"]:
                     yield self.info(str(wp["id"]))
             else:
                 yield from data["data"]

-            meta = data.get("meta")
             if not meta or meta["current_page"] >= meta["last_page"]:
                 return
             params["page"] = meta["current_page"] + 1
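The `params_ptr` alias is what makes the new `{search[tags]}` directory format work: `metadata()` returns the same dict the extractor passed to `api.search()`, and `_pagination()` writes the server-reported query (`meta["query"]`) back into that dict on the first page while paginating with a private copy. A minimal sketch of the pattern, with a faked API response:

def paginate(params):
    params_ptr = params       # alias to the caller's dict
    params = params.copy()    # private copy used for the actual requests

    # Faked first-page response; values taken from the test expectations.
    meta = {"query": {"tag": "Fujibayashi Kyou", "id": 87}}

    if params_ptr is not None:
        query = meta["query"]
        if isinstance(query, dict):   # tag searches report a tag object
            params_ptr["tags"] = query.get("tag")
            params_ptr["tag_id"] = query.get("id")
        else:                         # plain searches report the query string
            params_ptr["tags"] = query
            params_ptr["tag_id"] = 0
        params_ptr = None             # write back only once

search = {"q": "id:87"}
paginate(search)
print(search)  # {'q': 'id:87', 'tags': 'Fujibayashi Kyou', 'tag_id': 87}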
8 changes: 4 additions & 4 deletions gallery_dl/option.py
@@ -323,7 +323,7 @@ def build_parser():
     input.add_argument(
         "--no-input",
         dest="input", nargs=0, action=ConfigConstAction, const=False,
-        help=("Do not prompt for passwords/tokens"),
+        help="Do not prompt for passwords/tokens",
     )

     output = parser.add_argument_group("Output Options")
@@ -406,7 +406,7 @@ def build_parser():
     )
     output.add_argument(
         "--list-extractors",
-        dest="list_extractors", metavar="CATEGORIES", nargs="*",
+        dest="list_extractors", metavar="[CATEGORIES]", nargs="*",
         help=("Print a list of extractor classes "
               "with description, (sub)category and example URL"),
     )
@@ -430,12 +430,12 @@ def build_parser():
     output.add_argument(
         "--print-traffic",
         dest="print_traffic", action="store_true",
-        help=("Display sent and read HTTP traffic"),
+        help="Display sent and read HTTP traffic",
     )
     output.add_argument(
         "--no-colors",
         dest="colors", action="store_false",
-        help=("Do not emit ANSI color codes in output"),
+        help="Do not emit ANSI color codes in output",
     )

     networking = parser.add_argument_group("Networking Options")
2 changes: 1 addition & 1 deletion gallery_dl/util.py
@@ -83,7 +83,7 @@ def unique_sequence(iterable):

 def contains(values, elements, separator=" "):
     """Returns True if at least one of 'elements' is contained in 'values'"""
-    if isinstance(values, str):
+    if isinstance(values, str) and (separator or separator is None):
         values = values.split(separator)

     if not isinstance(elements, (tuple, list)):
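The extra `(separator or separator is None)` guard gives `contains()` three separator behaviors: a truthy separator splits on it, `None` keeps `str.split()`'s whitespace splitting, and any other falsy value (`""`, `0`, `False`) now skips splitting entirely, so the check becomes plain substring membership. A sketch of the distinction (assuming `contains()` tests membership with `in` after the optional split):

s = "1 2 3 asd qwe y(+)c f(+)(-) bar"

print("(+)" in s.split())    # False - whitespace tokens, "(+)" is not one
print("y(+)c" in s.split())  # True  - "y(+)c" is a whole token
print("(+)" in s)            # True  - no split, substring match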
2 changes: 1 addition & 1 deletion gallery_dl/version.py
@@ -6,5 +6,5 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.

-__version__ = "1.28.3"
+__version__ = "1.28.4-dev"
 __variant__ = None
9 changes: 9 additions & 0 deletions test/results/pixiv.py
@@ -289,6 +289,15 @@
     "caption": r"re:Either she doesn't know how to pose or she can't move with that much clothing on her, in any case she's very well dressed for a holiday trip around town. Lots of stuff to see and a perfect day to grab some sweet pastries at the bakery.<br />...",
 },

+{
+    "#url"     : "https://www.pixiv.net/artworks/56360615",
+    "#comment" : "fallback; 'original' version results in HTTP 500 error (#6762)",
+    "#class"   : pixiv.PixivWorkExtractor,
+    "#options" : {"retries": 0},
+    "#range"   : "4",
+    "#sha1_content": "aa119c27fec0a36bbd06e7491987acf5f1be6293",
+},
+
 {
     "#url"     : "https://www.pixiv.net/en/artworks/966412",
     "#category": ("", "pixiv", "work"),
19 changes: 19 additions & 0 deletions test/results/wallhaven.py
@@ -12,6 +12,14 @@
     "#url"     : "https://wallhaven.cc/search?q=touhou",
     "#category": ("", "wallhaven", "search"),
     "#class"   : wallhaven.WallhavenSearchExtractor,
+    "#pattern" : r"https://w\.wallhaven\.cc/full/\w\w/wallhaven-\w+\.\w+",
+    "#range"   : "1-10",
+
+    "search": {
+        "q"     : "touhou",
+        "tags"  : "touhou",
+        "tag_id": 0,
+    },
 },

 {
@@ -20,6 +28,17 @@
     "#class"   : wallhaven.WallhavenSearchExtractor,
     "#pattern" : r"https://w\.wallhaven\.cc/full/\w\w/wallhaven-\w+\.\w+",
     "#count"   : "<= 30",
+
+    "search": {
+        "categories": "111",
+        "order"     : "asc",
+        "page"      : "3",
+        "purity"    : "100",
+        "sorting"   : "date_added",
+        "q"         : "id:87",
+        "tags"      : "Fujibayashi Kyou",
+        "tag_id"    : 87,
+    },
 },

 {
9 changes: 9 additions & 0 deletions test/test_util.py
@@ -459,6 +459,15 @@ def test_contains(self):
         self.assertFalse(util.contains(s, "tag1"))
         self.assertFalse(util.contains(s, ["tag1", "tag2", "tag3"]))

+        self.assertTrue(util.contains(s, "(+)", ""))
+        self.assertTrue(util.contains(s, ["(-)", "(+)"], ""))
+        self.assertTrue(util.contains(s, "(+)", 0))
+        self.assertTrue(util.contains(s, "(+)", False))
+
+        self.assertFalse(util.contains(s, "(+)", None))
+        self.assertTrue(util.contains(s, "y(+)c", None))
+        self.assertTrue(util.contains(s, ["(-)", "(+)", "bar"], None))
+
         s = "1, 2, 3, asd, qwe, y(+)c, f(+)(-), bar"
         self.assertTrue(util.contains(s, "y(+)c", ", "))
         self.assertTrue(util.contains(s, ["sdf", "dfg", "qwe"], ", "))