diff --git a/docs/configuration.rst b/docs/configuration.rst index 1d8c970d29..11d381672e 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -405,6 +405,8 @@ Default ``flickr``, ``weibo``, ``[wikimedia]`` + * ``"1.4"`` + ``wallhaven`` * ``"2.0-4.0"`` ``behance``, ``imagefap``, diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 3d7386981c..512cd0325e 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -691,6 +691,8 @@ "wallhaven": { "api-key" : null, + "sleep-request": "1.4", + "include" : ["uploads"], "metadata": false }, diff --git a/docs/options.md b/docs/options.md index 2116da636b..dd0b58df3a 100644 --- a/docs/options.md +++ b/docs/options.md @@ -48,13 +48,13 @@ -K, --list-keywords Print a list of available keywords and example values for the given URLs -e, --error-file FILE Add input URLs which returned an error to FILE - -N, --print [EVENT:]FORMAT Write FORMAT during EVENT (default 'prepare') to - standard output. Examples: 'id' or + -N, --print [EVENT:]FORMAT Write FORMAT during EVENT (default 'prepare') + to standard output. Examples: 'id' or 'post:{md5[:8]}' --print-to-file [EVENT:]FORMAT FILE Append FORMAT during EVENT to FILE --list-modules Print a list of available extractor modules - --list-extractors CATEGORIES + --list-extractors [CATEGORIES] Print a list of extractor classes with description, (sub)category and example URL --write-log FILE Write logging output to FILE @@ -67,7 +67,8 @@ ## Networking Options: -R, --retries N Maximum number of retries for failed HTTP - requests or -1 for infinite retries (default: 4) + requests or -1 for infinite retries (default: + 4) --http-timeout SECONDS Timeout for HTTP connections (default: 30.0) --proxy URL Use the specified proxy --source-address IP Client-side IP address to bind to diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 6207bf7484..d3e40eefdc 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -111,6 +111,7 @@ def _extract_files(self, work): { "url" : img["image_urls"]["original"], "suffix": "_p{:02}".format(num), + "_fallback": self._fallback_image(img), } for num, img in enumerate(meta_pages) ] @@ -128,7 +129,7 @@ def _extract_files(self, work): self.log.warning("%s: 'My pixiv' locked", work["id"]) elif work["type"] != "ugoira": - return ({"url": url},) + return ({"url": url, "_fallback": self._fallback_image(url)},) elif self.load_ugoira: try: @@ -269,6 +270,24 @@ def _extract_ajax_url(self, body): except exception.HttpError: pass + def _fallback_image(self, src): + if isinstance(src, str): + urls = None + orig = src + else: + urls = src["image_urls"] + orig = urls["original"] + + base = orig.rpartition(".")[0] + yield base.replace("-original/", "-master/", 1) + "_master1200.jpg" + + if urls is None: + return + + for fmt in ("large", "medium", "square_medium"): + if fmt in urls: + yield urls[fmt] + @staticmethod def _date_from_url(url, offset=timedelta(hours=9)): try: diff --git a/gallery_dl/extractor/plurk.py b/gallery_dl/extractor/plurk.py index be0dbde2d5..0bacd54f27 100644 --- a/gallery_dl/extractor/plurk.py +++ b/gallery_dl/extractor/plurk.py @@ -104,16 +104,16 @@ class PlurkPostExtractor(PlurkExtractor): pattern = r"(?:https?://)?(?:www\.)?plurk\.com/p/(\w+)" example = "https://www.plurk.com/p/12345" - def __init__(self, match): - PlurkExtractor.__init__(self, match) - self.plurk_id = match.group(1) - def plurks(self): - url = "{}/p/{}".format(self.root, self.plurk_id) + url = "{}/p/{}".format(self.root, self.groups[0]) page = self.request(url).text - user, pos = text.extract(page, " GLOBAL = ", "\n") - data, pos = text.extract(page, "plurk = ", ";\n", pos) + user, pos = text.extract(page, " GLOBAL=", "\n") + data, pos = text.extract(page, "plurk =", ";\n", pos) data = self._load(data) - data["user"] = self._load(user)["page_user"] + try: + data["user"] = self._load(user)["page_user"] + except Exception: + self.log.warning("%s: Failed to extract 'user' data", + self.groups[0]) return (data,) diff --git a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py index 479e8a859e..e5b764ab20 100644 --- a/gallery_dl/extractor/wallhaven.py +++ b/gallery_dl/extractor/wallhaven.py @@ -54,7 +54,7 @@ def _transform(wp): class WallhavenSearchExtractor(WallhavenExtractor): """Extractor for search results on wallhaven.cc""" subcategory = "search" - directory_fmt = ("{category}", "{search[q]}") + directory_fmt = ("{category}", "{search[tags]}") archive_fmt = "s_{search[q]}_{id}" pattern = r"(?:https?://)?wallhaven\.cc/search(?:/?\?([^#]+))?" example = "https://wallhaven.cc/search?q=QUERY" @@ -64,7 +64,7 @@ def __init__(self, match): self.params = text.parse_query(match.group(1)) def wallpapers(self): - return self.api.search(self.params.copy()) + return self.api.search(self.params) def metadata(self): return {"search": self.params} @@ -141,7 +141,7 @@ def __init__(self, match): def wallpapers(self): params = {"q": "@" + self.username} - return self.api.search(params.copy()) + return self.api.search(params) def metadata(self): return {"username": self.username} @@ -215,20 +215,35 @@ def _call(self, endpoint, params=None): def _pagination(self, endpoint, params=None, metadata=None): if params is None: + params_ptr = None params = {} + else: + params_ptr = params + params = params.copy() if metadata is None: metadata = self.extractor.config("metadata") while True: data = self._call(endpoint, params) + meta = data.get("meta") + if params_ptr is not None: + if meta and "query" in meta: + query = meta["query"] + if isinstance(query, dict): + params_ptr["tags"] = query.get("tag") + params_ptr["tag_id"] = query.get("id") + else: + params_ptr["tags"] = query + params_ptr["tag_id"] = 0 + params_ptr = None + if metadata: for wp in data["data"]: yield self.info(str(wp["id"])) else: yield from data["data"] - meta = data.get("meta") if not meta or meta["current_page"] >= meta["last_page"]: return params["page"] = meta["current_page"] + 1 diff --git a/gallery_dl/option.py b/gallery_dl/option.py index a3f78e5b83..222679a494 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -323,7 +323,7 @@ def build_parser(): input.add_argument( "--no-input", dest="input", nargs=0, action=ConfigConstAction, const=False, - help=("Do not prompt for passwords/tokens"), + help="Do not prompt for passwords/tokens", ) output = parser.add_argument_group("Output Options") @@ -406,7 +406,7 @@ def build_parser(): ) output.add_argument( "--list-extractors", - dest="list_extractors", metavar="CATEGORIES", nargs="*", + dest="list_extractors", metavar="[CATEGORIES]", nargs="*", help=("Print a list of extractor classes " "with description, (sub)category and example URL"), ) @@ -430,12 +430,12 @@ def build_parser(): output.add_argument( "--print-traffic", dest="print_traffic", action="store_true", - help=("Display sent and read HTTP traffic"), + help="Display sent and read HTTP traffic", ) output.add_argument( "--no-colors", dest="colors", action="store_false", - help=("Do not emit ANSI color codes in output"), + help="Do not emit ANSI color codes in output", ) networking = parser.add_argument_group("Networking Options") diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 72ec98ee15..230208816f 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -83,7 +83,7 @@ def unique_sequence(iterable): def contains(values, elements, separator=" "): """Returns True if at least one of 'elements' is contained in 'values'""" - if isinstance(values, str): + if isinstance(values, str) and (separator or separator is None): values = values.split(separator) if not isinstance(elements, (tuple, list)): diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 4b28924e0a..f1c1a72b11 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,5 +6,5 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.28.3" +__version__ = "1.28.4-dev" __variant__ = None diff --git a/test/results/pixiv.py b/test/results/pixiv.py index c4ebc94fb6..00a8a673e9 100644 --- a/test/results/pixiv.py +++ b/test/results/pixiv.py @@ -289,6 +289,15 @@ "caption": r"re:Either she doesn't know how to pose or she can't move with that much clothing on her, in any case she's very well dressed for a holiday trip around town. Lots of stuff to see and a perfect day to grab some sweet pastries at the bakery.
...", }, +{ + "#url" : "https://www.pixiv.net/artworks/56360615", + "#comment" : "fallback; 'original' version results in HTTP 500 error (#6762)", + "#class" : pixiv.PixivWorkExtractor, + "#options" : {"retries": 0}, + "#range" : "4", + "#sha1_content": "aa119c27fec0a36bbd06e7491987acf5f1be6293", +}, + { "#url" : "https://www.pixiv.net/en/artworks/966412", "#category": ("", "pixiv", "work"), diff --git a/test/results/wallhaven.py b/test/results/wallhaven.py index 47a8ba777c..cf942aafa5 100644 --- a/test/results/wallhaven.py +++ b/test/results/wallhaven.py @@ -12,6 +12,14 @@ "#url" : "https://wallhaven.cc/search?q=touhou", "#category": ("", "wallhaven", "search"), "#class" : wallhaven.WallhavenSearchExtractor, + "#pattern" : r"https://w\.wallhaven\.cc/full/\w\w/wallhaven-\w+\.\w+", + "#range" : "1-10", + + "search": { + "q" : "touhou", + "tags" : "touhou", + "tag_id": 0, + }, }, { @@ -20,6 +28,17 @@ "#class" : wallhaven.WallhavenSearchExtractor, "#pattern" : r"https://w\.wallhaven\.cc/full/\w\w/wallhaven-\w+\.\w+", "#count" : "<= 30", + + "search": { + "categories": "111", + "order" : "asc", + "page" : "3", + "purity" : "100", + "sorting" : "date_added", + "q" : "id:87", + "tags" : "Fujibayashi Kyou", + "tag_id" : 87, + }, }, { diff --git a/test/test_util.py b/test/test_util.py index fa16c443d1..27f78ece70 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -459,6 +459,15 @@ def test_contains(self): self.assertFalse(util.contains(s, "tag1")) self.assertFalse(util.contains(s, ["tag1", "tag2", "tag3"])) + self.assertTrue(util.contains(s, "(+)", "")) + self.assertTrue(util.contains(s, ["(-)", "(+)"], "")) + self.assertTrue(util.contains(s, "(+)", 0)) + self.assertTrue(util.contains(s, "(+)", False)) + + self.assertFalse(util.contains(s, "(+)", None)) + self.assertTrue(util.contains(s, "y(+)c", None)) + self.assertTrue(util.contains(s, ["(-)", "(+)", "bar"], None)) + s = "1, 2, 3, asd, qwe, y(+)c, f(+)(-), bar" self.assertTrue(util.contains(s, "y(+)c", ", ")) self.assertTrue(util.contains(s, ["sdf", "dfg", "qwe"], ", "))