From 86b1ee19e0a6098de1b0fbaab5eae08542eec571 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Sat, 4 Jan 2025 11:23:07 -0500
Subject: [PATCH] fix some lint issues

---
 catalog/common/sites.py       |   2 +-
 catalog/search/models.py      |   3 +
 catalog/sites/douban.py       |  33 ++++
 catalog/sites/douban_book.py  | 109 +++++++++++++++++-------
 catalog/sites/douban_drama.py |  96 +++++++++++++++-----------
 catalog/sites/douban_game.py  |  46 +++++++------
 catalog/sites/douban_movie.py | 124 +++++++++++++++++---------------
 catalog/sites/douban_music.py |  66 ++++++++++--------
 pyproject.toml                |   1 -
 9 files changed, 287 insertions(+), 193 deletions(-)

diff --git a/catalog/common/sites.py b/catalog/common/sites.py
index d4fdc964..c3acee24 100644
--- a/catalog/common/sites.py
+++ b/catalog/common/sites.py
@@ -104,7 +104,7 @@ def query_str(content, query: str) -> str:
         return content.xpath(query)[0].strip()
 
     @staticmethod
-    def query_list(content, query: str) -> list[str]:
+    def query_list(content, query: str) -> list:
         return list(content.xpath(query))
 
     @classmethod
diff --git a/catalog/search/models.py b/catalog/search/models.py
index 90e9e251..f6279539 100644
--- a/catalog/search/models.py
+++ b/catalog/search/models.py
@@ -79,6 +79,9 @@ def __init__(
         self.display_description = brief
         self.cover_image_url = cover_url
 
+    def __repr__(self):
+        return f"[{self.category}] {self.display_title} {self.url}"
+
     @property
     def verbose_category_name(self):
         return self.category.label if self.category else ""
diff --git a/catalog/sites/douban.py b/catalog/sites/douban.py
index a9c6e2c1..f228aaa2 100644
--- a/catalog/sites/douban.py
+++ b/catalog/sites/douban.py
@@ -1,6 +1,8 @@
+import json
 import re
 
 from catalog.common import *
+from catalog.search.models import ExternalSearchResultItem
 
 RE_NUMBERS = re.compile(r"\d+\d*")
 RE_WHITESPACES = re.compile(r"\s+")
@@ -30,3 +32,34 @@ def validate_response(self, response) -> int:
             return RESPONSE_OK
         else:
             return RESPONSE_INVALID_CONTENT
+
+
+class DoubanSearcher:
+    @classmethod
+    def search(cls, cat: ItemCategory, c: str, q: str, p: int = 1):
+        url = f"https://search.douban.com/{c}/subject_search?search_text={q}&start={15*(p-1)}"
+        content = DoubanDownloader(url).download().html()
+        j = json.loads(
+            content.xpath(
+                "//script[text()[contains(.,'window.__DATA__')]]/text()"
+            )[  # type:ignore
+                0
+            ]
+            .split("window.__DATA__ = ")[1]  # type:ignore
+            .split("};")[0]  # type:ignore
+            + "}"
+        )
+        results = [
+            ExternalSearchResultItem(
+                cat,
+                SiteName.Douban,
+                item["url"],
+                item["title"],
+                item["abstract"],
+                item["abstract_2"],
+                item["cover_url"],
+            )
+            for item in j["items"]
+            if item.get("tpl_name") == "search_subject"
+        ]
+        return results
diff --git a/catalog/sites/douban_book.py b/catalog/sites/douban_book.py
index 857fb043..2a6413a7 100644
--- a/catalog/sites/douban_book.py
+++ b/catalog/sites/douban_book.py
@@ -3,7 +3,7 @@
 from catalog.common import *
 from common.models.lang import detect_language
 
-from .douban import *
+from .douban import RE_NUMBERS, RE_WHITESPACES, DoubanDownloader, DoubanSearcher
 
 
 @SiteManager.register
@@ -23,46 +23,51 @@ class DoubanBook(AbstractSite):
     def id_to_url(cls, id_value):
         return "https://book.douban.com/subject/" + id_value + "/"
 
+    @classmethod
+    def search(cls, q: str, p: int = 1):
+        return DoubanSearcher.search(ItemCategory.Book, "book", q, p)
+
     def scrape(self):
         content = DoubanDownloader(self.url).download().html()
 
-        isbn_elem = content.xpath(
-            "//div[@id='info']//span[text()='ISBN:']/following::text()"
+        isbn_elem = 
self.query_list( + content, "//div[@id='info']//span[text()='ISBN:']/following::text()" ) isbn = isbn_elem[0].strip() if isbn_elem else None - title_elem = content.xpath("/html/body//h1/span/text()") + title_elem = self.query_list(content, "/html/body//h1/span/text()") title = ( title_elem[0].strip() if title_elem else f"Unknown Title {self.id_value}" ) - subtitle_elem = content.xpath( - "//div[@id='info']//span[text()='副标题:']/following::text()" + subtitle_elem = self.query_list( + content, "//div[@id='info']//span[text()='副标题:']/following::text()" ) subtitle = subtitle_elem[0].strip()[:500] if subtitle_elem else None - orig_title_elem = content.xpath( - "//div[@id='info']//span[text()='原作名:']/following::text()" + orig_title_elem = self.query_list( + content, "//div[@id='info']//span[text()='原作名:']/following::text()" ) orig_title = orig_title_elem[0].strip()[:500] if orig_title_elem else None - language_elem = content.xpath( - "//div[@id='info']//span[text()='语言:']/following::text()" + language_elem = self.query_list( + content, "//div[@id='info']//span[text()='语言:']/following::text()" ) language = [language_elem[0].strip()] if language_elem else [] - pub_house_elem = content.xpath( - "//div[@id='info']//span[text()='出版社:']/following::text()" + pub_house_elem = self.query_list( + content, "//div[@id='info']//span[text()='出版社:']/following::text()" ) pub_house = pub_house_elem[0].strip() if pub_house_elem else None if not pub_house: - pub_house_elem = content.xpath( - "//div[@id='info']//span[text()='出版社:']/following-sibling::a/text()" + pub_house_elem = self.query_list( + content, + "//div[@id='info']//span[text()='出版社:']/following-sibling::a/text()", ) pub_house = pub_house_elem[0].strip() if pub_house_elem else None - pub_date_elem = content.xpath( - "//div[@id='info']//span[text()='出版年:']/following::text()" + pub_date_elem = self.query_list( + content, "//div[@id='info']//span[text()='出版年:']/following::text()" ) pub_date = pub_date_elem[0].strip() if pub_date_elem else "" year_month_day = RE_NUMBERS.findall(pub_date) @@ -88,18 +93,18 @@ def scrape(self): else pub_month ) - binding_elem = content.xpath( - "//div[@id='info']//span[text()='装帧:']/following::text()" + binding_elem = self.query_list( + content, "//div[@id='info']//span[text()='装帧:']/following::text()" ) binding = binding_elem[0].strip() if binding_elem else None - price_elem = content.xpath( - "//div[@id='info']//span[text()='定价:']/following::text()" + price_elem = self.query_list( + content, "//div[@id='info']//span[text()='定价:']/following::text()" ) price = price_elem[0].strip() if price_elem else None - pages_elem = content.xpath( - "//div[@id='info']//span[text()='页数:']/following::text()" + pages_elem = self.query_list( + content, "//div[@id='info']//span[text()='页数:']/following::text()" ) pages = pages_elem[0].strip() if pages_elem else None if pages is not None: @@ -109,15 +114,16 @@ def scrape(self): if pages and (pages > 999999 or pages < 1): pages = None - brief_elem = content.xpath( - "//h2/span[text()='内容简介']/../following-sibling::div[1]//div[@class='intro'][not(ancestor::span[@class='short'])]/p/text()" + brief_elem = self.query_list( + content, + "//h2/span[text()='内容简介']/../following-sibling::div[1]//div[@class='intro'][not(ancestor::span[@class='short'])]/p/text()", ) brief = "\n".join(p.strip() for p in brief_elem) if brief_elem else None contents = None try: - contents_elem = content.xpath( - "//h2/span[text()='目录']/../following-sibling::div[1]" + contents_elem = self.query_list( + content, 
"//h2/span[text()='目录']/../following-sibling::div[1]" )[0] # if next the id of next sibling contains `dir`, that would be the full contents if "dir" in contents_elem.getnext().xpath("@id")[0]: @@ -129,24 +135,28 @@ def scrape(self): ) else: contents = ( - "\n".join(p.strip() for p in contents_elem.xpath("text()")) + "\n".join( + p.strip() for p in self.query_list(contents_elem, "text()") + ) if contents_elem is not None else None ) except Exception: pass - img_url_elem = content.xpath("//*[@id='mainpic']/a/img/@src") + img_url_elem = self.query_list(content, "//*[@id='mainpic']/a/img/@src") img_url = img_url_elem[0].strip() if img_url_elem else None # there are two html formats for authors and translators - authors_elem = content.xpath( + authors_elem = self.query_list( + content, """//div[@id='info']//span[text()='作者:']/following-sibling::br[1]/ - preceding-sibling::a[preceding-sibling::span[text()='作者:']]/text()""" + preceding-sibling::a[preceding-sibling::span[text()='作者:']]/text()""", ) if not authors_elem: - authors_elem = content.xpath( - """//div[@id='info']//span[text()=' 作者']/following-sibling::a/text()""" + authors_elem = self.query_list( + content, + """//div[@id='info']//span[text()=' 作者']/following-sibling::a/text()""", ) if authors_elem: authors = [] @@ -155,13 +165,15 @@ def scrape(self): else: authors = None - translators_elem = content.xpath( + translators_elem = self.query_list( + content, """//div[@id='info']//span[text()='译者:']/following-sibling::br[1]/ - preceding-sibling::a[preceding-sibling::span[text()='译者:']]/text()""" + preceding-sibling::a[preceding-sibling::span[text()='译者:']]/text()""", ) if not translators_elem: - translators_elem = content.xpath( - """//div[@id='info']//span[text()=' 译者']/following-sibling::a/text()""" + translators_elem = self.query_list( + content, + """//div[@id='info']//span[text()=' 译者']/following-sibling::a/text()""", ) if translators_elem: translators = [] @@ -170,18 +182,20 @@ def scrape(self): else: translators = None - cncode_elem = content.xpath( - "//div[@id='info']//span[text()='统一书号:']/following::text()" + cncode_elem = self.query_list( + content, "//div[@id='info']//span[text()='统一书号:']/following::text()" ) cubn = cncode_elem[0].strip() if cncode_elem else None - series_elem = content.xpath( - "//div[@id='info']//span[text()='丛书:']/following-sibling::a[1]/text()" + series_elem = self.query_list( + content, + "//div[@id='info']//span[text()='丛书:']/following-sibling::a[1]/text()", ) series = series_elem[0].strip() if series_elem else None - imprint_elem = content.xpath( - "//div[@id='info']//span[text()='出品方:']/following-sibling::a[1]/text()" + imprint_elem = self.query_list( + content, + "//div[@id='info']//span[text()='出品方:']/following-sibling::a[1]/text()", ) imprint = imprint_elem[0].strip() if imprint_elem else None @@ -212,8 +226,9 @@ def scrape(self): "cover_image_url": img_url, } - works_element = content.xpath( - '//h2/span[text()="这本书的其他版本"]/following-sibling::span[@class="pl"]/a/@href' + works_element = self.query_list( + content, + '//h2/span[text()="这本书的其他版本"]/following-sibling::span[@class="pl"]/a/@href', ) if works_element: r = re.match(r"\w+://book.douban.com/works/(\d+)", works_element[0]) @@ -234,7 +249,7 @@ def scrape(self): ] pd = ResourceContent(metadata=data) - t, n = detect_isbn_asin(isbn) + t, n = detect_isbn_asin(isbn or "") if t: pd.lookup_ids[t] = n pd.lookup_ids[IdType.CUBN] = cubn @@ -255,11 +270,11 @@ def id_to_url(cls, id_value): def scrape(self): content = 
DoubanDownloader(self.url).download().html() - title_elem = content.xpath("//h1/text()") + title_elem = self.query_list(content, "//h1/text()") title = title_elem[0].split("全部版本(")[0].strip() if title_elem else None if not title: raise ParseError(self, "title") - book_urls = content.xpath('//a[@class="pl2"]/@href') + book_urls = self.query_list(content, '//a[@class="pl2"]/@href') related_resources = [] for url in book_urls: site = SiteManager.get_site_by_url(url) diff --git a/catalog/sites/douban_drama.py b/catalog/sites/douban_drama.py index c277d0e0..63d10141 100644 --- a/catalog/sites/douban_drama.py +++ b/catalog/sites/douban_drama.py @@ -7,7 +7,7 @@ from catalog.models import * from common.models.lang import detect_language -from .douban import DoubanDownloader +from .douban import DoubanDownloader, DoubanSearcher def _cache_key(url): @@ -45,6 +45,8 @@ def id_to_url(cls, id_value): return f"https://www.douban.com/location/drama/{ids[0]}/#{ids[1]}" def scrape(self): + if not self.id_value or not self.url: + raise ParseError(self, "id_value or url") show_url = self.url.split("#")[0] show_id = self.id_value.split("-")[0] version_id = self.id_value.split("-")[1] @@ -59,20 +61,20 @@ def scrape(self): p = "//div[@id='" + version_id + "']" q = p + "//dt[text()='{}:']/following-sibling::dd[1]/a/span/text()" q2 = p + "//dt[text()='{}:']/following-sibling::dd[1]/text()" - title = " ".join(h.xpath(p + "//h3/text()")).strip() + title = " ".join(self.query_list(h, p + "//h3/text()")).strip() if not title: raise ParseError(self, "title") data = { "title": title, "localized_title": [{"lang": "zh-cn", "text": title}], - "director": [x.strip() for x in h.xpath(q.format("导演"))], - "playwright": [x.strip() for x in h.xpath(q.format("编剧"))], - # "actor": [x.strip() for x in h.xpath(q.format("主演"))], - "composer": [x.strip() for x in h.xpath(q.format("作曲"))], - "language": [x.strip() for x in h.xpath(q2.format("语言"))], - "opening_date": " ".join(h.xpath(q2.format("演出日期"))).strip(), - "troupe": [x.strip() for x in h.xpath(q.format("演出团体"))], - "location": [x.strip() for x in h.xpath(q.format("演出剧院"))], + "director": [x.strip() for x in self.query_list(h, q.format("导演"))], + "playwright": [x.strip() for x in self.query_list(h, q.format("编剧"))], + # "actor": [x.strip() for x in self.query_list(h, q.format("主演"))], + "composer": [x.strip() for x in self.query_list(h, q.format("作曲"))], + "language": [x.strip() for x in self.query_list(h, q2.format("语言"))], + "opening_date": " ".join(self.query_list(h, q2.format("演出日期"))).strip(), + "troupe": [x.strip() for x in self.query_list(h, q.format("演出团体"))], + "location": [x.strip() for x in self.query_list(h, q.format("演出剧院"))], } if data["opening_date"]: d = data["opening_date"].split("-") @@ -80,7 +82,9 @@ def scrape(self): if dl > 3: data["opening_date"] = "-".join(d[:3]) data["closing_date"] = "-".join(d[0 : 6 - dl] + d[3:dl]) - actor_elem = h.xpath(p + "//dt[text()='主演:']/following-sibling::dd[1]/a") + actor_elem = self.query_list( + h, p + "//dt[text()='主演:']/following-sibling::dd[1]/a" + ) data["actor"] = [] for e in actor_elem: n = "".join(e.xpath("span/text()")).strip() @@ -88,7 +92,7 @@ def scrape(self): t = re.sub(r"^[\s\(饰]*(.+)\)[\s\/]*$", r"\1", t).strip() t = t if t != "/" else "" data["actor"].append({"name": n, "role": t}) - img_url_elem = h.xpath("//img[@itemprop='image']/@src") + img_url_elem = self.query_list(h, "//img[@itemprop='image']/@src") data["cover_image_url"] = img_url_elem[0].strip() if img_url_elem else None pd = 
ResourceContent(metadata=data) pd.metadata["required_resources"] = [ @@ -128,78 +132,87 @@ def scrape(self): h = html.fromstring(r) data = {} - title_elem = h.xpath("/html/body//h1/span/text()") + title_elem = self.query_list(h, "/html/body//h1/span/text()") if title_elem: data["title"] = title_elem[0].strip() data["orig_title"] = title_elem[1] if len(title_elem) > 1 else None else: raise ParseError(self, "title") - other_title_elem = h.xpath( - "//dl//dt[text()='又名:']/following::dd[@itemprop='name']/text()" + other_title_elem = self.query_list( + h, "//dl//dt[text()='又名:']/following::dd[@itemprop='name']/text()" ) data["other_title"] = other_title_elem - plot_elem = h.xpath("//div[@class='pure-text']/div[@class='full']/text()") + plot_elem = self.query_list( + h, "//div[@class='pure-text']/div[@class='full']/text()" + ) if len(plot_elem) == 0: - plot_elem = h.xpath( - "//div[@class='pure-text']/div[@class='abstract']/text()" + plot_elem = self.query_list( + h, "//div[@class='pure-text']/div[@class='abstract']/text()" ) if len(plot_elem) == 0: - plot_elem = h.xpath("//div[@class='pure-text']/text()") + plot_elem = self.query_list(h, "//div[@class='pure-text']/text()") data["brief"] = "\n".join(plot_elem) data["genre"] = [ s.strip() - for s in h.xpath( - "//div[@class='meta']//dl//dt[text()='类型:']/following-sibling::dd[@itemprop='genre']/text()" + for s in self.query_list( + h, + "//div[@class='meta']//dl//dt[text()='类型:']/following-sibling::dd[@itemprop='genre']/text()", ) ] # data["version"] = [ # s.strip() - # for s in h.xpath( + # for s in self.query_list(h, # "//dl//dt[text()='版本:']/following-sibling::dd[@class='titles']/a//text()" # ) # ] data["director"] = [ s.strip() - for s in h.xpath( - "//div[@class='meta']/dl//dt[text()='导演:']/following-sibling::dd/a[@itemprop='director']//text()" + for s in self.query_list( + h, + "//div[@class='meta']/dl//dt[text()='导演:']/following-sibling::dd/a[@itemprop='director']//text()", ) ] data["composer"] = [ s.strip() - for s in h.xpath( - "//div[@class='meta']/dl//dt[text()='作曲:']/following-sibling::dd/a[@itemprop='musicBy']//text()" + for s in self.query_list( + h, + "//div[@class='meta']/dl//dt[text()='作曲:']/following-sibling::dd/a[@itemprop='musicBy']//text()", ) ] data["choreographer"] = [ s.strip() - for s in h.xpath( - "//div[@class='meta']/dl//dt[text()='编舞:']/following-sibling::dd/a[@itemprop='choreographer']//text()" + for s in self.query_list( + h, + "//div[@class='meta']/dl//dt[text()='编舞:']/following-sibling::dd/a[@itemprop='choreographer']//text()", ) ] data["troupe"] = [ s.strip() - for s in h.xpath( - "//div[@class='meta']/dl//dt[text()='演出团体:']/following-sibling::dd/a[@itemprop='performer']//text()" + for s in self.query_list( + h, + "//div[@class='meta']/dl//dt[text()='演出团体:']/following-sibling::dd/a[@itemprop='performer']//text()", ) ] data["playwright"] = [ s.strip() - for s in h.xpath( - "//div[@class='meta']/dl//dt[text()='编剧:']/following-sibling::dd/a[@itemprop='author']//text()" + for s in self.query_list( + h, + "//div[@class='meta']/dl//dt[text()='编剧:']/following-sibling::dd/a[@itemprop='author']//text()", ) ] data["actor"] = [ {"name": s.strip(), "role": ""} - for s in h.xpath( - "//div[@class='meta']/dl//dt[text()='主演:']/following-sibling::dd/a[@itemprop='actor']//text()" + for s in self.query_list( + h, + "//div[@class='meta']/dl//dt[text()='主演:']/following-sibling::dd/a[@itemprop='actor']//text()", ) ] - date_elem = h.xpath( - "//div[@class='meta']//dl//dt[text()='演出日期:']/following::dd/text()" + date_elem = 
self.query_list( + h, "//div[@class='meta']//dl//dt[text()='演出日期:']/following::dd/text()" ) data["opening_date"] = date_elem[0] if date_elem else None if data["opening_date"]: @@ -211,12 +224,15 @@ def scrape(self): data["location"] = [ s.strip() - for s in h.xpath( - "//div[@class='meta']/dl//dt[text()='演出剧院:']/following-sibling::dd/a[@itemprop='location']//text()" + for s in self.query_list( + h, + "//div[@class='meta']/dl//dt[text()='演出剧院:']/following-sibling::dd/a[@itemprop='location']//text()", ) ] - versions = h.xpath("//div[@id='versions']/div[@class='fluid-mods']/div/@id") + versions = self.query_list( + h, "//div[@id='versions']/div[@class='fluid-mods']/div/@id" + ) data["related_resources"] = list( map( lambda v: { @@ -229,7 +245,7 @@ def scrape(self): versions, ) ) - img_url_elem = h.xpath("//img[@itemprop='image']/@src") + img_url_elem = self.query_list(h, "//img[@itemprop='image']/@src") data["cover_image_url"] = img_url_elem[0].strip() if img_url_elem else None data["localized_title"] = ( [{"lang": "zh-cn", "text": data["title"]}] diff --git a/catalog/sites/douban_game.py b/catalog/sites/douban_game.py index a867e31d..0fce5479 100644 --- a/catalog/sites/douban_game.py +++ b/catalog/sites/douban_game.py @@ -7,9 +7,7 @@ from common.models.lang import detect_language from common.models.misc import uniq -from .douban import DoubanDownloader - -_logger = logging.getLogger(__name__) +from .douban import DoubanDownloader, DoubanSearcher @SiteManager.register @@ -26,18 +24,18 @@ class DoubanGame(AbstractSite): DEFAULT_MODEL = Game @classmethod - def id_to_url(self, id_value): + def id_to_url(cls, id_value): return "https://www.douban.com/game/" + id_value + "/" def scrape(self): content = DoubanDownloader(self.url).download().html() - elem = content.xpath("//div[@id='content']/h1/text()") + elem = self.query_list(content, "//div[@id='content']/h1/text()") title = elem[0].strip() if len(elem) else None if not title: raise ParseError(self, "title") - elem = content.xpath("//div[@id='comments']//h2/text()") + elem = self.query_list(content, "//div[@id='comments']//h2/text()") title2 = elem[0].strip() if len(elem) else "" if title2: sp = title2.strip().rsplit("的短评", 1) @@ -48,46 +46,52 @@ def scrape(self): else: orig_title = "" - other_title_elem = content.xpath( - "//dl[@class='thing-attr']//dt[text()='别名:']/following-sibling::dd[1]/text()" + other_title_elem = self.query_list( + content, + "//dl[@class='thing-attr']//dt[text()='别名:']/following-sibling::dd[1]/text()", ) other_title = ( other_title_elem[0].strip().split(" / ") if other_title_elem else [] ) - developer_elem = content.xpath( - "//dl[@class='thing-attr']//dt[text()='开发商:']/following-sibling::dd[1]/text()" + developer_elem = self.query_list( + content, + "//dl[@class='thing-attr']//dt[text()='开发商:']/following-sibling::dd[1]/text()", ) developer = developer_elem[0].strip().split(" / ") if developer_elem else None - publisher_elem = content.xpath( - "//dl[@class='thing-attr']//dt[text()='发行商:']/following-sibling::dd[1]/text()" + publisher_elem = self.query_list( + content, + "//dl[@class='thing-attr']//dt[text()='发行商:']/following-sibling::dd[1]/text()", ) publisher = publisher_elem[0].strip().split(" / ") if publisher_elem else None - platform_elem = content.xpath( - "//dl[@class='thing-attr']//dt[text()='平台:']/following-sibling::dd[1]/a/text()" + platform_elem = self.query_list( + content, + "//dl[@class='thing-attr']//dt[text()='平台:']/following-sibling::dd[1]/a/text()", ) platform = platform_elem if platform_elem else None - 
genre_elem = content.xpath( - "//dl[@class='thing-attr']//dt[text()='类型:']/following-sibling::dd[1]/a/text()" + genre_elem = self.query_list( + content, + "//dl[@class='thing-attr']//dt[text()='类型:']/following-sibling::dd[1]/a/text()", ) genre = None if genre_elem: genre = [g for g in genre_elem if g != "游戏"] - date_elem = content.xpath( - "//dl[@class='thing-attr']//dt[text()='发行日期:']/following-sibling::dd[1]/text()" + date_elem = self.query_list( + content, + "//dl[@class='thing-attr']//dt[text()='发行日期:']/following-sibling::dd[1]/text()", ) release_date = dateparser.parse(date_elem[0].strip()) if date_elem else None release_date = release_date.strftime("%Y-%m-%d") if release_date else None - brief_elem = content.xpath("//div[@class='mod item-desc']/p/text()") + brief_elem = self.query_list(content, "//div[@class='mod item-desc']/p/text()") brief = "\n".join(brief_elem) if brief_elem else "" - img_url_elem = content.xpath( - "//div[@class='item-subject-info']/div[@class='pic']//img/@src" + img_url_elem = self.query_list( + content, "//div[@class='item-subject-info']/div[@class='pic']//img/@src" ) img_url = img_url_elem[0].strip() if img_url_elem else None diff --git a/catalog/sites/douban_movie.py b/catalog/sites/douban_movie.py index 8a1b25f1..36ccb87a 100644 --- a/catalog/sites/douban_movie.py +++ b/catalog/sites/douban_movie.py @@ -1,16 +1,17 @@ import json import logging +from loguru import logger + from catalog.common import * from catalog.movie.models import * from catalog.tv.models import * from common.models.lang import detect_language +from common.models.misc import int_ -from .douban import * +from .douban import DoubanDownloader, DoubanSearcher from .tmdb import TMDB_TV, TMDB_TVSeason, query_tmdb_tv_episode, search_tmdb_by_imdb_id -_logger = logging.getLogger(__name__) - @SiteManager.register class DoubanMovie(AbstractSite): @@ -29,11 +30,15 @@ class DoubanMovie(AbstractSite): def id_to_url(cls, id_value): return "https://movie.douban.com/subject/" + id_value + "/" + @classmethod + def search(cls, q: str, p: int = 1): + return DoubanSearcher.search(ItemCategory.Movie, "movie", q, p) + def scrape(self): content = DoubanDownloader(self.url).download().html() try: schema_data = "".join( - content.xpath('//script[@type="application/ld+json"]/text()') + self.query_list(content, '//script[@type="application/ld+json"]/text()') ).replace( "\n", "" ) # strip \n bc multi-line string is not properly coded in json by douban @@ -42,13 +47,13 @@ def scrape(self): d = {} try: - raw_title = content.xpath("//span[@property='v:itemreviewed']/text()")[ - 0 - ].strip() + raw_title = self.query_list( + content, "//span[@property='v:itemreviewed']/text()" + )[0].strip() except IndexError: raise ParseError(self, "title") - orig_title = content.xpath("//img[@rel='v:image']/@alt")[0].strip() + orig_title = self.query_list(content, "//img[@rel='v:image']/@alt")[0].strip() title = raw_title.split(orig_title)[0].strip() # if has no chinese title if title == "": @@ -58,40 +63,46 @@ def scrape(self): orig_title = None # there are two html formats for authors and translators - other_title_elem = content.xpath( - "//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]" + other_title_elem = self.query_list( + content, + "//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]", ) other_title = ( other_title_elem[0].strip().split(" / ") if other_title_elem else None ) - imdb_elem = content.xpath( - "//div[@id='info']//span[text()='IMDb链接:']/following-sibling::a[1]/text()" + imdb_elem = 
self.query_list( + content, + "//div[@id='info']//span[text()='IMDb链接:']/following-sibling::a[1]/text()", ) if not imdb_elem: - imdb_elem = content.xpath( - "//div[@id='info']//span[text()='IMDb:']/following-sibling::text()[1]" + imdb_elem = self.query_list( + content, + "//div[@id='info']//span[text()='IMDb:']/following-sibling::text()[1]", ) imdb_code = imdb_elem[0].strip() if imdb_elem else None - director_elem = content.xpath( - "//div[@id='info']//span[text()='导演']/following-sibling::span[1]/a/text()" + director_elem = self.query_list( + content, + "//div[@id='info']//span[text()='导演']/following-sibling::span[1]/a/text()", ) director = director_elem if director_elem else None - playwright_elem = content.xpath( - "//div[@id='info']//span[text()='编剧']/following-sibling::span[1]/a/text()" + playwright_elem = self.query_list( + content, + "//div[@id='info']//span[text()='编剧']/following-sibling::span[1]/a/text()", ) playwright = ( list(map(lambda a: a[:200], playwright_elem)) if playwright_elem else None ) - actor_elem = content.xpath( - "//div[@id='info']//span[text()='主演']/following-sibling::span[1]/a/text()" + actor_elem = self.query_list( + content, + "//div[@id='info']//span[text()='主演']/following-sibling::span[1]/a/text()", ) actor = list(map(lambda a: a[:200], actor_elem)) if actor_elem else None - genre_elem = content.xpath("//span[@property='v:genre']/text()") + genre_elem = self.query_list(content, "//span[@property='v:genre']/text()") genre = [] if genre_elem: for g in genre_elem: @@ -102,7 +113,9 @@ def scrape(self): g = "惊悚" genre.append(g) - showtime_elem = content.xpath("//span[@property='v:initialReleaseDate']/text()") + showtime_elem = self.query_list( + content, "//span[@property='v:initialReleaseDate']/text()" + ) if showtime_elem: showtime = [] for st in showtime_elem: @@ -122,39 +135,39 @@ def scrape(self): else: showtime = None - site_elem = content.xpath( - "//div[@id='info']//span[text()='官方网站:']/following-sibling::a[1]/@href" + site_elem = self.query_list( + content, + "//div[@id='info']//span[text()='官方网站:']/following-sibling::a[1]/@href", ) site = site_elem[0].strip()[:200] if site_elem else None if site and not re.match(r"http.+", site): site = None - area_elem = content.xpath( - "//div[@id='info']//span[text()='制片国家/地区:']/following-sibling::text()[1]" + area_elem = self.query_list( + content, + "//div[@id='info']//span[text()='制片国家/地区:']/following-sibling::text()[1]", ) if area_elem: area = [a.strip()[:100] for a in area_elem[0].split("/")] else: area = None - language_elem = content.xpath( - "//div[@id='info']//span[text()='语言:']/following-sibling::text()[1]" + language_elem = self.query_list( + content, + "//div[@id='info']//span[text()='语言:']/following-sibling::text()[1]", ) if language_elem: language = [a.strip() for a in language_elem[0].split(" / ")] else: language = None - year_elem = content.xpath("//span[@class='year']/text()") - year = ( - int(re.search(r"\d+", year_elem[0])[0]) - if year_elem and re.search(r"\d+", year_elem[0]) - else None - ) + year_s = self.query_str(content, "//span[@class='year']/text()") + year_r = re.search(r"\d+", year_s) if year_s else None + year = int_(year_r[0]) if year_r else None - duration_elem = content.xpath("//span[@property='v:runtime']/text()") - other_duration_elem = content.xpath( - "//span[@property='v:runtime']/following-sibling::text()[1]" + duration_elem = self.query_list(content, "//span[@property='v:runtime']/text()") + other_duration_elem = self.query_list( + content, 
"//span[@property='v:runtime']/following-sibling::text()[1]" ) if duration_elem: duration = duration_elem[0].strip() @@ -164,19 +177,21 @@ def scrape(self): else: duration = None - season_elem = content.xpath( - "//*[@id='season']/option[@selected='selected']/text()" + season_elem = self.query_list( + content, "//*[@id='season']/option[@selected='selected']/text()" ) if not season_elem: - season_elem = content.xpath( - "//div[@id='info']//span[text()='季数:']/following-sibling::text()[1]" + season_elem = self.query_list( + content, + "//div[@id='info']//span[text()='季数:']/following-sibling::text()[1]", ) season = int(season_elem[0].strip()) if season_elem else None else: season = int(season_elem[0].strip()) - episodes_elem = content.xpath( - "//div[@id='info']//span[text()='集数:']/following-sibling::text()[1]" + episodes_elem = self.query_list( + content, + "//div[@id='info']//span[text()='集数:']/following-sibling::text()[1]", ) episodes = ( int(episodes_elem[0].strip()) @@ -184,8 +199,9 @@ def scrape(self): else None ) - single_episode_length_elem = content.xpath( - "//div[@id='info']//span[text()='单集片长:']/following-sibling::text()[1]" + single_episode_length_elem = self.query_list( + content, + "//div[@id='info']//span[text()='单集片长:']/following-sibling::text()[1]", ) single_episode_length = ( single_episode_length_elem[0].strip()[:100] @@ -195,16 +211,16 @@ def scrape(self): is_series = d.get("@type") == "TVSeries" or episodes is not None - brief_elem = content.xpath("//span[@class='all hidden']") + brief_elem = self.query_list(content, "//span[@class='all hidden']") if not brief_elem: - brief_elem = content.xpath("//span[@property='v:summary']") + brief_elem = self.query_list(content, "//span[@property='v:summary']") brief = ( "\n".join([e.strip() for e in brief_elem[0].xpath("./text()")]) if brief_elem else None ) - img_url_elem = content.xpath("//img[@rel='v:image']/@src") + img_url_elem = self.query_list(content, "//img[@rel='v:image']/@src") img_url = img_url_elem[0].strip() if img_url_elem else None titles = set( @@ -261,26 +277,26 @@ def scrape(self): pd.metadata.get("season_number") and pd.metadata.get("season_number") != 1 ): - _logger.warn(f"{imdb_code} matched imdb tv show, force season 1") + logger.warning(f"{imdb_code} matched imdb tv show, force season 1") pd.metadata["season_number"] = 1 elif pd.metadata["preferred_model"] == "TVSeason" and has_episode: if res_data["tv_episode_results"][0]["episode_number"] != 1: - _logger.warning( + logger.warning( f"Douban Movie {self.url} IMDB {imdb_code} mapping to non-first episode in a season" ) elif res_data["tv_episode_results"][0]["season_number"] == 1: - _logger.warning( + logger.warning( f"Douban Movie {self.url} IMDB {imdb_code} mapping to first season episode in a season" ) elif has_movie: if pd.metadata["preferred_model"] != "Movie": - _logger.warn(f"{imdb_code} matched imdb movie, force Movie") + logger.warning(f"{imdb_code} matched imdb movie, force Movie") pd.metadata["preferred_model"] = "Movie" elif has_tv or has_episode: - _logger.warn(f"{imdb_code} matched imdb tv/episode, force TVSeason") + logger.warning(f"{imdb_code} matched imdb tv/episode, force TVSeason") pd.metadata["preferred_model"] = "TVSeason" else: - _logger.warn(f"{imdb_code} unknown to TMDB") + logger.warning(f"{imdb_code} unknown to TMDB") pd.lookup_ids[IdType.IMDB] = imdb_code diff --git a/catalog/sites/douban_music.py b/catalog/sites/douban_music.py index d0871fe6..af57ddf7 100644 --- a/catalog/sites/douban_music.py +++ b/catalog/sites/douban_music.py @@ 
-7,9 +7,7 @@ from catalog.music.utils import upc_to_gtin_13 from common.models.lang import detect_language -from .douban import DoubanDownloader - -_logger = logging.getLogger(__name__) +from .douban import DoubanDownloader, DoubanSearcher @SiteManager.register @@ -29,58 +27,63 @@ class DoubanMusic(AbstractSite): def id_to_url(cls, id_value): return "https://music.douban.com/subject/" + id_value + "/" + @classmethod + def search(cls, q: str, p: int = 1): + return DoubanSearcher.search(ItemCategory.Music, "music", q, p) + def scrape(self): content = DoubanDownloader(self.url).download().html() - elem = content.xpath("//h1/span/text()") + elem = self.query_list(content, "//h1/span/text()") title = elem[0].strip() if len(elem) else None if not title: raise ParseError(self, "title") - artists_elem = content.xpath( - "//div[@id='info']/span/span[@class='pl']/a/text()" + artists_elem = self.query_list( + content, "//div[@id='info']/span/span[@class='pl']/a/text()" ) artist = ( None if not artists_elem else list(map(lambda a: a[:200], artists_elem)) ) - genre_elem = content.xpath( - "//div[@id='info']//span[text()='流派:']/following::text()[1]" + genre_elem = self.query_list( + content, "//div[@id='info']//span[text()='流派:']/following::text()[1]" ) genre = genre_elem[0].strip().split(" / ") if genre_elem else [] - date_elem = content.xpath( - "//div[@id='info']//span[text()='发行时间:']/following::text()[1]" + date_elem = self.query_list( + content, "//div[@id='info']//span[text()='发行时间:']/following::text()[1]" ) release_date = dateparser.parse(date_elem[0].strip()) if date_elem else None release_date = release_date.strftime("%Y-%m-%d") if release_date else None - company_elem = content.xpath( - "//div[@id='info']//span[text()='出版者:']/following::text()[1]" + company_elem = self.query_list( + content, "//div[@id='info']//span[text()='出版者:']/following::text()[1]" ) company = company_elem[0].strip() if company_elem else None - track_list_elem = content.xpath( - "//div[@class='track-list']/div[@class='indent']/div/text()" + track_list_elem = self.query_list( + content, "//div[@class='track-list']/div[@class='indent']/div/text()" ) if track_list_elem: track_list = "\n".join([track.strip() for track in track_list_elem]) else: track_list = None - brief_elem = content.xpath("//span[@class='all hidden']") + brief_elem = self.query_list(content, "//span[@class='all hidden']") if not brief_elem: - brief_elem = content.xpath("//span[@property='v:summary']") + brief_elem = self.query_list(content, "//span[@property='v:summary']") brief = ( "\n".join([e.strip() for e in brief_elem[0].xpath("./text()")]) if brief_elem else None ) - img_url_elem = content.xpath("//div[@id='mainpic']//img/@src") + img_url_elem = self.query_list(content, "//div[@id='mainpic']//img/@src") img_url = img_url_elem[0].strip() if img_url_elem else None - other_elem = content.xpath( - "//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]" + other_elem = self.query_list( + content, + "//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]", ) other_title = other_elem[0].strip().split(" / ") if other_elem else [] lang = detect_language(f"{title} {brief}") @@ -103,28 +106,33 @@ def scrape(self): } gtin = None isrc = None - other_elem = content.xpath( - "//div[@id='info']//span[text()='专辑类型:']/following-sibling::text()[1]" + other_elem = self.query_list( + content, + "//div[@id='info']//span[text()='专辑类型:']/following-sibling::text()[1]", ) if other_elem: data["album_type"] = other_elem[0].strip() - other_elem = 
content.xpath( - "//div[@id='info']//span[text()='介质:']/following-sibling::text()[1]" + other_elem = self.query_list( + content, + "//div[@id='info']//span[text()='介质:']/following-sibling::text()[1]", ) if other_elem: data["media"] = other_elem[0].strip() - other_elem = content.xpath( - "//div[@id='info']//span[text()='ISRC:']/following-sibling::text()[1]" + other_elem = self.query_list( + content, + "//div[@id='info']//span[text()='ISRC:']/following-sibling::text()[1]", ) if other_elem: isrc = other_elem[0].strip() - other_elem = content.xpath( - "//div[@id='info']//span[text()='条形码:']/following-sibling::text()[1]" + other_elem = self.query_list( + content, + "//div[@id='info']//span[text()='条形码:']/following-sibling::text()[1]", ) if other_elem: gtin = upc_to_gtin_13(other_elem[0].strip()) - other_elem = content.xpath( - "//div[@id='info']//span[text()='碟片数:']/following-sibling::text()[1]" + other_elem = self.query_list( + content, + "//div[@id='info']//span[text()='碟片数:']/following-sibling::text()[1]", ) if other_elem: data["disc_count"] = other_elem[0].strip() diff --git a/pyproject.toml b/pyproject.toml index 80fc2288..5115b80d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,7 +80,6 @@ exclude = [ "journal/tests.py", "neodb", "**/migrations", - "**/sites/douban_*", "neodb-takahe", ] reportIncompatibleVariableOverride = false
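
A minimal usage sketch of the new search path, for review context rather than as part of the patch: it assumes the code runs inside this Django project with network access to douban.com, and that the Douban search page still embeds window.__DATA__. The query string "dune" is illustrative only.

    from catalog.common import ItemCategory
    from catalog.sites.douban import DoubanSearcher
    from catalog.sites.douban_book import DoubanBook

    # shared searcher: category, Douban site section, query, page
    # (the start offset steps by 15, i.e. 15 results per page)
    results = DoubanSearcher.search(ItemCategory.Book, "book", "dune", p=1)
    for r in results:
        # prints via the __repr__ added to ExternalSearchResultItem above,
        # roughly "[book] <display_title> <url>"
        print(r)

    # the same search through the per-site classmethod added in douban_book.py
    results = DoubanBook.search("dune", p=1)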