def to_indexable_titles(self) -> list[str]:
    """Collect the title strings under which this item should be searchable.

    Gathers the ``"text"`` of every non-empty entry in ``localized_title``
    and ``localized_subtitle``, plus ``orig_title`` when it is set. A missing
    (``None``) list is treated as empty.

    Returns:
        The distinct title strings in first-seen order: localized titles,
        then localized subtitles, then the original title.
    """
    titles = [t["text"] for t in (self.localized_title or []) if t]
    titles += [t["text"] for t in (self.localized_subtitle or []) if t]
    if self.orig_title:
        titles.append(self.orig_title)
    # dict.fromkeys de-duplicates while keeping insertion order;
    # list(set(...)) yielded the same values in an arbitrary order that
    # could change from one process to the next.
    return list(dict.fromkeys(titles))
f"{self.__class__.__name__}|{self.pk}|{self.uuid} {self.primary_lookup_id_type}:{self.primary_lookup_id_value if self.primary_lookup_id_value else ''} ({self.title})" + return f"{self.__class__.__name__}|{self.pk}|{self.uuid} {self.primary_lookup_id_type}:{self.primary_lookup_id_value if self.primary_lookup_id_value else ''} ({self.display_title})" @classmethod def lookup_id_type_choices(cls): @@ -567,6 +567,12 @@ def merge_to(self, to_item: "Item | None"): res.item = to_item res.save() + @property + def final_item(self) -> Self: + if self.merged_to_item: + return self.merged_to_item.final_item + return self + def recast_to(self, model: "type[Any]") -> "Item": logger.warning(f"recast item {self} to {model}") if isinstance(self, model): @@ -657,6 +663,12 @@ def display_description(self) -> str: def brief_description(self): return (str(self.display_description) or "")[:155] + def to_indexable_titles(self) -> list[str]: + titles = [t["text"] for t in self.localized_title if t] + if self.parent_item: + titles += self.parent_item.to_indexable_titles() + return list(set(titles)) + @classmethod def get_by_url(cls, url_or_b62: str, resolve_merge=False) -> "Self | None": b62 = url_or_b62.strip().split("/")[-1] diff --git a/catalog/movie/models.py b/catalog/movie/models.py index 55624873..5955bf16 100644 --- a/catalog/movie/models.py +++ b/catalog/movie/models.py @@ -173,3 +173,8 @@ def lookup_id_cleanup(cls, lookup_id_type, lookup_id_value): else: return None, None return super().lookup_id_cleanup(lookup_id_type, lookup_id_value) + + def to_indexable_titles(self) -> list[str]: + titles = [t["text"] for t in self.localized_title if t] + titles += [self.orig_title] if self.orig_title else [] + return list(set(titles)) diff --git a/catalog/search/views.py b/catalog/search/views.py index 23d16f65..7d471559 100644 --- a/catalog/search/views.py +++ b/catalog/search/views.py @@ -1,4 +1,5 @@ import re +from urllib.parse import quote import django_rq from django.conf import settings 
@@ -6,12 +7,14 @@ from django.core.cache import cache from django.core.exceptions import BadRequest from django.shortcuts import redirect, render +from django.urls import reverse from django.utils.translation import gettext as _ from django.views.decorators.http import require_http_methods from rq.job import Job from catalog.common.models import ItemCategory, SiteName from catalog.common.sites import AbstractSite, SiteManager +from common.models import int_ from common.utils import ( HTTPResponseHXRedirect, PageLinksGenerator, @@ -37,7 +40,7 @@ def fetch_refresh(request, job_id): else: return HTTPResponseHXRedirect(item_url) else: - retry = int(request.GET.get("retry", 0)) + 1 + retry = int_(request.GET.get("retry", 0)) + 1 if retry > 10: return render(request, "_fetch_failed.html") else: @@ -97,10 +100,10 @@ def visible_categories(request): @user_identity_required def search(request): + category = request.GET.get("c", default="all").strip().lower() keywords = request.GET.get("q", default="").strip() if re.match(r"^[@@]", keywords): return query_identity(request, keywords.replace("@", "@")) - category = request.GET.get("c", default="all").strip().lower() hide_category = False if category == "all" or not category: category = None @@ -115,8 +118,7 @@ def search(request): categories = visible_categories(request) tag = request.GET.get("tag", default="").strip() tag = Tag.deep_cleanup_title(tag, default="") - p = request.GET.get("page", default="1") - p = int(p) if p.isdigit() else 1 + p = int_(request.GET.get("page", default="1"), 1) if not (keywords or tag): return render( request, @@ -158,7 +160,7 @@ def external_search(request): if category == "all": category = None keywords = request.GET.get("q", default="").strip() - page_number = int(request.GET.get("page", default=1)) + page_number = int_(request.GET.get("page"), 1) items = ExternalSources.search(category, keywords, page_number) if keywords else [] cache_key = f"search_{category if category!='movietv' else 
'movie,tv'}_{keywords}" dedupe_urls = cache.get(cache_key, []) diff --git a/catalog/templates/_item_card_metadata_base.html b/catalog/templates/_item_card_metadata_base.html index adb3be2a..dfad9b1b 100644 --- a/catalog/templates/_item_card_metadata_base.html +++ b/catalog/templates/_item_card_metadata_base.html @@ -52,7 +52,7 @@
{% for tag in item.tags %} {% if forloop.counter <= 5 %} - {{ tag }} + {{ tag }} {% endif %} {% endfor %} diff --git a/catalog/templates/_item_card_metadata_collection.html b/catalog/templates/_item_card_metadata_collection.html new file mode 100644 index 00000000..9fcc2644 --- /dev/null +++ b/catalog/templates/_item_card_metadata_collection.html @@ -0,0 +1,16 @@ +{% extends "_item_card_metadata_base.html" %} +{% load humanize %} +{% load i18n %} +{% block brief %} +
+ {% if item.rating %} + {{ item.rating | floatformat:1 }} ({{ item.rating_count }} {% trans "ratings" %}) + {% endif %} + {% include '_people.html' with people=item.host role='host' max=5 %} +
+{% endblock brief %} +{% block full %} +
+ {% if not hide_brief %}{{ item.display_description | linebreaksbr }}{% endif %} +
+{% endblock full %} diff --git a/catalog/templates/discover.html b/catalog/templates/discover.html index 4528e7a3..c6428ae8 100644 --- a/catalog/templates/discover.html +++ b/catalog/templates/discover.html @@ -163,7 +163,7 @@
{% trans "Collections" %}
{% for t in popular_tags %} - {{ t }} + {{ t }} {% empty %}
{% trans "nothing so far." %}
diff --git a/catalog/templates/item_base.html b/catalog/templates/item_base.html index ce7aa20d..78d2ad1a 100644 --- a/catalog/templates/item_base.html +++ b/catalog/templates/item_base.html @@ -191,7 +191,7 @@

{% for tag in item.tags %} - {{ tag }} + {{ tag }} {% endfor %}
diff --git a/catalog/templates/search_header.html b/catalog/templates/search_header.html new file mode 100644 index 00000000..dcfb722d --- /dev/null +++ b/catalog/templates/search_header.html @@ -0,0 +1,80 @@ +{% load static %} +{% load i18n %} +{% load l10n %} +{% load humanize %} +{% load mastodon %} +{% load duration %} +{% load thumb %} +
+
“{{ request.GET.q }}”
+
+ {% visible_categories as cats %} + {% if request.GET.c and request.GET.c != 'all' %} + {% trans "all" %} + {% else %} + {% trans "all" %} + {% endif %} + {% if 'book' in cats %} + | + {% if request.GET.c != 'book' %} + {% trans "books" %} + {% else %} + {% trans "books" %} + {% endif %} + {% endif %} + {% if 'movie' in cats or 'tv' in cats %} + | + {% if request.GET.c != 'movietv' %} + {% trans "movie & tv" %} + {% else %} + {% trans "movie & tv" %} + {% endif %} + {% endif %} + {% if 'podcast' in cats %} + | + {% if request.GET.c != 'podcast' %} + {% trans "podcasts" %} + {% else %} + {% trans "podcasts" %} + {% endif %} + {% endif %} + {% if 'music' in cats %} + | + {% if request.GET.c != 'music' %} + {% trans "music" %} + {% else %} + {% trans "music" %} + {% endif %} + {% endif %} + {% if 'game' in cats %} + | + {% if request.GET.c != 'game' %} + {% trans "games" %} + {% else %} + {% trans "games" %} + {% endif %} + {% endif %} + {% if 'performance' in cats %} + | + {% if request.GET.c != 'performance' %} + {% trans "performances" %} + {% else %} + {% trans "performances" %} + {% endif %} + {% endif %} + {% if user.is_authenticated %} + | + {% if request.GET.c != 'journal' %} + {% trans "your journal" %} + {% else %} + {% trans "your journal" %} + {% endif %} + | + {% if request.GET.c != 'timeline' %} + {% trans "your timeline" %} + {% else %} + {% trans "your timeline" %} + {% endif %} + {% endif %} +
+
diff --git a/catalog/templates/search_results.html b/catalog/templates/search_results.html index 280a7735..64f76d54 100644 --- a/catalog/templates/search_results.html +++ b/catalog/templates/search_results.html @@ -20,65 +20,7 @@
{% if request.GET.q %} -
-
“{{ request.GET.q }}”
-
- {% visible_categories as cats %} - {% if request.GET.c and request.GET.c != 'all' %} - {% trans "all" %} - {% else %} - {% trans "all" %} - {% endif %} - {% if 'book' in cats %} - | - {% if request.GET.c != 'book' %} - {% trans "books" %} - {% else %} - {% trans "books" %} - {% endif %} - {% endif %} - {% if 'movie' in cats or 'tv' in cats %} - | - {% if request.GET.c != 'movietv' %} - {% trans "movie & tv" %} - {% else %} - {% trans "movie & tv" %} - {% endif %} - {% endif %} - {% if 'podcast' in cats %} - | - {% if request.GET.c != 'podcast' %} - {% trans "podcasts" %} - {% else %} - {% trans "podcasts" %} - {% endif %} - {% endif %} - {% if 'music' in cats %} - | - {% if request.GET.c != 'music' %} - {% trans "music" %} - {% else %} - {% trans "music" %} - {% endif %} - {% endif %} - {% if 'game' in cats %} - | - {% if request.GET.c != 'game' %} - {% trans "games" %} - {% else %} - {% trans "games" %} - {% endif %} - {% endif %} - {% if 'performance' in cats %} - | - {% if request.GET.c != 'performance' %} - {% trans "performances" %} - {% else %} - {% trans "performances" %} - {% endif %} - {% endif %} -
-
+ {% include "search_header.html" %} {% endif %} {% if request.GET.tag %}
{% trans 'tag' %}: “{{ request.GET.tag }}”
diff --git a/catalog/tv/models.py b/catalog/tv/models.py index 2b632fc1..a94f7f16 100644 --- a/catalog/tv/models.py +++ b/catalog/tv/models.py @@ -250,6 +250,11 @@ def child_items(self): def get_season_count(self): return self.season_count or self.seasons.all().count() + def to_indexable_titles(self) -> list[str]: + titles = [t["text"] for t in self.localized_title if t] + titles += [self.orig_title] if self.orig_title else [] + return list(set(titles)) + class TVSeason(Item): if TYPE_CHECKING: @@ -434,6 +439,12 @@ def additional_title(self) -> list[str]: and RE_LOCALIZED_SEASON_NUMBERS.sub("", t["text"]) != "" ] + def to_indexable_titles(self) -> list[str]: + titles = [t["text"] for t in self.localized_title if t] + titles += [self.orig_title] if self.orig_title else [] + titles += self.parent_item.to_indexable_titles() if self.parent_item else [] + return list(set(titles)) + def update_linked_items_from_external_resource(self, resource): for w in resource.required_resources: if w["model"] == "TVShow": diff --git a/catalog/urls.py b/catalog/urls.py index fae8ce5c..06726ae0 100644 --- a/catalog/urls.py +++ b/catalog/urls.py @@ -157,7 +157,6 @@ def _get_all_url_paths(): mark_list, name="mark_list", ), - path("search", search, name="search"), path("search/", search, name="search_legacy"), path("search/external", external_search, name="external_search"), path("fetch_refresh/", fetch_refresh, name="fetch_refresh"), diff --git a/common/models/__init__.py b/common/models/__init__.py index 5281699a..87ad4001 100644 --- a/common/models/__init__.py +++ b/common/models/__init__.py @@ -1,4 +1,5 @@ from .cron import BaseJob, JobManager +from .index import Index, SearchResult from .lang import ( LANGUAGE_CHOICES, LOCALE_CHOICES, @@ -9,4 +10,21 @@ detect_language, get_current_locales, ) -from .misc import uniq +from .misc import int_, uniq + +__all__ = [ + "BaseJob", + "JobManager", + "LANGUAGE_CHOICES", + "LOCALE_CHOICES", + "SCRIPT_CHOICES", + "SITE_DEFAULT_LANGUAGE", + 
class SearchResult:
    """Sequence-like wrapper around one search response dict.

    Exposes paging information (``page``, ``page_size``, ``total``,
    ``pages``) as attributes and delegates ``len()``, iteration, indexing,
    ``in`` and truthiness to the response's ``"hits"`` list.
    """

    def __init__(self, index: "Index", response: dict):
        self.index = index
        self.response = response
        self.page_size = response["request_params"]["per_page"]
        self.total = response["found"]
        self.page = response["page"]
        # Number of pages needed for `total` hits (rounded up).
        self.pages = (self.total + self.page_size - 1) // self.page_size

    def __repr__(self):
        r = self.response
        return f"SearchResult(search '{r['request_params']['q']}', found {r['found']} out of {r['out_of']}, page {r['page']})"

    # The string form is the same text as the repr.
    __str__ = __repr__

    def get_facet(self, field):
        """Return ``{value: count}`` for facet ``field``, or ``{}`` if absent."""
        for facet in self.response["facet_counts"]:
            if facet["field_name"] == field:
                return {c["value"]: c["count"] for c in facet["counts"]}
        return {}

    def __bool__(self):
        return bool(self.response["hits"])

    def __len__(self):
        return len(self.response["hits"])

    def __iter__(self):
        return iter(self.response["hits"])

    def __getitem__(self, key):
        return self.response["hits"][key]

    def __contains__(self, item):
        return item in self.response["hits"]


SearchResultClass = TypeVar("SearchResultClass", bound=SearchResult)
class Index:
    """Base class for a search index stored in a Typesense collection.

    Subclasses set ``name`` and ``schema``. The physical collection name is
    looked up in ``settings.INDEX_ALIASES``: the key ``"<name>_read"`` or
    ``"<name>_write"`` wins if present and non-empty, then ``"<name>"``, and
    otherwise ``name`` itself is used.
    """

    name = ""  # must be set in subclass
    schema = {"fields": []}  # must be set in subclass
    max_pages = 100
    default_search_params = {
        # "query_by": ...,
        "per_page": 20,
        "highlight_fields": "",
        "include_fields": "id",
    }

    _instance = None
    _client: typesense.Client

    @classmethod
    def instance(cls) -> Self:
        """Return the lazily created shared instance of this exact class."""
        # Look only at this class's own __dict__: plain attribute lookup
        # would let a subclass pick up an instance cached on a parent class.
        if cls.__dict__.get("_instance") is None:
            cls._instance = cls()
        return cls._instance

    @classmethod
    def get_client(cls):
        """Build a client from ``settings.TYPESENSE_CONNECTION``."""
        return typesense.Client(settings.TYPESENSE_CONNECTION)

    def __init__(self, *args, **kwargs):
        self._client = self.get_client()

    @classmethod
    def _collection_name(cls, for_write: bool = False) -> str:
        """Resolve the collection name for reads or writes via INDEX_ALIASES."""
        suffix = "_write" if for_write else "_read"
        aliases = settings.INDEX_ALIASES
        return aliases.get(cls.name + suffix) or aliases.get(cls.name, cls.name)

    def _get_collection(self, for_write=False) -> Collection:
        """Return the client's collection handle for reading or writing.

        Raises:
            KeyError: if the client returns a falsy handle.
        """
        collection_id = self.name + ("_write" if for_write else "_read")
        collection = self._client.collections[self._collection_name(for_write)]
        if not collection:
            raise KeyError(f"Typesense: collection {collection_id} not found")
        return collection

    @cached_property
    def read_collection(self) -> Collection:
        return self._get_collection()

    @cached_property
    def write_collection(self) -> Collection:
        return self._get_collection(True)

    @classmethod
    def get_schema(cls) -> dict:
        """Return ``cls.schema`` with ``"name"`` defaulting to the write collection name."""
        schema = {"name": cls._collection_name(for_write=True)}
        schema.update(cls.schema)
        return schema

    def check(self) -> dict:
        """Return the read collection's info.

        Raises:
            ValueError: if the server reports itself unhealthy.
        """
        if not self._client.operations.is_healthy():
            raise ValueError("Typesense: server not healthy")
        return self.read_collection.retrieve()

    def create_collection(self):
        self._client.collections.create(self.get_schema())

    def delete_collection(self):
        self.write_collection.delete()

    def update_schema(self):
        self.write_collection.update(self.get_schema())

    def initialize_collection(self, max_wait=5) -> bool:
        """Wait for the server, then make sure the write collection exists.

        Args:
            max_wait: maximum number of one-second waits for the server to
                become healthy.

        Returns:
            True when the collection exists or was created; False on timeout
            or any error (errors are logged, not raised).
        """
        try:
            wait = max_wait
            # Health is re-checked after the final sleep, so a server that
            # recovers on the last retry (or max_wait=0 with a healthy
            # server) is no longer reported as a timeout.
            while not self._client.operations.is_healthy():
                if wait <= 0:
                    logger.error("Typesense: timeout waiting for server")
                    return False
                logger.warning("Typesense: server not healthy")
                sleep(1)
                wait -= 1
            cname = self._collection_name(for_write=True)
            collection = self._client.collections[cname]
            if collection:
                try:
                    i = collection.retrieve()
                    logger.debug(f"Typesense: {cname} has {i['num_documents']} docs")
                except ObjectNotFound:
                    self.create_collection()
                    logger.info(f"Typesense: {cname} created")
                return True
            logger.error("Typesense: server unknown error")
        except Exception as e:
            # Deliberately best-effort: report the failure via the return value.
            logger.error(f"Typesense: server error {e}")
        return False

    def _import_docs(self, docs: Iterable[dict], params: dict | None = None):
        """Import ``docs`` into the write collection and log per-document errors."""
        if not docs:
            return False
        documents = self.write_collection.documents
        rs = documents.import_(docs, params) if params else documents.import_(docs)
        for r in rs:
            e = r.get("error", None)
            if e:
                logger.error(f"Typesense: {self.name} import error {e}")
                if settings.DEBUG:
                    logger.error(f"Typesense: {r}")

    def replace_docs(self, docs: Iterable[dict]):
        """Import ``docs`` with ``action=upsert``; returns False if ``docs`` is empty."""
        return self._import_docs(docs, {"action": "upsert"})

    def insert_docs(self, docs: Iterable[dict]):
        """Import ``docs`` with the client's default action; returns False if ``docs`` is empty."""
        return self._import_docs(docs)

    def delete_docs(self, field: str, values: list[int] | str) -> int:
        """Delete documents where ``field`` matches ``values``.

        A list is rendered as ``[v1,v2,...]``; a string is used verbatim as
        the right-hand side of the filter.

        Returns:
            The ``num_deleted`` reported by the server, or 0 if absent.
        """
        v: str = (
            ("[" + ",".join(map(str, values)) + "]")
            if isinstance(values, list)
            else values
        )
        q = {"filter_by": f"{field}:{v}"}
        r = self.write_collection.documents.delete(q)
        return (r or {}).get("num_deleted", 0)

    def patch_docs(self, partial_doc: dict, doc_filter: str):
        self.write_collection.documents.update(partial_doc, {"filter_by": doc_filter})

    def search(
        self,
        q: str,
        page: int = 1,
        page_size: int = 0,
        query_by: list[str] | None = None,
        sort_by: str = "",
        filter_by: dict[str, list[str | int]] | None = None,
        facet_by: list[str] | None = None,
        result_class: type[SearchResultClass] = SearchResult,
    ) -> SearchResultClass:
        """Search the read collection.

        Args:
            q: query string.
            page: 1-based page number; values outside ``1..max_pages`` fall
                back to 1.
            page_size: hits per page; 0 keeps the default from
                ``default_search_params``.
            query_by: fields to search, joined with commas.
            sort_by: sort expression, passed through unchanged.
            filter_by: mapping of field to allowed values. Several values
                become ``field:[a,b]``, a single value ``field:a``. The
                special key ``"_"`` holds ready-made filter expressions that
                are appended as-is. All filters are joined with ``&&``.
            facet_by: fields to facet on, joined with commas.
            result_class: class used to wrap the raw response.

        Returns:
            ``result_class(self, response)``.
        """
        # None defaults replace the former shared mutable [] / {} defaults.
        params = self.default_search_params.copy()
        params["q"] = q
        params["page"] = page if 0 < page <= self.max_pages else 1
        if page_size:
            params["per_page"] = page_size
        filters = []
        for field, values in (filter_by or {}).items():
            if field == "_":
                filters += values
            elif values:
                v = f"[{','.join(map(str, values))}]" if len(values) > 1 else values[0]
                filters.append(f"{field}:{v}")
        if filters:
            params["filter_by"] = " && ".join(filters)
        if facet_by:
            params["facet_by"] = ",".join(facet_by)
        if query_by:
            params["query_by"] = ",".join(query_by)
        if sort_by:
            params["sort_by"] = sort_by
        if settings.DEBUG:
            logger.debug(f"Typesense: search {self.name} {params}")
        r = self.read_collection.documents.search(params)
        sr = result_class(self, r)
        if settings.DEBUG:
            logger.debug(f"Typesense: search result {sr}")
        return sr
def int_(x, default=0):
    """Leniently coerce *x* to an int, falling back to *default*.

    Args:
        x: value to convert, typically a raw query-string parameter.
        default: value returned when *x* is not usable.

    Returns:
        *x* itself when it is already an ``int``; ``int(x)`` when *x* is a
        string consisting only of digits; otherwise *default* (this includes
        ``None``, empty strings, signed numbers and surrounding whitespace).
    """
    if isinstance(x, int):
        return x
    if isinstance(x, str) and x.isdigit():
        try:
            return int(x)
        except ValueError:
            # str.isdigit() is true for characters int() cannot parse
            # (e.g. "²"), and int() may also refuse extremely long digit
            # strings; neither should raise out of a lenient helper.
            return default
    return default