Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support trigger migrations #3134

Merged
merged 15 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions bookwyrm/migrations/0190_book_search_updates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Generated by Django 3.2.20 on 2023-11-24 17:11

from django.db import migrations


class Migration(migrations.Migration):
dependencies = [
("bookwyrm", "0188_theme_loads"),
dato marked this conversation as resolved.
Show resolved Hide resolved
]

operations = [
migrations.RemoveIndex(
model_name="author",
name="bookwyrm_au_search__b050a8_gin",
),
Minnozz marked this conversation as resolved.
Show resolved Hide resolved
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Generated by Django 3.2.20 on 2023-11-25 00:47

from importlib import import_module
import re

from django.db import migrations
import pgtrigger.compiler
import pgtrigger.migrations

trigger_migration = import_module("bookwyrm.migrations.0077_auto_20210623_2155")

# it's _very_ convenient for development that this migration be reversible
search_vector_trigger = trigger_migration.Migration.operations[4]
author_search_vector_trigger = trigger_migration.Migration.operations[5]


assert re.search(r"\bCREATE TRIGGER search_vector_trigger\b", search_vector_trigger.sql)
assert re.search(
r"\bCREATE TRIGGER author_search_vector_trigger\b",
author_search_vector_trigger.sql,
)


class Migration(migrations.Migration):
dependencies = [
("bookwyrm", "0190_book_search_updates"),
]

operations = [
pgtrigger.migrations.AddTrigger(
model_name="book",
trigger=pgtrigger.compiler.Trigger(
name="update_search_vector_on_book_edit",
sql=pgtrigger.compiler.UpsertTriggerSql(
func="new.search_vector := setweight(coalesce(nullif(to_tsvector('english', new.title), ''), to_tsvector('simple', new.title)), 'A') || setweight(to_tsvector('english', coalesce(new.subtitle, '')), 'B') || (SELECT setweight(to_tsvector('simple', coalesce(array_to_string(array_agg(bookwyrm_author.name), ' '), '')), 'C') FROM bookwyrm_author LEFT JOIN bookwyrm_book_authors ON bookwyrm_author.id = bookwyrm_book_authors.author_id WHERE bookwyrm_book_authors.book_id = new.id ) || setweight(to_tsvector('english', coalesce(new.series, '')), 'D');RETURN NEW;",
hash="77d6399497c0a89b0bf09d296e33c396da63705c",
operation='INSERT OR UPDATE OF "title", "subtitle", "series", "search_vector"',
pgid="pgtrigger_update_search_vector_on_book_edit_bec58",
table="bookwyrm_book",
when="BEFORE",
),
),
),
pgtrigger.migrations.AddTrigger(
model_name="author",
trigger=pgtrigger.compiler.Trigger(
name="reset_search_vector_on_author_edit",
sql=pgtrigger.compiler.UpsertTriggerSql(
func="WITH updated_books AS (SELECT book_id FROM bookwyrm_book_authors WHERE author_id = new.id ) UPDATE bookwyrm_book SET search_vector = '' FROM updated_books WHERE id = updated_books.book_id;RETURN NEW;",
hash="e7bbf08711ff3724c58f4d92fb7a082ffb3d7826",
operation='UPDATE OF "name"',
pgid="pgtrigger_reset_search_vector_on_author_edit_a447c",
table="bookwyrm_author",
when="AFTER",
),
),
),
migrations.RunSQL(
sql="""DROP TRIGGER IF EXISTS search_vector_trigger ON bookwyrm_book;
DROP FUNCTION IF EXISTS book_trigger;
""",
reverse_sql=search_vector_trigger.sql,
),
migrations.RunSQL(
sql="""DROP TRIGGER IF EXISTS author_search_vector_trigger ON bookwyrm_author;
DROP FUNCTION IF EXISTS author_trigger;
""",
reverse_sql=author_search_vector_trigger.sql,
),
]
30 changes: 25 additions & 5 deletions bookwyrm/models/author.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
import re
from typing import Tuple, Any

from django.contrib.postgres.indexes import GinIndex
from django.db import models
import pgtrigger

from bookwyrm import activitypub
from bookwyrm.settings import DOMAIN
from bookwyrm.utils.db import format_trigger

from .book import BookDataModel
from . import fields
Expand Down Expand Up @@ -67,9 +68,28 @@ def get_remote_id(self):
"""editions and works both use "book" instead of model_name"""
return f"https://{DOMAIN}/author/{self.id}"

activity_serializer = activitypub.Author

class Meta:
"""sets up postgres GIN index field"""
"""sets up indexes and triggers"""

triggers = [
pgtrigger.Trigger(
name="reset_search_vector_on_author_edit",
when=pgtrigger.After,
operation=pgtrigger.UpdateOf("name"),
func=format_trigger(
"""WITH updated_books AS (
SELECT book_id
FROM bookwyrm_book_authors
WHERE author_id = new.id
Minnozz marked this conversation as resolved.
Show resolved Hide resolved
)
UPDATE bookwyrm_book
SET search_vector = ''
FROM updated_books
WHERE id = updated_books.book_id;
RETURN new;
"""
),
)
]

indexes = (GinIndex(fields=["search_vector"]),)
activity_serializer = activitypub.Author
34 changes: 33 additions & 1 deletion bookwyrm/models/book.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from model_utils import FieldTracker
from model_utils.managers import InheritanceManager
from imagekit.models import ImageSpecField
import pgtrigger

from bookwyrm import activitypub
from bookwyrm.isbn.isbn import hyphenator_singleton as hyphenator
Expand All @@ -24,6 +25,7 @@
ENABLE_PREVIEW_IMAGES,
ENABLE_THUMBNAIL_GENERATION,
)
from bookwyrm.utils.db import format_trigger

from .activitypub_mixin import OrderedCollectionPageMixin, ObjectMixin
from .base_model import BookWyrmModel
Expand Down Expand Up @@ -232,9 +234,39 @@ def __repr__(self):
)

class Meta:
"""sets up postgres GIN index field"""
"""set up indexes and triggers"""

# pylint: disable=line-too-long

indexes = (GinIndex(fields=["search_vector"]),)
triggers = [
pgtrigger.Trigger(
name="update_search_vector_on_book_edit",
when=pgtrigger.Before,
operation=pgtrigger.Insert
| pgtrigger.UpdateOf("title", "subtitle", "series", "search_vector"),
func=format_trigger(
"""new.search_vector :=
-- title, with priority A (parse in English, default to simple if empty)
setweight(COALESCE(nullif(
to_tsvector('english', new.title), ''),
to_tsvector('simple', new.title)), 'A') ||
-- subtitle, with priority B (always in English?)
setweight(to_tsvector('english', COALESCE(new.subtitle, '')), 'B') ||
-- list of authors, with priority C (TODO: add aliases?, bookwyrm-social#3063)
(SELECT setweight(to_tsvector('simple', COALESCE(array_to_string(ARRAY_AGG(bookwyrm_author.name), ' '), '')), 'C')
FROM bookwyrm_author
LEFT JOIN bookwyrm_book_authors
ON bookwyrm_author.id = bookwyrm_book_authors.author_id
WHERE bookwyrm_book_authors.book_id = new.id
) ||
--- last: series name, with lowest priority
setweight(to_tsvector('english', COALESCE(new.series, '')), 'D');
RETURN new;
"""
),
)
]


class Work(OrderedCollectionPageMixin, Book):
Expand Down
1 change: 1 addition & 0 deletions bookwyrm/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@
"celery",
"django_celery_beat",
"imagekit",
"pgtrigger",
"storages",
]

Expand Down
186 changes: 186 additions & 0 deletions bookwyrm/tests/test_book_search.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" test searching for books """
import datetime
from django.db import connection
from django.test import TestCase
from django.utils import timezone

Expand Down Expand Up @@ -134,3 +135,188 @@ def parse_isbn_search_data(self, data):
# there's really not much to test here, it's just a dataclass
self.assertEqual(result.confidence, 1)
self.assertEqual(result.title, "Title")


class SearchVectorTest(TestCase):
"""check search_vector is computed correctly"""
dato marked this conversation as resolved.
Show resolved Hide resolved

def test_search_vector_simple(self):
"""simplest search vector"""
book = self._create_book("Book", "Mary")
self.assertEqual(book.search_vector, "'book':1A 'mary':2C") # A > C (priority)

def test_search_vector_all_parts(self):
"""search vector with subtitle and series"""
# for a book like this we call `to_tsvector("Book Long Mary Bunch")`, hence the
# indexes in the search vector. (priority "D" is the default, and never shown.)
book = self._create_book("Book", "Mary", subtitle="Long", series="Bunch")
self.assertEqual(book.search_vector, "'book':1A 'bunch':4 'long':2B 'mary':3C")

def test_search_vector_parse_book(self):
"""book parts are parsed in english"""
# FIXME: at some point this should stop being the default.
book = self._create_book(
"Edition", "Editor", series="Castle", subtitle="Writing"
)
self.assertEqual(
book.search_vector, "'castl':4 'edit':1A 'editor':3C 'write':2B"
)

def test_search_vector_parse_author(self):
"""author name is not stem'd or affected by stop words"""
book = self._create_book("Writing", "Writes")
self.assertEqual(book.search_vector, "'write':1A 'writes':2C")

book = self._create_book("She Is Writing", "She Writes")
self.assertEqual(book.search_vector, "'she':4C 'write':3A 'writes':5C")

def test_search_vector_parse_title_empty(self):
"""empty parse in English retried as simple title"""
book = self._create_book("Here We", "John")
self.assertEqual(book.search_vector, "'here':1A 'john':3C 'we':2A")

book = self._create_book("Hear We Come", "John")
self.assertEqual(book.search_vector, "'come':3A 'hear':1A 'john':4C")

def test_search_vector_no_author(self):
"""book with no authors gets processed normally"""
book = self._create_book("Book", None, series="Bunch")
self.assertEqual(book.search_vector, "'book':1A 'bunch':2")

@staticmethod
def _create_book(
title, author_name, /, *, subtitle="", series="", author_alias=None
):
"""quickly create a book"""
work = models.Work.objects.create(title="work")
edition = models.Edition.objects.create(
title=title,
series=series or None,
subtitle=subtitle or None,
isbn_10="0000000000",
parent_work=work,
)
if author_name is not None:
author = models.Author.objects.create(
name=author_name, aliases=author_alias or []
)
edition.authors.add(author)
edition.save(broadcast=False)
edition.refresh_from_db()
return edition


class SearchVectorTriggers(TestCase):
"""look for books as they change"""

def setUp(self):
"""we need basic test data and mocks"""
self.work = models.Work.objects.create(title="This Work")
self.author = models.Author.objects.create(name="Name")
self.edition = models.Edition.objects.create(
title="First Edition of Work",
subtitle="Some Extra Words Are Good",
series="A Fabulous Sequence of Items",
parent_work=self.work,
isbn_10="0000000000",
)
self.edition.authors.add(self.author)
self.edition.save(broadcast=False)

@classmethod
def setUpTestData(cls):
"""create conditions that trigger known old bugs"""
with connection.cursor() as cursor:
cursor.execute(
"""
ALTER SEQUENCE bookwyrm_author_id_seq RESTART WITH 20;
ALTER SEQUENCE bookwyrm_book_authors_id_seq RESTART WITH 300;
"""
)

def test_search_after_changed_metadata(self):
"""book found after updating metadata"""
self.assertEqual(self.edition, self._search_first("First")) # title
self.assertEqual(self.edition, self._search_first("Good")) # subtitle
self.assertEqual(self.edition, self._search_first("Sequence")) # series

self.edition.title = "Second Title of Work"
self.edition.subtitle = "Fewer Words Is Better"
self.edition.series = "A Wondrous Bunch"
self.edition.save(broadcast=False)

self.assertEqual(self.edition, self._search_first("Second")) # title new
self.assertEqual(self.edition, self._search_first("Fewer")) # subtitle new
self.assertEqual(self.edition, self._search_first("Wondrous")) # series new

self.assertFalse(self._search_first("First")) # title old
self.assertFalse(self._search_first("Good")) # subtitle old
self.assertFalse(self._search_first("Sequence")) # series old

def test_search_after_author_remove(self):
"""book not found via removed author"""
self.assertEqual(self.edition, self._search_first("Name"))

self.edition.authors.set([])
self.edition.save(broadcast=False)

self.assertFalse(self._search("Name"))
self.assertEqual(self.edition, self._search_first("Edition"))

def test_search_after_author_add(self):
"""book found by newly-added author"""
new_author = models.Author.objects.create(name="Mozilla")

self.assertFalse(self._search("Mozilla"))

self.edition.authors.add(new_author)
self.edition.save(broadcast=False)

self.assertEqual(self.edition, self._search_first("Mozilla"))
self.assertEqual(self.edition, self._search_first("Name"))

def test_search_after_author_add_remove_sql(self):
"""add/remove author through SQL to ensure execution of book_authors trigger"""
# Tests calling edition.save(), above, pass even if the trigger in
# bookwyrm_book_authors is removed (probably because they trigger the one
# in bookwyrm_book directly). Here we make sure to exercise the former.
new_author = models.Author.objects.create(name="Mozilla")

with connection.cursor() as cursor:
cursor.execute(
"DELETE FROM bookwyrm_book_authors WHERE book_id = %s",
[self.edition.id],
)
self.assertFalse(self._search("Name"))
self.assertFalse(self._search("Mozilla"))

with connection.cursor() as cursor:
cursor.execute(
"INSERT INTO bookwyrm_book_authors (book_id,author_id) VALUES (%s,%s)",
[self.edition.id, new_author.id],
)
self.assertFalse(self._search("Name"))
self.assertEqual(self.edition, self._search_first("Mozilla"))

def test_search_after_updated_author_name(self):
"""book found under new author name"""
self.assertEqual(self.edition, self._search_first("Name"))
self.assertFalse(self._search("Identifier"))

self.author.name = "Identifier"
self.author.save(broadcast=False)

self.assertFalse(self._search("Name"))
self.assertEqual(self.edition, self._search_first("Identifier"))
self.assertEqual(self.edition, self._search_first("Work"))

def _search_first(self, query):
"""wrapper around search_title_author"""
return self._search(query, return_first=True)

@staticmethod
def _search(query, *, return_first=False):
"""wrapper around search_title_author"""
return book_search.search_title_author(
query, min_confidence=0, return_first=return_first
)
Loading
Loading