Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add management commands to merge a pair of editions or authors #2821

Merged
merged 4 commits into from
Apr 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 2 additions & 36 deletions bookwyrm/management/commands/deduplicate_book_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,38 +3,7 @@
from django.core.management.base import BaseCommand
from django.db.models import Count
from bookwyrm import models


def update_related(canonical, obj):
"""update all the models with fk to the object being removed"""
# move related models to canonical
related_models = [
(r.remote_field.name, r.related_model) for r in canonical._meta.related_objects
]
for (related_field, related_model) in related_models:
related_objs = related_model.objects.filter(**{related_field: obj})
for related_obj in related_objs:
print("replacing in", related_model.__name__, related_field, related_obj.id)
try:
setattr(related_obj, related_field, canonical)
related_obj.save()
except TypeError:
getattr(related_obj, related_field).add(canonical)
getattr(related_obj, related_field).remove(obj)


def copy_data(canonical, obj):
"""try to get the most data possible"""
for data_field in obj._meta.get_fields():
if not hasattr(data_field, "activitypub_field"):
continue
data_value = getattr(obj, data_field.name)
if not data_value:
continue
if not getattr(canonical, data_field.name):
print("setting data field", data_field.name, data_value)
setattr(canonical, data_field.name, data_value)
canonical.save()
from bookwyrm.management.merge import merge_objects


def dedupe_model(model):
Expand All @@ -61,10 +30,7 @@ def dedupe_model(model):
print("keeping", canonical.remote_id)
for obj in objs[1:]:
print(obj.remote_id)
copy_data(canonical, obj)
update_related(canonical, obj)
# remove the outdated entry
obj.delete()
merge_objects(canonical, obj)


class Command(BaseCommand):
Expand Down
12 changes: 12 additions & 0 deletions bookwyrm/management/commands/merge_authors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
""" PROCEED WITH CAUTION: uses deduplication fields to permanently
merge author data objects """
from bookwyrm import models
from bookwyrm.management.merge_command import MergeCommand


class Command(MergeCommand):
"""merges two authors by ID"""

help = "merges specified authors into one"

MODEL = models.Author
12 changes: 12 additions & 0 deletions bookwyrm/management/commands/merge_editions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
""" PROCEED WITH CAUTION: uses deduplication fields to permanently
merge edition data objects """
from bookwyrm import models
from bookwyrm.management.merge_command import MergeCommand


class Command(MergeCommand):
"""merges two editions by ID"""

help = "merges specified editions into one"

MODEL = models.Edition
50 changes: 50 additions & 0 deletions bookwyrm/management/merge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from django.db.models import ManyToManyField


def update_related(canonical, obj):
"""update all the models with fk to the object being removed"""
# move related models to canonical
related_models = [
(r.remote_field.name, r.related_model) for r in canonical._meta.related_objects
]
for (related_field, related_model) in related_models:
# Skip the ManyToMany fields that aren’t auto-created. These
# should have a corresponding OneToMany field in the model for
# the linking table anyway. If we update it through that model
# instead then we won’t lose the extra fields in the linking
# table.
related_field_obj = related_model._meta.get_field(related_field)
if isinstance(related_field_obj, ManyToManyField):
through = related_field_obj.remote_field.through
if not through._meta.auto_created:
continue
related_objs = related_model.objects.filter(**{related_field: obj})
for related_obj in related_objs:
print("replacing in", related_model.__name__, related_field, related_obj.id)
try:
setattr(related_obj, related_field, canonical)
related_obj.save()
except TypeError:
getattr(related_obj, related_field).add(canonical)
getattr(related_obj, related_field).remove(obj)


def copy_data(canonical, obj):
"""try to get the most data possible"""
for data_field in obj._meta.get_fields():
if not hasattr(data_field, "activitypub_field"):
continue
data_value = getattr(obj, data_field.name)
if not data_value:
continue
if not getattr(canonical, data_field.name):
print("setting data field", data_field.name, data_value)
setattr(canonical, data_field.name, data_value)
canonical.save()


def merge_objects(canonical, obj):
copy_data(canonical, obj)
update_related(canonical, obj)
# remove the outdated entry
obj.delete()
29 changes: 29 additions & 0 deletions bookwyrm/management/merge_command.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from bookwyrm.management.merge import merge_objects
from django.core.management.base import BaseCommand


class MergeCommand(BaseCommand):
"""base class for merge commands"""

def add_arguments(self, parser):
"""add the arguments for this command"""
parser.add_argument("--canonical", type=int, required=True)
parser.add_argument("--other", type=int, required=True)

# pylint: disable=no-self-use,unused-argument
def handle(self, *args, **options):
"""merge the two objects"""
model = self.MODEL

try:
canonical = model.objects.get(id=options["canonical"])
except model.DoesNotExist:
print("canonical book doesn’t exist!")
return
try:
other = model.objects.get(id=options["other"])
except model.DoesNotExist:
print("other book doesn’t exist!")
return

merge_objects(canonical, other)