Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove external integrations from collection integrations (PP-503) #1494

Merged
merged 24 commits into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
0d9d4ff
Remove external integration.
jonathangreen Oct 25, 2023
400d0b1
Collections don't store their own name, that is delegated to the inte…
jonathangreen Oct 26, 2023
635f313
Collections are related to libraries via their settings.
jonathangreen Oct 27, 2023
33a83d9
Remove collection external_account_id setting.
jonathangreen Oct 27, 2023
72df032
Remove a couple more unused properties
jonathangreen Oct 30, 2023
58497be
Remove OPDS importer specific primary_identifier_source from collection.
jonathangreen Oct 30, 2023
c89b097
Turn protocol into a property rather than a hybrid-property
jonathangreen Oct 30, 2023
bbe033d
Move child settings into child settings object.
jonathangreen Oct 30, 2023
c8d9039
Add index.
jonathangreen Oct 30, 2023
1d324f3
Replace collection settings controller with integration settings cont…
jonathangreen Oct 31, 2023
1ac0512
Update self test controller
jonathangreen Nov 1, 2023
6927204
Remove unused OPDS self test code
jonathangreen Nov 1, 2023
adf7aa7
Add initial migration script
jonathangreen Nov 1, 2023
4a444c2
Add additional test for collection self tests
jonathangreen Nov 1, 2023
cfc1f59
Work through the migration. Don't remove old DB columns.
jonathangreen Nov 1, 2023
613e134
Mypy fix
jonathangreen Nov 1, 2023
593901c
Clean up code coverage a bit.
jonathangreen Nov 2, 2023
b1c284f
Add tests for migration.
jonathangreen Nov 2, 2023
eafb469
Small updates for issues found in manual testing.
jonathangreen Nov 3, 2023
7de714b
Address code review feedback.
jonathangreen Nov 6, 2023
d902766
Make sure AnyHttpUrl is used everywhere.
jonathangreen Nov 6, 2023
cc2f196
Merge branch 'main' into feature/remove-external-integration-collection
jonathangreen Nov 7, 2023
75ba9ef
Fix opds validation tests.
jonathangreen Nov 7, 2023
94efafa
Merge branch 'main' into feature/remove-external-integration-collection
jonathangreen Nov 17, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,271 @@
"""Remove collection external integration.

Revision ID: 2d72d6876c52
Revises: cc084e35e037
Create Date: 2023-11-01 22:42:06.754873+00:00

"""

import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

from alembic import op
from api.integration.registry.license_providers import LicenseProvidersRegistry
from core.migration.util import migration_logger
from core.model import json_serializer

# revision identifiers, used by Alembic.
revision = "2d72d6876c52"
down_revision = "cc084e35e037"
branch_labels = None
depends_on = None


log = migration_logger(revision)


def upgrade() -> None:
    """Move collections off of external integrations.

    Copies collection names, token-auth settings, and external_account_id
    into integration_configurations, then detaches (but does not drop) the
    legacy columns and join-table data.
    """
    conn = op.get_bind()

    # Collection names have drifted out of sync with their integration names.
    # The collection name is what users actually see, so before the collection
    # name column stops being used, copy it over any integration name that
    # differs. The column itself stays for now, but becomes nullable and loses
    # its index.
    mismatched_names = conn.execute(
        "SELECT c.id as collection_id, ic.id as integration_id, ic.name as integration_name, "
        "c.name as collection_name from collections c JOIN integration_configurations ic "
        "ON c.integration_configuration_id = ic.id WHERE c.name != ic.name"
    ).all()
    for record in mismatched_names:
        log.info(
            f"Updating name for collection {record.collection_id} from {record.integration_name} to {record.collection_name}."
        )
        conn.execute(
            "UPDATE integration_configurations SET name = (%s) WHERE id = (%s)",
            (record.collection_name, record.integration_id),
        )

    op.alter_column("collections", "name", existing_type=sa.VARCHAR(), nullable=True)
    op.drop_index("ix_collections_name", table_name="collections")

    # The TOKEN_AUTH setting moves from an external integration into a new
    # JSONB "context" column. The column must end up non-nullable, but the
    # table already has rows (a server/application default cannot backfill
    # them), so: create it nullable, backfill every row with an empty JSON
    # object, copy over the real values, then tighten it to NOT NULL.
    op.add_column(
        "integration_configurations",
        sa.Column("context", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
    )
    conn.execute("UPDATE integration_configurations SET context = '{}'")

    token_auth_rows = conn.execute(
        "SELECT c.id, cs.value FROM collections c "
        "JOIN externalintegrations ei ON c.external_integration_id = ei.id "
        "JOIN configurationsettings cs ON ei.id = cs.external_integration_id "
        "WHERE key='token_auth_endpoint' and value <> ''"
    ).all()
    for record in token_auth_rows:
        context = json_serializer({"token_auth_endpoint": record.value})
        log.info(f"Updating context for collection {record.id} to {context}.")
        conn.execute(
            "UPDATE integration_configurations SET context = (%s) "
            "FROM collections "
            "WHERE integration_configurations.id = collections.integration_configuration_id "
            "and collections.id = (%s)",
            (context, record.id),
        )

    op.alter_column("integration_configurations", "context", nullable=False)

    # external_account_id moves into the integration's settings column. The
    # merged settings are validated through the protocol's settings class
    # before being written back.
    integration_rows = conn.execute(
        "SELECT ic.id as integration_id, ic.settings, ic.protocol, ic.goal, c.external_account_id FROM collections c "
        "JOIN integration_configurations ic ON c.integration_configuration_id = ic.id"
    ).all()
    registry = LicenseProvidersRegistry()
    for record in integration_rows:
        if record.external_account_id is None:
            continue
        merged_settings = {**record.settings, "external_account_id": record.external_account_id}
        impl_class = registry.get(record.protocol)
        if impl_class is None:
            raise RuntimeError(
                f"Could not find implementation for protocol {record.protocol}"
            )
        validated_settings = impl_class.settings_class()(**merged_settings).dict()
        if record.settings == validated_settings:
            # Nothing changed after validation; skip the write.
            continue
        new_settings = json_serializer(validated_settings)
        log.info(
            f"Updating settings for integration {record.integration_id} from {record.settings} to {new_settings}."
        )
        conn.execute(
            "UPDATE integration_configurations SET settings = (%s) WHERE id = (%s)",
            (new_settings, record.integration_id),
        )

    # Collections now depend on their integration configuration, so the
    # foreign key can no longer be nullable. Recreate the constraint without
    # its previous ondelete behavior.
    op.alter_column(
        "collections",
        "integration_configuration_id",
        existing_type=sa.INTEGER(),
        nullable=False,
    )
    op.drop_constraint(
        "collections_integration_configuration_id_fkey",
        "collections",
        type_="foreignkey",
    )
    op.create_foreign_key(
        "collections_integration_configuration_id_fkey",
        "collections",
        "integration_configurations",
        ["integration_configuration_id"],
        ["id"],
    )

    # collections_libraries is superseded by integration_library_configurations.
    # Keep its data for now, but detach all constraints and indexes.
    op.alter_column(
        "collections_libraries",
        "collection_id",
        existing_type=sa.INTEGER(),
        nullable=True,
    )
    op.alter_column(
        "collections_libraries", "library_id", existing_type=sa.INTEGER(), nullable=True
    )
    op.drop_index(
        "ix_collections_libraries_collection_id", table_name="collections_libraries"
    )
    op.drop_index(
        "ix_collections_libraries_library_id", table_name="collections_libraries"
    )
    op.drop_constraint(
        "collections_libraries_collection_id_fkey",
        "collections_libraries",
        type_="foreignkey",
    )
    op.drop_constraint(
        "collections_libraries_library_id_fkey",
        "collections_libraries",
        type_="foreignkey",
    )

    # collections.external_integration_id is obsolete; the column stays for
    # now, but its index and foreign key go away.
    op.drop_index("ix_collections_external_integration_id", table_name="collections")
    op.drop_constraint(
        "collections_external_integration_id_fkey", "collections", type_="foreignkey"
    )

    # GIN index over the settings JSONB so integrations with a specific
    # setting can be found quickly.
    op.create_index(
        "ix_integration_configurations_settings_dict",
        "integration_configurations",
        ["settings"],
        unique=False,
        postgresql_using="gin",
    )


def downgrade() -> None:
    """Restore the schema objects removed or relaxed by upgrade().

    Recreates the dropped indexes and foreign keys and reinstates the
    NOT NULL constraints. Data copied into the new "context" column is
    discarded when that column is dropped; the legacy columns themselves
    were never removed by upgrade(), so no data is copied back.
    """
    # Drop the GIN index over integration_configurations.settings.
    op.drop_index(
        "ix_integration_configurations_settings_dict",
        table_name="integration_configurations",
        postgresql_using="gin",
    )

    # Reattach collections.external_integration_id to externalintegrations
    # and restore its index.
    op.create_foreign_key(
        "collections_external_integration_id_fkey",
        "collections",
        "externalintegrations",
        ["external_integration_id"],
        ["id"],
    )
    op.create_index(
        "ix_collections_external_integration_id",
        "collections",
        ["external_integration_id"],
        unique=False,
    )

    # Restore the collections_libraries join table's foreign keys, indexes,
    # and NOT NULL constraints.
    op.create_foreign_key(
        "collections_libraries_collection_id_fkey",
        "collections_libraries",
        "collections",
        ["collection_id"],
        ["id"],
    )
    op.create_foreign_key(
        "collections_libraries_library_id_fkey",
        "collections_libraries",
        "libraries",
        ["library_id"],
        ["id"],
    )
    op.create_index(
        "ix_collections_libraries_library_id",
        "collections_libraries",
        ["library_id"],
        unique=False,
    )
    op.create_index(
        "ix_collections_libraries_collection_id",
        "collections_libraries",
        ["collection_id"],
        unique=False,
    )
    op.alter_column(
        "collections_libraries",
        "library_id",
        existing_type=sa.INTEGER(),
        nullable=False,
    )
    op.alter_column(
        "collections_libraries",
        "collection_id",
        existing_type=sa.INTEGER(),
        nullable=False,
    )

    # Recreate the collections -> integration_configurations foreign key with
    # its original ondelete="SET NULL" behavior, and allow NULLs again.
    op.drop_constraint(
        "collections_integration_configuration_id_fkey",
        "collections",
        type_="foreignkey",
    )
    op.create_foreign_key(
        "collections_integration_configuration_id_fkey",
        "collections",
        "integration_configurations",
        ["integration_configuration_id"],
        ["id"],
        ondelete="SET NULL",
    )
    op.alter_column(
        "collections",
        "integration_configuration_id",
        existing_type=sa.INTEGER(),
        nullable=True,
    )

    # Remove the context column added by upgrade(); its contents are lost.
    op.drop_column("integration_configurations", "context")

    # Restore the index on collections.name and make the column required again.
    # NOTE(review): this index is created with unique=False, while upgrade()'s
    # comments describe removing a unique constraint — confirm the original
    # index was non-unique.
    op.create_index("ix_collections_name", "collections", ["name"], unique=False)
    op.alter_column("collections", "name", existing_type=sa.VARCHAR(), nullable=False)
2 changes: 1 addition & 1 deletion api/admin/controller/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def setup_admin_controllers(manager: CirculationManager):
)
manager.admin_collection_settings_controller = CollectionSettingsController(manager)
manager.admin_collection_self_tests_controller = CollectionSelfTestsController(
manager
manager._db
)
manager.admin_sitewide_configuration_settings_controller = (
SitewideConfigurationSettingsController(manager)
Expand Down
100 changes: 39 additions & 61 deletions api/admin/controller/collection_self_tests.py
Original file line number Diff line number Diff line change
@@ -1,63 +1,41 @@
from flask_babel import lazy_gettext as _
from __future__ import annotations

from api.admin.controller.self_tests import SelfTestsController
from api.admin.problem_details import *
from typing import Any, Dict, Optional

from flask import Response
from sqlalchemy.orm import Session

from api.admin.controller.self_tests import IntegrationSelfTestsController
from api.circulation import CirculationApiType
from api.integration.registry.license_providers import LicenseProvidersRegistry
from api.selftest import HasCollectionSelfTests
from core.model import Collection
from core.opds_import import OPDSImporter, OPDSImportMonitor


class CollectionSelfTestsController(SelfTestsController):
    """Admin controller for viewing and running a collection's self tests.

    Implements the hook methods (look_up_by_id, get_info, run_tests) that the
    SelfTestsController base class presumably drives via _manage_self_tests —
    confirm against the base class.
    """

    def __init__(self, manager):
        super().__init__(manager)
        # Human-readable noun used by the base controller (e.g. in messages).
        self.type = _("collection")
        self.registry = LicenseProvidersRegistry()
        self.protocols = self._get_collection_protocols(self.registry.integrations)

    def process_collection_self_tests(self, identifier):
        # Flask-facing entry point: delegate to the base-class workflow.
        return self._manage_self_tests(identifier)

    def look_up_by_id(self, identifier):
        """Find the collection to display self test results or run self tests for;
        display an error message if a collection with this ID turns out not to exist"""

        collection = Collection.by_id(self._db, identifier)
        if not collection:
            return NO_SUCH_COLLECTION

        # Side effect: remember the protocol class for run_tests() below.
        self.protocol_class = self._find_protocol_class(collection)
        return collection

    def get_info(self, collection):
        """Compile information about this collection, including the results from the last time, if ever,
        that the self tests were run."""

        return dict(
            id=collection.id,
            name=collection.name,
            protocol=collection.protocol,
            parent_id=collection.parent_id,
            settings=dict(external_account_id=collection.external_account_id),
        )

    def _find_protocol_class(self, collection):
        """Figure out which protocol is providing books to this collection"""
        return self.registry.get(collection.protocol)

    def run_tests(self, collection):
        """Run the collection's self tests and return the result value.

        Returns None when the protocol class supports neither OPDS-import
        self tests nor HasCollectionSelfTests.
        """
        collection_protocol = collection.protocol or None

        # NOTE(review): indentation reconstructed from a diff; the final
        # return is assumed to sit inside this "if", so a missing protocol
        # class makes the method return None implicitly — confirm.
        if self.protocol_class:
            value = None
            # OPDS import collections run self tests through the monitor
            # class rather than the class registered for the protocol.
            if collection_protocol == OPDSImportMonitor.PROTOCOL:
                self.protocol_class = OPDSImportMonitor
                value, results = self.protocol_class.run_self_tests(
                    self._db, self.protocol_class, self._db, collection, OPDSImporter
                )
            elif issubclass(self.protocol_class, HasCollectionSelfTests):
                value, results = self.protocol_class.run_self_tests(
                    self._db, self.protocol_class, self._db, collection
                )

            return value
from core.integration.registry import IntegrationRegistry
from core.model import IntegrationConfiguration
from core.selftest import HasSelfTestsIntegrationConfiguration
from core.util.problem_detail import ProblemDetail


class CollectionSelfTestsController(IntegrationSelfTestsController[CirculationApiType]):
    """Admin controller exposing self tests for collection integrations."""

    def __init__(
        self,
        db: Session,
        registry: Optional[IntegrationRegistry[CirculationApiType]] = None,
    ):
        # Fall back to the full license-provider registry when none is given.
        super().__init__(db, registry or LicenseProvidersRegistry())

    def process_collection_self_tests(
        self, identifier: Optional[int]
    ) -> Response | ProblemDetail:
        # Thin Flask-facing wrapper around the shared self-test workflow.
        return self.process_self_tests(identifier)

    def run_self_tests(
        self, integration: IntegrationConfiguration
    ) -> Optional[Dict[str, Any]]:
        """Run self tests for *integration* and return the results dict.

        Returns None when the integration's protocol class does not
        implement HasSelfTestsIntegrationConfiguration.
        """
        protocol_class = self.get_protocol_class(integration)
        if not issubclass(protocol_class, HasSelfTestsIntegrationConfiguration):
            return None

        test_result, _ = protocol_class.run_self_tests(
            self.db, protocol_class, self.db, integration.collection
        )
        return test_result
Loading
Loading