Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#2871] Refactor ZGW imports to work on different environments #1497

Merged
merged 2 commits into from
Nov 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions src/open_inwoner/openzaak/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from django.template.defaultfilters import filesizeformat
from django.template.response import TemplateResponse
from django.urls import path, reverse
from django.utils.html import format_html, format_html_join
from django.utils.translation import gettext_lazy as _, ngettext

from import_export.admin import ImportExportMixin
Expand Down Expand Up @@ -174,11 +175,30 @@ def process_file_view(self, request):
self.message_user(
request,
_(
"Successfully processed %(num_rows)d items"
% {"num_rows": import_result.total_rows_processed}
"%(num_rows)d item(s) processed in total, with %(error_rows)d failing row(s)."
% {
"num_rows": import_result.total_rows_processed,
"error_rows": len(import_result.import_errors),
}
),
messages.SUCCESS,
messages.SUCCESS
if not import_result.import_errors
else messages.WARNING,
)
if errors := import_result.import_errors:
msgs_deduped = set(error.__str__() for error in errors)
error_msg_iterator = ([msg] for msg in msgs_deduped)

error_msg_html = format_html_join(
"\n", "<li>{}</li>", error_msg_iterator
)
error_msg_html_ordered = format_html(
_("It was not possible to import the following items:")
+ f"<ol> {error_msg_html} </ol>"
)
self.message_user(
request, error_msg_html_ordered, messages.ERROR
)

return HttpResponseRedirect(
reverse(
Expand Down
264 changes: 197 additions & 67 deletions src/open_inwoner/openzaak/import_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@
from typing import IO, Any, Generator, Self
from urllib.parse import urlparse

from django.apps import apps
from django.core import serializers
from django.core.exceptions import MultipleObjectsReturned, ObjectDoesNotExist
from django.core.files.storage import Storage
from django.core.serializers.base import DeserializationError
from django.db import transaction
from django.db.models import QuerySet

Expand All @@ -22,6 +25,132 @@
logger = logging.getLogger(__name__)


class ZGWImportError(Exception):
@classmethod
def extract_error_data(cls, exc: Exception, jsonl: str):
exc_source = type(exc.__context__)
data = json.loads(jsonl) if jsonl else {}
source_config = apps.get_model(data["model"])

# error type
if exc_source is CatalogusConfig.DoesNotExist or source_config.DoesNotExist:
error_type = ObjectDoesNotExist
if exc_source is source_config.MultipleObjectsReturned:
error_type = MultipleObjectsReturned

# metadata about source_config
items = []
fields = data.get("fields", None)
if source_config is CatalogusConfig:
items = [
f"Domein = {fields['domein']}",
f"Rsin = {fields['rsin']}",
]
if source_config is ZaakTypeConfig:
items = [
f"Identificatie = {fields['identificatie']}",
f"Catalogus domein = {fields['catalogus'][0]}",
f"Catalogus rsin = {fields['catalogus'][1]}",
]
if source_config in {
ZaakTypeStatusTypeConfig,
ZaakTypeResultaatTypeConfig,
ZaakTypeInformatieObjectTypeConfig,
}:
items = [
f"omschrijving = {fields['omschrijving']}",
f"ZaakTypeConfig identificatie = {fields['zaaktype_config'][0]}",
f"Catalogus domein = {fields['zaaktype_config'][1]}",
f"Catalogus rsin = {fields['zaaktype_config'][2]}",
]

return {
"error_type": error_type,
"source_config_name": source_config.__name__,
"info": ", ".join(items),
}

@classmethod
def from_exception_and_jsonl(cls, exception: Exception, jsonl: str) -> Self:
error_data = cls.extract_error_data(exception, jsonl)

error_template = (
"%(source_config_name)s not found in target environment: %(info)s"
)
if error_data["error_type"] is MultipleObjectsReturned:
error_template = "Got multiple results for %(source_config_name)s: %(info)s"

return cls(error_template % error_data)


def check_catalogus_config_exists(source_config: CatalogusConfig, jsonl: str):
try:
CatalogusConfig.objects.get_by_natural_key(
domein=source_config.domein, rsin=source_config.rsin
)
except CatalogusConfig.MultipleObjectsReturned as exc:
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
except CatalogusConfig.DoesNotExist as exc:
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)


def _update_config(source, target, exclude_fields):
for field in source._meta.fields:
field_name = field.name

if field_name in exclude_fields:
continue

val = getattr(source, field_name, None)
setattr(target, field_name, val)
target.save()


def _update_zaaktype_config(source_config: ZaakTypeConfig, jsonl: str):
try:
target = ZaakTypeConfig.objects.get_by_natural_key(
identificatie=source_config.identificatie,
catalogus_domein=source_config.catalogus.domein,
catalogus_rsin=source_config.catalogus.rsin,
)
except ZaakTypeConfig.MultipleObjectsReturned as exc:
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
except (CatalogusConfig.DoesNotExist, ZaakTypeConfig.DoesNotExist) as exc:
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
else:
exclude_fields = [
"id",
"catalogus",
"urls",
"zaaktype_uuids",
]
_update_config(source_config, target, exclude_fields)


def _update_nested_zgw_config(
source_config: ZaakTypeStatusTypeConfig
| ZaakTypeResultaatTypeConfig
| ZaakTypeInformatieObjectTypeConfig,
exclude_fields: list[str],
jsonl: str,
):
zaaktype_config_identificatie = source_config.zaaktype_config.identificatie
catalogus_domein = source_config.zaaktype_config.catalogus.domein
catalogus_rsin = source_config.zaaktype_config.catalogus.rsin

try:
target = source_config.__class__.objects.get_by_natural_key(
omschrijving=source_config.omschrijving,
zaaktype_config_identificatie=zaaktype_config_identificatie,
catalogus_domein=catalogus_domein,
catalogus_rsin=catalogus_rsin,
)
except (source_config.DoesNotExist, source_config.MultipleObjectsReturned) as exc:
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
else:
_update_config(source_config, target, exclude_fields)


@dataclasses.dataclass(frozen=True)
class CatalogusConfigExport:
"""Gather and export CatalogusConfig(s) and all associated relations."""
Expand Down Expand Up @@ -113,9 +242,10 @@ class CatalogusConfigImport:
total_rows_processed: int = 0
catalogus_configs_imported: int = 0
zaaktype_configs_imported: int = 0
zaak_inormatie_object_type_configs_imported: int = 0
zaak_informatie_object_type_configs_imported: int = 0
zaak_status_type_configs_imported: int = 0
zaak_resultaat_type_configs_imported: int = 0
import_errors: list | None = None

@staticmethod
def _get_url_root(url: str) -> str:
Expand Down Expand Up @@ -149,85 +279,85 @@ def _lines_iter_from_jsonl_stream_or_string(
# Reset the stream in case it gets re-used
lines.seek(0)

@classmethod
def _rewrite_jsonl_url_references(
cls, stream_or_string: IO | str
) -> Generator[str, Any, None]:
# The assumption is that the exporting and importing instance both have
# a `Service` with the same slug as the `Service` referenced in the
# `configued_from` attribute of the imported CatalogusConfig. The
# assumption is further that all URLs in the imported objects are
# prefixed by an URL that matches the API root in the service. Because
# of this, the import file will contain URLs with a base URL pointing to
# the `api_root`` of the `configured_from` Service on the _source_
# instance, and has to be re-written to match the `api_root` of the
# `configured_from` Service on the _target_ instance. Put differently,
# we assume that we are migrating ZGW objects that _do not differ_ as
# far as the ZGW objects themselves are concerned (apart from the URL,
# they essentially point to the same ZGW backend), but that they _do_
# differ in terms of additional model fields that do not have their
# source of truth in the ZGW backends.
#
# This expectation is also encoded in our API clients: you can only
# fetch ZGW objects using the ApePie clients if the root of those
# objects matches the configured API root.

base_url_mapping = {}
for deserialized_object in serializers.deserialize(
"jsonl",
filter(
lambda row: ('"model": "openzaak.catalogusconfig"' in row),
cls._lines_iter_from_jsonl_stream_or_string(stream_or_string),
),
):
object_type: str = deserialized_object.object.__class__.__name__

if object_type == "CatalogusConfig":
target_base_url = cls._get_url_root(
deserialized_object.object.service.api_root
)
source_base_url = cls._get_url_root(deserialized_object.object.url)
base_url_mapping[source_base_url] = target_base_url
else:
# https://www.xkcd.com/2200/
logger.error(
"Tried to filter for catalogus config objects, but also got: %s",
object_type,
)

for line in cls._lines_iter_from_jsonl_stream_or_string(stream_or_string):
source_url_found = False
for source, target in base_url_mapping.items():
line = line.replace(source, target)
source_url_found = True

if not source_url_found:
raise ValueError("Unable to rewrite ZGW urls")

yield line

@classmethod
@transaction.atomic()
def from_jsonl_stream_or_string(cls, stream_or_string: IO | str) -> Self:
model_to_counter_mapping = {
"CatalogusConfig": "catalogus_configs_imported",
"ZaakTypeConfig": "zaaktype_configs_imported",
"ZaakTypeInformatieObjectTypeConfig": "zaak_inormatie_object_type_configs_imported",
"ZaakTypeInformatieObjectTypeConfig": "zaak_informatie_object_type_configs_imported",
"ZaakTypeStatusTypeConfig": "zaak_status_type_configs_imported",
"ZaakTypeResultaatTypeConfig": "zaak_resultaat_type_configs_imported",
}

object_type_counts = defaultdict(int)

for deserialized_object in serializers.deserialize(
"jsonl", cls._rewrite_jsonl_url_references(stream_or_string)
):
deserialized_object.save()
object_type = deserialized_object.object.__class__.__name__
object_type_counts[object_type] += 1
rows_successfully_processed = 0
import_errors = []
for line in cls._lines_iter_from_jsonl_stream_or_string(stream_or_string):
try:
(deserialized_object,) = serializers.deserialize(
"jsonl",
line,
use_natural_primary_keys=True,
use_natural_foreign_keys=True,
)
except DeserializationError as exc:
error = ZGWImportError.from_exception_and_jsonl(exc, line)
logger.error(error)
import_errors.append(error)
else:
source_config = deserialized_object.object
try:
match source_config:
case CatalogusConfig():
check_catalogus_config_exists(
source_config=source_config, jsonl=line
)
case ZaakTypeConfig():
_update_zaaktype_config(
source_config=source_config, jsonl=line
)
case ZaakTypeInformatieObjectTypeConfig():
exclude_fields = [
"id",
"zaaktype_config",
"zaaktype_uuids",
"informatieobjecttype_url",
]
_update_nested_zgw_config(
source_config, exclude_fields, line
)
case ZaakTypeStatusTypeConfig():
exclude_fields = [
"id",
"zaaktype_config",
"zaaktype_uuids",
"statustype_url",
]
_update_nested_zgw_config(
source_config, exclude_fields, line
)
case ZaakTypeResultaatTypeConfig():
exclude_fields = [
"id",
"zaaktype_config",
"zaaktype_uuids",
"resultaattype_url",
]
_update_nested_zgw_config(
source_config, exclude_fields, line
)
except ZGWImportError as exc:
logger.error(exc)
import_errors.append(exc)
else:
object_type = source_config.__class__.__name__
object_type_counts[object_type] += 1
rows_successfully_processed += 1

creation_kwargs = {
"total_rows_processed": sum(object_type_counts.values()),
"total_rows_processed": rows_successfully_processed + len(import_errors),
"import_errors": import_errors,
}

for model_name, counter_field in model_to_counter_mapping.items():
Expand Down
Loading
Loading