Skip to content

Commit

Permalink
[#2871] Refactor ZGW import logic
Browse files Browse the repository at this point in the history
    - overwrite only editable fields when importing ZGW entities,
      skip read-only fields (url, domein, rsin)
  • Loading branch information
pi-sigma committed Nov 19, 2024
1 parent 00dcc94 commit a5c818e
Show file tree
Hide file tree
Showing 2 changed files with 323 additions and 247 deletions.
259 changes: 188 additions & 71 deletions src/open_inwoner/openzaak/import_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from django.core import serializers
from django.core.files.storage import Storage
from django.core.serializers.base import DeserializationError
from django.db import transaction
from django.db.models import QuerySet

Expand All @@ -22,6 +23,146 @@
logger = logging.getLogger(__name__)


class ZGWImportError(Exception):
    """Signal that an imported ZGW config row could not be applied.

    Raised by the import helpers in this module when the object described by
    an import row is missing from the target environment or matches more than
    one existing object.
    """


def check_catalogus_config_exists(source_config):
    """Verify that exactly one matching CatalogusConfig exists in the target.

    The lookup uses the natural key (``domein``, ``rsin``) of ``source_config``.
    Nothing is written; the looked-up object is discarded.

    Raises:
        ZGWImportError: if the lookup finds no CatalogusConfig or more than one.
    """
    domein, rsin = source_config.domein, source_config.rsin

    try:
        CatalogusConfig.objects.get_by_natural_key(domein=domein, rsin=rsin)
    except CatalogusConfig.MultipleObjectsReturned:
        message = "Got multiple results for CatalogusConfig with domain={domein} and rsin={rsin}"
        raise ZGWImportError(message.format(domein=domein, rsin=rsin))
    except CatalogusConfig.DoesNotExist:
        message = "CatalogusConfig not found in target environment: domein={domein} and rsin={rsin}"
        raise ZGWImportError(message.format(domein=domein, rsin=rsin))


def update_zaaktype_config(source_config):
    """Copy the editable fields of an imported ZaakTypeConfig onto the existing one.

    The target is looked up by natural key: the ``identificatie`` of
    ``source_config`` plus the ``domein`` and ``rsin`` of its catalogus. Only
    the fields listed in ``update_fields`` below are overwritten; every other
    attribute of the target is left untouched.

    Raises:
        ZGWImportError: if the target environment holds no matching
            ZaakTypeConfig (or catalogus), or more than one.
    """
    catalogus_domein = source_config.catalogus.domein
    catalogus_rsin = source_config.catalogus.rsin

    try:
        target = ZaakTypeConfig.objects.get_by_natural_key(
            identificatie=source_config.identificatie,
            catalogus_domein=catalogus_domein,
            catalogus_rsin=catalogus_rsin,
        )
    except ZaakTypeConfig.MultipleObjectsReturned as exc:
        raise ZGWImportError(
            "Got multiple results for ZaakTypeConfig with identificatie={identificatie}, "
            "catalogus domein={domein} and catalogus_rsin={rsin}".format(
                identificatie=source_config.identificatie,
                domein=catalogus_domein,
                rsin=catalogus_rsin,
            )
        ) from exc
    except (CatalogusConfig.DoesNotExist, ZaakTypeConfig.DoesNotExist) as exc:
        # domein/rsin are read from the related catalogus (see the locals
        # above), not from the ZaakTypeConfig itself; reading them off
        # `source_config` here would replace the intended ZGWImportError
        # with an AttributeError.
        raise ZGWImportError(
            "ZaakTypeConfig not found in target environment: identificatie={identificatie}, "
            "catalogus domein={domein}, catalogus rsin={rsin}".format(
                identificatie=source_config.identificatie,
                domein=catalogus_domein,
                rsin=catalogus_rsin,
            ),
        ) from exc

    update_fields = [
        "notify_status_changes",
        "external_document_upload_url",
        "document_upload_enabled",
        "contact_form_enabled",
        "contact_subject_code",
        "relevante_zaakperiode",
    ]
    for field in update_fields:
        # A field missing on the source is written as None on the target.
        setattr(target, field, getattr(source_config, field, None))
    target.save()


def _update_nested_zgw_config(source_config: type, update_fields: list[str]):
zaaktype_config_identificatie = source_config.zaaktype_config.identificatie
catalogus_domein = source_config.zaaktype_config.catalogus.domein
catalogus_rsin = source_config.zaaktype_config.catalogus.rsin

try:
target = source_config.__class__.objects.get_by_natural_key(
omschrijving=source_config.omschrijving,
zaak_type_config_identificatie=zaaktype_config_identificatie,
catalogus_domein=catalogus_domein,
catalogus_rsin=catalogus_rsin,
)
except ZaakTypeInformatieObjectTypeConfig.MultipleObjectsReturned:
raise ZGWImportError(
f"Got multiple results for {source_config.__class__.__name__} with: "
"zaaktype config={zaaktype_config_identificatie}, catalogus_domein={catalogus_domein}, "
"catalogus_rsin={catalogus_rsin}".format(
zaaktype_config_identificatie=zaaktype_config_identificatie,
catalogus_domein=catalogus_domein,
catalogus_rsin=catalogus_rsin,
),
)
except ZaakTypeInformatieObjectTypeConfig.DoesNotExist:
raise ZGWImportError(
f"{source_config.__class__.__name__} not found in target environment: "
"omschrijving={omschrijving}, catalogus domein={domein}, catalogus rsin={rsin}".format(
omschrijving=source_config.omschrijving,
domein=catalogus_domein,
rsin=catalogus_rsin,
),
)
else:
for field in update_fields:
val = getattr(source_config, field, None)
setattr(target, field, val)
target.save()


def update_zaaktype_informatie_objecttype_config(source_config):
    """Overwrite the editable fields of the existing informatie-objecttype config.

    Delegates lookup, field copying and saving to
    ``_update_nested_zgw_config``; this wrapper only supplies the list of
    fields that may be overwritten.
    """
    _update_nested_zgw_config(
        source_config,
        update_fields=[
            "zaaktype_uuids",
            "document_upload_enabled",
            "document_notification_enabled",
        ],
    )


def update_zaaktype_statustype_config(source_config):
    """Overwrite the editable fields of the existing statustype config.

    Delegates lookup, field copying and saving to
    ``_update_nested_zgw_config``; this wrapper only supplies the list of
    fields that may be overwritten.
    """
    update_fields = [
        "statustekst",
        "zaaktype_uuids",
        "status_indicator",
        "status_indicator_text",
        "document_upload_description",
        # Previously misspelled as "desciption": the helper reads fields with
        # getattr(..., None), so the typo silently skipped the real field and
        # set a stray attribute on the target instead.
        "description",
        "notify_status_change",
        "action_required",
        "document_upload_enabled",
        "call_to_action_url",
        "call_to_action_text",
        "case_link_text",
    ]
    _update_nested_zgw_config(source_config, update_fields)


def update_zaaktype_resultaattype_config(source_config):
    """Overwrite the editable fields of the existing resultaattype config.

    Delegates lookup, field copying and saving to
    ``_update_nested_zgw_config``; this wrapper only supplies the list of
    fields that may be overwritten.
    """
    _update_nested_zgw_config(
        source_config,
        update_fields=["zaaktype_uuids", "description"],
    )


@dataclasses.dataclass(frozen=True)
class CatalogusConfigExport:
"""Gather and export CatalogusConfig(s) and all associated relations."""
Expand Down Expand Up @@ -113,9 +254,10 @@ class CatalogusConfigImport:
total_rows_processed: int = 0
catalogus_configs_imported: int = 0
zaaktype_configs_imported: int = 0
zaak_inormatie_object_type_configs_imported: int = 0
zaak_informatie_object_type_configs_imported: int = 0
zaak_status_type_configs_imported: int = 0
zaak_resultaat_type_configs_imported: int = 0
import_errors: list | None = None

@staticmethod
def _get_url_root(url: str) -> str:
Expand Down Expand Up @@ -149,90 +291,65 @@ def _lines_iter_from_jsonl_stream_or_string(
# Reset the stream in case it gets re-used
lines.seek(0)

@classmethod
def _rewrite_jsonl_url_references(
    cls, stream_or_string: IO | str
) -> Generator[str, Any, None]:
    """Yield every JSONL line with source base URLs replaced by target ones.

    Works in two passes over the input. The first pass deserializes only the
    rows containing ``"model": "openzaak.catalogusconfig"`` and builds a
    mapping from the URL root of each catalogus ``url`` to the URL root of
    its service's ``api_root``. The second pass applies every mapping entry
    to every line with ``str.replace`` and yields the result.

    Raises:
        ValueError: if the replacement loop runs zero times for a line, i.e.
            when no base URL mapping could be built.
    """
    # The assumption is that the exporting and importing instance both have
    # a `Service` with the same slug as the `Service` referenced in the
    # `configured_from` attribute of the imported CatalogusConfig. The
    # assumption is further that all URLs in the imported objects are
    # prefixed by a URL that matches the API root in the service. Because
    # of this, the import file will contain URLs with a base URL pointing to
    # the `api_root` of the `configured_from` Service on the _source_
    # instance, and has to be re-written to match the `api_root` of the
    # `configured_from` Service on the _target_ instance. Put differently,
    # we assume that we are migrating ZGW objects that _do not differ_ as
    # far as the ZGW objects themselves are concerned (apart from the URL,
    # they essentially point to the same ZGW backend), but that they _do_
    # differ in terms of additional model fields that do not have their
    # source of truth in the ZGW backends.
    #
    # This expectation is also encoded in our API clients: you can only
    # fetch ZGW objects using the API clients if the root of those
    # objects matches the configured API root.

    # Pass 1: map source URL root -> target URL root, one entry per catalogus.
    base_url_mapping = {}
    for deserialized_object in serializers.deserialize(
        "jsonl",
        filter(
            # Cheap substring pre-filter on the raw JSON text, so only
            # catalogus rows are deserialized in this pass.
            lambda row: ('"model": "openzaak.catalogusconfig"' in row),
            cls._lines_iter_from_jsonl_stream_or_string(stream_or_string),
        ),
        use_natural_foreign_keys=True,
        use_natural_primary_keys=True,
    ):
        object_type: str = deserialized_object.object.__class__.__name__

        if object_type == "CatalogusConfig":
            # presumably `service` was resolved by natural key against the
            # target database, so its api_root is the target's — TODO confirm
            target_base_url = cls._get_url_root(
                deserialized_object.object.service.api_root
            )
            source_base_url = cls._get_url_root(deserialized_object.object.url)
            base_url_mapping[source_base_url] = target_base_url
        else:
            # https://www.xkcd.com/2200/
            logger.error(
                "Tried to filter for catalogus config objects, but also got: %s",
                object_type,
            )

    # Pass 2: rewrite every line using all collected mappings.
    for line in cls._lines_iter_from_jsonl_stream_or_string(stream_or_string):
        source_url_found = False
        for source, target in base_url_mapping.items():
            line = line.replace(source, target)
            # NOTE(review): set on every iteration, whether or not `source`
            # actually occurs in `line`; the check below therefore only
            # fires when `base_url_mapping` is empty.
            source_url_found = True

        if not source_url_found:
            raise ValueError("Unable to rewrite ZGW urls")

        yield line

@classmethod
@transaction.atomic()
def from_jsonl_stream_or_string(cls, stream_or_string: IO | str) -> Self:
model_to_counter_mapping = {
"CatalogusConfig": "catalogus_configs_imported",
"ZaakTypeConfig": "zaaktype_configs_imported",
"ZaakTypeInformatieObjectTypeConfig": "zaak_inormatie_object_type_configs_imported",
"ZaakTypeInformatieObjectTypeConfig": "zaak_informatie_object_type_configs_imported",
"ZaakTypeStatusTypeConfig": "zaak_status_type_configs_imported",
"ZaakTypeResultaatTypeConfig": "zaak_resultaat_type_configs_imported",
}

object_type_counts = defaultdict(int)

for deserialized_object in serializers.deserialize(
"jsonl",
cls._rewrite_jsonl_url_references(stream_or_string),
use_natural_foreign_keys=True,
use_natural_primary_keys=True,
):
deserialized_object.save()
object_type = deserialized_object.object.__class__.__name__
object_type_counts[object_type] += 1
import_errors = []
for line in cls._lines_iter_from_jsonl_stream_or_string(stream_or_string):
try:
(deserialized_object,) = serializers.deserialize(
"jsonl",
line,
use_natural_primary_keys=True,
use_natural_foreign_keys=True,
)
except DeserializationError as exc:
exc_source = type(exc.__context__)
if (
exc_source is CatalogusConfig.DoesNotExist
or ZaakTypeConfig.DoesNotExist
):
logger.error(exc)
import_errors.append(exc)
else:
source_config = deserialized_object.object
try:
match source_config:
case CatalogusConfig():
check_catalogus_config_exists(source_config=source_config)
case ZaakTypeConfig():
update_zaaktype_config(source_config=source_config)
case ZaakTypeInformatieObjectTypeConfig():
update_zaaktype_informatie_objecttype_config(
source_config=source_config
)
case ZaakTypeStatusTypeConfig():
update_zaaktype_statustype_config(
source_config=source_config
)
case ZaakTypeResultaatTypeConfig():
update_zaaktype_resultaattype_config(
source_config=source_config
)
except ZGWImportError as exc:
logger.error(exc)
import_errors.append(exc)
else:
object_type = deserialized_object.object.__class__.__name__
object_type_counts[object_type] += 1

creation_kwargs = {
"total_rows_processed": sum(object_type_counts.values()),
"import_errors": import_errors,
}

for model_name, counter_field in model_to_counter_mapping.items():
Expand Down
Loading

0 comments on commit a5c818e

Please sign in to comment.