Skip to content

Commit

Permalink
[#2913] Check for duplicate keys in ZGW imports
Browse files Browse the repository at this point in the history
  • Loading branch information
pi-sigma committed Dec 3, 2024
1 parent 0d82080 commit 70964c6
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 26 deletions.
49 changes: 32 additions & 17 deletions src/open_inwoner/openzaak/import_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,17 @@

class ZGWImportError(Exception):
@classmethod
def extract_error_data(cls, exc: Exception, jsonl: str):
exc_source = type(exc.__context__)
def extract_error_data(cls, exception: Exception | None = None, jsonl: str = ""):
data = json.loads(jsonl) if jsonl else {}
source_config = apps.get_model(data["model"])

# error type
if exc_source is CatalogusConfig.DoesNotExist or source_config.DoesNotExist:
error_type = ObjectDoesNotExist
if exc_source is source_config.MultipleObjectsReturned:
error_type = MultipleObjectsReturned
error_type = None
if exception:
exc_source = type(exception.__context__)
if exc_source is CatalogusConfig.DoesNotExist or source_config.DoesNotExist:
error_type = ObjectDoesNotExist
if exc_source is source_config.MultipleObjectsReturned:
error_type = MultipleObjectsReturned

# metadata about source_config
items = []
Expand All @@ -60,8 +61,6 @@ def extract_error_data(cls, exc: Exception, jsonl: str):
items = [
f"omschrijving = {fields['omschrijving']!r}",
f"ZaakTypeConfig identificatie = {fields['zaaktype_config'][0]!r}",
f"Catalogus domein = {fields['zaaktype_config'][1]!r}",
f"Catalogus rsin = {fields['zaaktype_config'][2]!r}",
]

return {
Expand All @@ -71,8 +70,8 @@ def extract_error_data(cls, exc: Exception, jsonl: str):
}

@classmethod
def from_exception_and_jsonl(cls, exception: Exception, jsonl: str) -> Self:
error_data = cls.extract_error_data(exception, jsonl)
def from_exception_and_jsonl(cls, jsonl: str, exception: Exception) -> Self:
error_data = cls.extract_error_data(exception=exception, jsonl=jsonl)

error_template = (
"%(source_config_name)s not found in target environment: %(info)s"
Expand All @@ -82,16 +81,25 @@ def from_exception_and_jsonl(cls, exception: Exception, jsonl: str) -> Self:

return cls(error_template % error_data)

@classmethod
def from_jsonl(cls, jsonl: str) -> Self:
error_data = cls.extract_error_data(jsonl=jsonl)
error_template = (
"%(source_config_name)s was processed multiple times because it contains "
"duplicate natural keys: %(info)s"
)
return cls(error_template % error_data)


def check_catalogus_config_exists(source_config: CatalogusConfig, jsonl: str):
try:
CatalogusConfig.objects.get_by_natural_key(
domein=source_config.domein, rsin=source_config.rsin
)
except CatalogusConfig.MultipleObjectsReturned as exc:
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
raise ZGWImportError.from_exception_and_jsonl(exception=exc, jsonl=jsonl)
except CatalogusConfig.DoesNotExist as exc:
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
raise ZGWImportError.from_exception_and_jsonl(exception=exc, jsonl=jsonl)


def _update_config(source, target, exclude_fields):
Expand All @@ -114,9 +122,9 @@ def _update_zaaktype_config(source_config: ZaakTypeConfig, jsonl: str):
catalogus_rsin=source_config.catalogus.rsin,
)
except ZaakTypeConfig.MultipleObjectsReturned as exc:
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
raise ZGWImportError.from_exception_and_jsonl(exception=exc, jsonl=jsonl)
except (CatalogusConfig.DoesNotExist, ZaakTypeConfig.DoesNotExist) as exc:
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
raise ZGWImportError.from_exception_and_jsonl(exception=exc, jsonl=jsonl)
else:
exclude_fields = [
"id",
Expand Down Expand Up @@ -146,7 +154,7 @@ def _update_nested_zgw_config(
catalogus_rsin=catalogus_rsin,
)
except (source_config.DoesNotExist, source_config.MultipleObjectsReturned) as exc:
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
raise ZGWImportError.from_exception_and_jsonl(exception=exc, jsonl=jsonl)
else:
_update_config(source_config, target, exclude_fields)

Expand Down Expand Up @@ -293,6 +301,7 @@ def from_jsonl_stream_or_string(cls, stream_or_string: IO | str) -> Self:

rows_successfully_processed = 0
import_errors = []
natural_keys_seen = set()
for line in cls._lines_iter_from_jsonl_stream_or_string(stream_or_string):
try:
(deserialized_object,) = serializers.deserialize(
Expand All @@ -302,11 +311,17 @@ def from_jsonl_stream_or_string(cls, stream_or_string: IO | str) -> Self:
use_natural_foreign_keys=True,
)
except DeserializationError as exc:
error = ZGWImportError.from_exception_and_jsonl(exc, line)
error = ZGWImportError.from_exception_and_jsonl(
exception=exc, jsonl=line
)
logger.error(error)
import_errors.append(error)
else:
source_config = deserialized_object.object
if (natural_key := source_config.natural_key()) in natural_keys_seen:
import_errors.append(ZGWImportError.from_jsonl(line))
continue
natural_keys_seen.add(natural_key)
try:
match source_config:
case CatalogusConfig():
Expand Down
10 changes: 5 additions & 5 deletions src/open_inwoner/openzaak/tests/test_admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,18 +293,18 @@ def test_import_flow_reports_errors(self) -> None:
messages[1],
)
self.assertIn(
"ZaakTypeStatusTypeConfig not found in target environment: omschrijving = 'status omschrijving', "
"ZaakTypeConfig identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'",
"ZaakTypeStatusTypeConfig not found in target environment: omschrijving = 'bogus', "
"ZaakTypeConfig identificatie = 'ztc-id-a-0'",
messages[1],
)
self.assertIn(
"ZaakTypeStatusTypeConfig not found in target environment: omschrijving = 'bogus', ZaakTypeConfig "
"identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'",
"identificatie = 'ztc-id-a-0'",
messages[1],
)
self.assertIn(
"ZaakTypeInformatieObjectTypeConfig not found in target environment: omschrijving = 'informatieobject', "
"ZaakTypeConfig identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'",
"ZaakTypeConfig identificatie = 'ztc-id-a-0'",
messages[1],
)
self.assertIn(
Expand All @@ -313,7 +313,7 @@ def test_import_flow_reports_errors(self) -> None:
)
self.assertIn(
"ZaakTypeResultaatTypeConfig not found in target environment: omschrijving = 'resultaat', "
"ZaakTypeConfig identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'",
"ZaakTypeConfig identificatie = 'ztc-id-a-0'",
messages[1],
)

Expand Down
45 changes: 41 additions & 4 deletions src/open_inwoner/openzaak/tests/test_import_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,11 @@ def setUp(self):
'{"model": "openzaak.zaaktypestatustypeconfig", "fields": {"zaaktype_config": ["ztc-id-a-0", "DM-0", "123456789"], "statustype_url": "https://bar.maykinmedia.nl", "omschrijving": "status omschrijving", "statustekst": "statustekst nieuw", "zaaktype_uuids": "[]", "status_indicator": "", "status_indicator_text": "", "document_upload_description": "", "description": "status", "notify_status_change": true, "action_required": false, "document_upload_enabled": true, "call_to_action_url": "", "call_to_action_text": "", "case_link_text": ""}}',
'{"model": "openzaak.zaaktyperesultaattypeconfig", "fields": {"zaaktype_config": ["ztc-id-a-0", "DM-0", "123456789"], "resultaattype_url": "https://bar.maykinmedia.nl", "omschrijving": "resultaat", "zaaktype_uuids": "[]", "description": "description new"}}',
]
self.json_dupes = [
'{"model": "openzaak.zaaktypestatustypeconfig", "fields": {"zaaktype_config": ["ztc-id-a-0", "DM-0", "123456789"], "statustype_url": "https://bar.maykinmedia.nl", "omschrijving": "status omschrijving", "statustekst": "statustekst nieuw", "zaaktype_uuids": "[]", "status_indicator": "", "status_indicator_text": "", "document_upload_description": "", "description": "status", "notify_status_change": true, "action_required": false, "document_upload_enabled": true, "call_to_action_url": "", "call_to_action_text": "", "case_link_text": ""}}',
]
self.jsonl = "\n".join(self.json_lines)
self.jsonl_with_dupes = "\n".join(self.json_lines + self.json_dupes)

def test_import_jsonl_update_success(self):
mocks = ZGWExportImportMockData()
Expand Down Expand Up @@ -373,7 +377,7 @@ def test_import_jsonl_missing_statustype_config(self):
)
expected_error = ZGWImportError(
"ZaakTypeStatusTypeConfig not found in target environment: omschrijving = 'bogus', "
"ZaakTypeConfig identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'"
"ZaakTypeConfig identificatie = 'ztc-id-a-0'"
)
import_expected = dataclasses.asdict(
CatalogusConfigImport(
Expand Down Expand Up @@ -419,7 +423,7 @@ def test_import_jsonl_update_statustype_config_missing_zt_config(self):
)
expected_error = ZGWImportError(
"ZaakTypeStatusTypeConfig not found in target environment: omschrijving = 'status omschrijving', "
"ZaakTypeConfig identificatie = 'bogus', Catalogus domein = 'DM-1', Catalogus rsin = '666666666'"
"ZaakTypeConfig identificatie = 'bogus'"
)
import_expected = dataclasses.asdict(
CatalogusConfigImport(
Expand All @@ -445,7 +449,7 @@ def test_import_jsonl_update_statustype_config_missing_zt_config(self):
self.assertEqual(ZaakTypeStatusTypeConfig.objects.count(), 1)
self.assertEqual(ZaakTypeResultaatTypeConfig.objects.count(), 1)

def test_import_jsonl_update_reports_duplicates(self):
def test_import_jsonl_update_reports_duplicate_db_records(self):
mocks = ZGWExportImportMockData()

ZaakTypeResultaatTypeConfigFactory(
Expand All @@ -465,7 +469,7 @@ def test_import_jsonl_update_reports_duplicates(self):
)
expected_error = ZGWImportError(
"Got multiple results for ZaakTypeResultaatTypeConfig: omschrijving = 'resultaat', "
"ZaakTypeConfig identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'"
"ZaakTypeConfig identificatie = 'ztc-id-a-0'"
)
import_expected = dataclasses.asdict(
CatalogusConfigImport(
Expand All @@ -484,6 +488,39 @@ def test_import_jsonl_update_reports_duplicates(self):
# check import
self.assertEqual(import_result, import_expected)

def test_import_jsonl_update_reports_duplicate_natural_keys_in_upload_file(self):
mocks = ZGWExportImportMockData()

self.storage.save("import.jsonl", io.StringIO(self.jsonl_with_dupes))

# we use `asdict` and replace the Exceptions with string representations
# because for Exceptions raised from within dataclasses, equality ==/is identity
import_result = dataclasses.asdict(
CatalogusConfigImport.import_from_jsonl_file_in_django_storage(
"import.jsonl", self.storage
)
)
expected_error = ZGWImportError(
"ZaakTypeStatusTypeConfig was processed multiple times because it contains duplicate "
"natural keys: omschrijving = 'status omschrijving', ZaakTypeConfig identificatie = 'ztc-id-a-0'"
)
import_expected = dataclasses.asdict(
CatalogusConfigImport(
total_rows_processed=6,
catalogus_configs_imported=1,
zaaktype_configs_imported=1,
zaak_informatie_object_type_configs_imported=1,
zaak_status_type_configs_imported=1,
zaak_resultaat_type_configs_imported=1,
import_errors=[expected_error],
),
)
import_result["import_errors"][0] = str(import_result["import_errors"][0])
import_expected["import_errors"][0] = str(import_expected["import_errors"][0])

# check import
self.assertEqual(import_result, import_expected)

def test_import_jsonl_fails_with_catalogus_domein_rsin_mismatch(self):
service = ServiceFactory(slug="service-0")
CatalogusConfigFactory(
Expand Down

0 comments on commit 70964c6

Please sign in to comment.