From bcb3da040fbdc347979824de2d3499631ee6fde5 Mon Sep 17 00:00:00 2001 From: Paul Schilling Date: Thu, 28 Nov 2024 16:36:53 +0100 Subject: [PATCH] [#2913] Check for duplicate keys in ZGW imports --- src/open_inwoner/openzaak/import_export.py | 65 ++++++++++++------- src/open_inwoner/openzaak/tests/test_admin.py | 10 +-- .../openzaak/tests/test_import_export.py | 45 +++++++++++-- 3 files changed, 89 insertions(+), 31 deletions(-) diff --git a/src/open_inwoner/openzaak/import_export.py b/src/open_inwoner/openzaak/import_export.py index 519bf36f26..cc1f285866 100644 --- a/src/open_inwoner/openzaak/import_export.py +++ b/src/open_inwoner/openzaak/import_export.py @@ -27,16 +27,19 @@ class ZGWImportError(Exception): @classmethod - def extract_error_data(cls, exc: Exception, jsonl: str): - exc_source = type(exc.__context__) - data = json.loads(jsonl) if jsonl else {} + def extract_error_data( + cls, exception: Exception | None = None, jsonl: str | None = None + ): + data = json.loads(jsonl) if jsonl is not None else {} source_config = apps.get_model(data["model"]) - # error type - if exc_source is CatalogusConfig.DoesNotExist or source_config.DoesNotExist: - error_type = ObjectDoesNotExist - if exc_source is source_config.MultipleObjectsReturned: - error_type = MultipleObjectsReturned + error_type = None + if exception: + exc_source = type(exception.__context__) + if exc_source is CatalogusConfig.DoesNotExist or source_config.DoesNotExist: + error_type = ObjectDoesNotExist + if exc_source is source_config.MultipleObjectsReturned: + error_type = MultipleObjectsReturned # metadata about source_config items = [] @@ -60,8 +63,6 @@ def extract_error_data(cls, exc: Exception, jsonl: str): items = [ f"omschrijving = {fields['omschrijving']!r}", f"ZaakTypeConfig identificatie = {fields['zaaktype_config'][0]!r}", - f"Catalogus domein = {fields['zaaktype_config'][1]!r}", - f"Catalogus rsin = {fields['zaaktype_config'][2]!r}", ] return { @@ -71,8 +72,8 @@ def extract_error_data(cls, exc: Exception, jsonl: str): } @classmethod - def from_exception_and_jsonl(cls, exception: Exception, jsonl: str) -> Self: - error_data = cls.extract_error_data(exception, jsonl) + def from_exception_and_jsonl(cls, jsonl: str, exception: Exception) -> Self: + error_data = cls.extract_error_data(exception=exception, jsonl=jsonl) error_template = ( "%(source_config_name)s not found in target environment: %(info)s" @@ -82,16 +83,26 @@ def from_exception_and_jsonl(cls, exception: Exception, jsonl: str) -> Self: return cls(error_template % error_data) + @classmethod + def from_jsonl(cls, jsonl: str) -> Self: + error_data = cls.extract_error_data(jsonl=jsonl) + error_template = ( + "%(source_config_name)s was processed multiple times because it contains " + "duplicate natural keys: %(info)s" + ) + return cls(error_template % error_data) + def check_catalogus_config_exists(source_config: CatalogusConfig, jsonl: str): try: CatalogusConfig.objects.get_by_natural_key( domein=source_config.domein, rsin=source_config.rsin ) - except CatalogusConfig.MultipleObjectsReturned as exc: - raise ZGWImportError.from_exception_and_jsonl(exc, jsonl) - except CatalogusConfig.DoesNotExist as exc: - raise ZGWImportError.from_exception_and_jsonl(exc, jsonl) + except ( + CatalogusConfig.DoesNotExist, + CatalogusConfig.MultipleObjectsReturned, + ) as exc: + raise ZGWImportError.from_exception_and_jsonl(exception=exc, jsonl=jsonl) def _update_config(source, target, exclude_fields): @@ -113,10 +124,13 @@ def _update_zaaktype_config(source_config: ZaakTypeConfig, jsonl: str): catalogus_domein=source_config.catalogus.domein, catalogus_rsin=source_config.catalogus.rsin, ) - except ZaakTypeConfig.MultipleObjectsReturned as exc: - raise ZGWImportError.from_exception_and_jsonl(exc, jsonl) - except (CatalogusConfig.DoesNotExist, ZaakTypeConfig.DoesNotExist) as exc: - raise ZGWImportError.from_exception_and_jsonl(exc, jsonl) + except ( + CatalogusConfig.DoesNotExist, + CatalogusConfig.MultipleObjectsReturned, + ZaakTypeConfig.DoesNotExist, + ZaakTypeConfig.MultipleObjectsReturned, + ) as exc: + raise ZGWImportError.from_exception_and_jsonl(exception=exc, jsonl=jsonl) else: exclude_fields = [ "id", @@ -146,7 +160,7 @@ def _update_nested_zgw_config( catalogus_rsin=catalogus_rsin, ) except (source_config.DoesNotExist, source_config.MultipleObjectsReturned) as exc: - raise ZGWImportError.from_exception_and_jsonl(exc, jsonl) + raise ZGWImportError.from_exception_and_jsonl(exception=exc, jsonl=jsonl) else: _update_config(source_config, target, exclude_fields) @@ -293,6 +307,7 @@ def from_jsonl_stream_or_string(cls, stream_or_string: IO | str) -> Self: rows_successfully_processed = 0 import_errors = [] + natural_keys_seen = set() for line in cls._lines_iter_from_jsonl_stream_or_string(stream_or_string): try: (deserialized_object,) = serializers.deserialize( @@ -302,11 +317,17 @@ def from_jsonl_stream_or_string(cls, stream_or_string: IO | str) -> Self: use_natural_foreign_keys=True, ) except DeserializationError as exc: - error = ZGWImportError.from_exception_and_jsonl(exc, line) + error = ZGWImportError.from_exception_and_jsonl( + exception=exc, jsonl=line + ) logger.error(error) import_errors.append(error) else: source_config = deserialized_object.object + if (natural_key := source_config.natural_key()) in natural_keys_seen: + import_errors.append(ZGWImportError.from_jsonl(line)) + continue + natural_keys_seen.add(natural_key) try: match source_config: case CatalogusConfig(): diff --git a/src/open_inwoner/openzaak/tests/test_admin.py b/src/open_inwoner/openzaak/tests/test_admin.py index 90a49158b6..7078c62065 100644 --- a/src/open_inwoner/openzaak/tests/test_admin.py +++ b/src/open_inwoner/openzaak/tests/test_admin.py @@ -293,18 +293,18 @@ def test_import_flow_reports_errors(self) -> None: messages[1], ) self.assertIn( - "ZaakTypeStatusTypeConfig not found in target environment: omschrijving = 'status omschrijving', " - "ZaakTypeConfig identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'", + "ZaakTypeStatusTypeConfig not found in target environment: omschrijving = 'bogus', " + "ZaakTypeConfig identificatie = 'ztc-id-a-0'", messages[1], ) self.assertIn( "ZaakTypeStatusTypeConfig not found in target environment: omschrijving = 'bogus', ZaakTypeConfig " - "identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'", + "identificatie = 'ztc-id-a-0'", messages[1], ) self.assertIn( "ZaakTypeInformatieObjectTypeConfig not found in target environment: omschrijving = 'informatieobject', " - "ZaakTypeConfig identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'", + "ZaakTypeConfig identificatie = 'ztc-id-a-0'", messages[1], ) self.assertIn( @@ -313,7 +313,7 @@ def test_import_flow_reports_errors(self) -> None: ) self.assertIn( "ZaakTypeResultaatTypeConfig not found in target environment: omschrijving = 'resultaat', " - "ZaakTypeConfig identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'", + "ZaakTypeConfig identificatie = 'ztc-id-a-0'", messages[1], ) diff --git a/src/open_inwoner/openzaak/tests/test_import_export.py b/src/open_inwoner/openzaak/tests/test_import_export.py index 50478d3e60..70f3348d45 100644 --- a/src/open_inwoner/openzaak/tests/test_import_export.py +++ b/src/open_inwoner/openzaak/tests/test_import_export.py @@ -274,7 +274,11 @@ def setUp(self): '{"model": "openzaak.zaaktypestatustypeconfig", "fields": {"zaaktype_config": ["ztc-id-a-0", "DM-0", "123456789"], "statustype_url": "https://bar.maykinmedia.nl", "omschrijving": "status omschrijving", "statustekst": "statustekst nieuw", "zaaktype_uuids": "[]", "status_indicator": "", "status_indicator_text": "", "document_upload_description": "", "description": "status", "notify_status_change": true, "action_required": false, "document_upload_enabled": true, "call_to_action_url": "", "call_to_action_text": "", "case_link_text": ""}}', '{"model": "openzaak.zaaktyperesultaattypeconfig", "fields": {"zaaktype_config": ["ztc-id-a-0", "DM-0", "123456789"], "resultaattype_url": "https://bar.maykinmedia.nl", "omschrijving": "resultaat", "zaaktype_uuids": "[]", "description": "description new"}}', ] + self.json_dupes = [ + '{"model": "openzaak.zaaktypestatustypeconfig", "fields": {"zaaktype_config": ["ztc-id-a-0", "DM-0", "123456789"], "statustype_url": "https://bar.maykinmedia.nl", "omschrijving": "status omschrijving", "statustekst": "statustekst nieuw", "zaaktype_uuids": "[]", "status_indicator": "", "status_indicator_text": "", "document_upload_description": "", "description": "status", "notify_status_change": true, "action_required": false, "document_upload_enabled": true, "call_to_action_url": "", "call_to_action_text": "", "case_link_text": ""}}', + ] self.jsonl = "\n".join(self.json_lines) + self.jsonl_with_dupes = "\n".join(self.json_lines + self.json_dupes) def test_import_jsonl_update_success(self): mocks = ZGWExportImportMockData() @@ -373,7 +377,7 @@ def test_import_jsonl_missing_statustype_config(self): ) expected_error = ZGWImportError( "ZaakTypeStatusTypeConfig not found in target environment: omschrijving = 'bogus', " - "ZaakTypeConfig identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'" + "ZaakTypeConfig identificatie = 'ztc-id-a-0'" ) import_expected = dataclasses.asdict( CatalogusConfigImport( @@ -419,7 +423,7 @@ def test_import_jsonl_update_statustype_config_missing_zt_config(self): ) expected_error = ZGWImportError( "ZaakTypeStatusTypeConfig not found in target environment: omschrijving = 'status omschrijving', " - "ZaakTypeConfig identificatie = 'bogus', Catalogus domein = 'DM-1', Catalogus rsin = '666666666'" + "ZaakTypeConfig identificatie = 'bogus'" ) import_expected = dataclasses.asdict( CatalogusConfigImport( @@ -445,7 +449,7 @@ def test_import_jsonl_update_statustype_config_missing_zt_config(self): self.assertEqual(ZaakTypeStatusTypeConfig.objects.count(), 1) self.assertEqual(ZaakTypeResultaatTypeConfig.objects.count(), 1) - def test_import_jsonl_update_reports_duplicates(self): + def test_import_jsonl_update_reports_duplicate_db_records(self): mocks = ZGWExportImportMockData() ZaakTypeResultaatTypeConfigFactory( @@ -465,7 +469,7 @@ def test_import_jsonl_update_reports_duplicates(self): ) expected_error = ZGWImportError( "Got multiple results for ZaakTypeResultaatTypeConfig: omschrijving = 'resultaat', " - "ZaakTypeConfig identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'" + "ZaakTypeConfig identificatie = 'ztc-id-a-0'" ) import_expected = dataclasses.asdict( CatalogusConfigImport( @@ -484,6 +488,39 @@ def test_import_jsonl_update_reports_duplicates(self): # check import self.assertEqual(import_result, import_expected) + def test_import_jsonl_update_reports_duplicate_natural_keys_in_upload_file(self): + mocks = ZGWExportImportMockData() + + self.storage.save("import.jsonl", io.StringIO(self.jsonl_with_dupes)) + + # we use `asdict` and replace the Exceptions with string representations + # because for Exceptions raised from within dataclasses, equality ==/is identity + import_result = dataclasses.asdict( + CatalogusConfigImport.import_from_jsonl_file_in_django_storage( + "import.jsonl", self.storage + ) + ) + expected_error = ZGWImportError( + "ZaakTypeStatusTypeConfig was processed multiple times because it contains duplicate " + "natural keys: omschrijving = 'status omschrijving', ZaakTypeConfig identificatie = 'ztc-id-a-0'" + ) + import_expected = dataclasses.asdict( + CatalogusConfigImport( + total_rows_processed=6, + catalogus_configs_imported=1, + zaaktype_configs_imported=1, + zaak_informatie_object_type_configs_imported=1, + zaak_status_type_configs_imported=1, + zaak_resultaat_type_configs_imported=1, + import_errors=[expected_error], + ), + ) + import_result["import_errors"][0] = str(import_result["import_errors"][0]) + import_expected["import_errors"][0] = str(import_expected["import_errors"][0]) + + # check import + self.assertEqual(import_result, import_expected) + def test_import_jsonl_fails_with_catalogus_domein_rsin_mismatch(self): service = ServiceFactory(slug="service-0") CatalogusConfigFactory(