Skip to content

Commit

Permalink
[#2913] Check for duplicate keys in ZGW imports
Browse files Browse the repository at this point in the history
  • Loading branch information
pi-sigma committed Nov 28, 2024
1 parent aad5fce commit b91bc32
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 10 deletions.
31 changes: 22 additions & 9 deletions src/open_inwoner/openzaak/import_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,17 @@

class ZGWImportError(Exception):
@classmethod
def extract_error_data(cls, exc: Exception, jsonl: str):
exc_source = type(exc.__context__)
def extract_error_data(cls, jsonl: str, exc: Exception | None = None):
data = json.loads(jsonl) if jsonl else {}
source_config = apps.get_model(data["model"])

# error type
if exc_source is CatalogusConfig.DoesNotExist or source_config.DoesNotExist:
error_type = ObjectDoesNotExist
if exc_source is source_config.MultipleObjectsReturned:
error_type = MultipleObjectsReturned
error_type = None
if exc:
exc_source = type(exc.__context__)
if exc_source is CatalogusConfig.DoesNotExist or source_config.DoesNotExist:
error_type = ObjectDoesNotExist
if exc_source is source_config.MultipleObjectsReturned:
error_type = MultipleObjectsReturned

# metadata about source_config
items = []
Expand All @@ -60,8 +61,6 @@ def extract_error_data(cls, exc: Exception, jsonl: str):
items = [
f"omschrijving = {fields['omschrijving']!r}",
f"ZaakTypeConfig identificatie = {fields['zaaktype_config'][0]!r}",
f"Catalogus domein = {fields['zaaktype_config'][1]!r}",
f"Catalogus rsin = {fields['zaaktype_config'][2]!r}",
]

return {
Expand All @@ -82,6 +81,15 @@ def from_exception_and_jsonl(cls, exception: Exception, jsonl: str) -> Self:

return cls(error_template % error_data)

@classmethod
def from_jsonl(cls, jsonl: str) -> Self:
error_data = cls.extract_error_data(jsonl)
error_template = (
"%(source_config_name)s was processed multiple times because it contains "
"duplicate natural keys: %(info)s"
)
return cls(error_template % error_data)


def check_catalogus_config_exists(source_config: CatalogusConfig, jsonl: str):
try:
Expand Down Expand Up @@ -293,6 +301,7 @@ def from_jsonl_stream_or_string(cls, stream_or_string: IO | str) -> Self:

rows_successfully_processed = 0
import_errors = []
natural_keys_seen = set()
for line in cls._lines_iter_from_jsonl_stream_or_string(stream_or_string):
try:
(deserialized_object,) = serializers.deserialize(
Expand All @@ -307,6 +316,10 @@ def from_jsonl_stream_or_string(cls, stream_or_string: IO | str) -> Self:
import_errors.append(error)
else:
source_config = deserialized_object.object
if (natural_key := source_config.natural_key()) in natural_keys_seen:
import_errors.append(ZGWImportError.from_jsonl(line))
continue
natural_keys_seen.add(natural_key)
try:
match source_config:
case CatalogusConfig():
Expand Down
39 changes: 38 additions & 1 deletion src/open_inwoner/openzaak/tests/test_import_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,11 @@ def setUp(self):
'{"model": "openzaak.zaaktypestatustypeconfig", "fields": {"zaaktype_config": ["ztc-id-a-0", "DM-0", "123456789"], "statustype_url": "https://bar.maykinmedia.nl", "omschrijving": "status omschrijving", "statustekst": "statustekst nieuw", "zaaktype_uuids": "[]", "status_indicator": "", "status_indicator_text": "", "document_upload_description": "", "description": "status", "notify_status_change": true, "action_required": false, "document_upload_enabled": true, "call_to_action_url": "", "call_to_action_text": "", "case_link_text": ""}}',
'{"model": "openzaak.zaaktyperesultaattypeconfig", "fields": {"zaaktype_config": ["ztc-id-a-0", "DM-0", "123456789"], "resultaattype_url": "https://bar.maykinmedia.nl", "omschrijving": "resultaat", "zaaktype_uuids": "[]", "description": "description new"}}',
]
self.json_dupes = [
'{"model": "openzaak.zaaktypestatustypeconfig", "fields": {"zaaktype_config": ["ztc-id-a-0", "DM-0", "123456789"], "statustype_url": "https://bar.maykinmedia.nl", "omschrijving": "status omschrijving", "statustekst": "statustekst nieuw", "zaaktype_uuids": "[]", "status_indicator": "", "status_indicator_text": "", "document_upload_description": "", "description": "status", "notify_status_change": true, "action_required": false, "document_upload_enabled": true, "call_to_action_url": "", "call_to_action_text": "", "case_link_text": ""}}',
]
self.jsonl = "\n".join(self.json_lines)
self.jsonl_with_dupes = "\n".join(self.json_lines + self.json_dupes)

def test_import_jsonl_update_success(self):
mocks = ZGWExportImportMockData()
Expand Down Expand Up @@ -445,7 +449,7 @@ def test_import_jsonl_update_statustype_config_missing_zt_config(self):
self.assertEqual(ZaakTypeStatusTypeConfig.objects.count(), 1)
self.assertEqual(ZaakTypeResultaatTypeConfig.objects.count(), 1)

def test_import_jsonl_update_reports_duplicates(self):
def test_import_jsonl_update_reports_duplicate_db_records(self):
mocks = ZGWExportImportMockData()

ZaakTypeResultaatTypeConfigFactory(
Expand Down Expand Up @@ -484,6 +488,39 @@ def test_import_jsonl_update_reports_duplicates(self):
# check import
self.assertEqual(import_result, import_expected)

def test_import_jsonl_update_reports_duplicate_natural_keys_in_upload_file(self):
mocks = ZGWExportImportMockData()

self.storage.save("import.jsonl", io.StringIO(self.jsonl_with_dupes))

# we use `asdict` and replace the Exceptions with string representations
# because for Exceptions raised from within dataclasses, equality ==/is identity
import_result = dataclasses.asdict(
CatalogusConfigImport.import_from_jsonl_file_in_django_storage(
"import.jsonl", self.storage
)
)
expected_error = ZGWImportError(
"ZaakTypeStatusTypeConfig was processed multiple times because it contains duplicate "
"natural keys: omschrijving = 'status omschrijving', ZaakTypeConfig identificatie = 'ztc-id-a-0'"
)
import_expected = dataclasses.asdict(
CatalogusConfigImport(
total_rows_processed=6,
catalogus_configs_imported=1,
zaaktype_configs_imported=1,
zaak_informatie_object_type_configs_imported=1,
zaak_status_type_configs_imported=1,
zaak_resultaat_type_configs_imported=1,
import_errors=[expected_error],
),
)
import_result["import_errors"][0] = str(import_result["import_errors"][0])
import_expected["import_errors"][0] = str(import_expected["import_errors"][0])

# check import
self.assertEqual(import_result, import_expected)

def test_import_jsonl_fails_with_catalogus_domein_rsin_mismatch(self):
service = ServiceFactory(slug="service-0")
CatalogusConfigFactory(
Expand Down

0 comments on commit b91bc32

Please sign in to comment.