diff --git a/transmogrifier/exceptions.py b/transmogrifier/exceptions.py
new file mode 100644
index 0000000..8022d9c
--- /dev/null
+++ b/transmogrifier/exceptions.py
@@ -0,0 +1,33 @@
+class DeletedRecordEvent(Exception):  # noqa: N818
+    """Exception raised for records with a deleted status.
+
+    Attributes:
+        timdex_record_id: The TIMDEX record ID (not the source record ID) for the record.
+    """
+
+    def __init__(self, timdex_record_id: str) -> None:
+        # Initialize the base class explicitly, as SkippedRecordEvent does.
+        super().__init__(timdex_record_id)
+        self.timdex_record_id = timdex_record_id
+
+
+class SkippedRecordEvent(Exception):  # noqa: N818
+    """Exception raised for records that should be skipped.
+
+    Args:
+        message: Optional explanation of why the record was skipped.
+        source_record_id: The ID for the source record, if known.
+
+    Attributes:
+        source_record_id: The ID for the source record.
+    """
+
+    def __init__(
+        self, message: str | None = None, source_record_id: str | None = None
+    ) -> None:
+        # Forward only a real message so str(error) is "" instead of "None".
+        if message is None:
+            super().__init__()
+        else:
+            super().__init__(message)
+        self.source_record_id = source_record_id
diff --git a/transmogrifier/helpers.py b/transmogrifier/helpers.py
index 70f748f..68529ca 100644
--- a/transmogrifier/helpers.py
+++ b/transmogrifier/helpers.py
@@ -132,15 +132,3 @@ def validate_date_range(
             end_date,
         )
     return False
-
-
-class DeletedRecordEvent(Exception):  # noqa: N818
-    """Exception raised for records with a deleted status.
-
-    Attributes:
-        timdex_record_id: The TIMDEX record ID (not the source record ID) for the record
-
-    """
-
-    def __init__(self, timdex_record_id: str) -> None:
-        self.timdex_record_id = timdex_record_id
diff --git a/transmogrifier/sources/transformer.py b/transmogrifier/sources/transformer.py
index 00e6834..574d466 100644
--- a/transmogrifier/sources/transformer.py
+++ b/transmogrifier/sources/transformer.py
@@ -17,7 +17,8 @@
 # should not be a security issue.
import transmogrifier.models as timdex from transmogrifier.config import SOURCES -from transmogrifier.helpers import DeletedRecordEvent, generate_citation, validate_date +from transmogrifier.exceptions import DeletedRecordEvent, SkippedRecordEvent +from transmogrifier.helpers import generate_citation, validate_date if TYPE_CHECKING: from collections.abc import Iterator @@ -68,11 +69,14 @@ def __next__(self) -> timdex.TimdexRecord: except DeletedRecordEvent as error: self.deleted_records.append(error.timdex_record_id) continue - if record: - self.transformed_record_count += 1 - return record - self.skipped_record_count += 1 - continue + except SkippedRecordEvent: + self.skipped_record_count += 1 + continue + if not record: + self.skipped_record_count += 1 + continue + self.transformed_record_count += 1 + return record @final def transform_and_write_output_files(self, output_file: str) -> None: diff --git a/transmogrifier/sources/xml/datacite.py b/transmogrifier/sources/xml/datacite.py index 67700b3..dd82147 100644 --- a/transmogrifier/sources/xml/datacite.py +++ b/transmogrifier/sources/xml/datacite.py @@ -3,6 +3,7 @@ from bs4 import Tag # type: ignore[import-untyped] import transmogrifier.models as timdex +from transmogrifier.exceptions import SkippedRecordEvent from transmogrifier.helpers import validate_date, validate_date_range from transmogrifier.sources.xmltransformer import XMLTransformer @@ -54,7 +55,8 @@ def get_optional_fields(self, xml: Tag) -> dict | None: if self.valid_content_types([content_type]): fields["content_type"] = [content_type] else: - return None + message = f'Record skipped based on content type: "{content_type}"' + raise SkippedRecordEvent(message, source_record_id) else: logger.warning( "Datacite record %s missing required Datacite field resourceType",