Skip to content

Commit

Permalink
Merge pull request #208 from MITLibraries/TIMX-355-control-field-index
Browse files Browse the repository at this point in the history
Address literary form control field bug
  • Loading branch information
ghukill authored Nov 1, 2024
2 parents e00c46e + 775458b commit 3f77c7c
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ line-length = 90
[tool.mypy]
disallow_untyped_calls = true
disallow_untyped_defs = true
exclude = ["tests/"]
exclude = ["tests/", "output/"]

[tool.pytest.ini_options]
log_level = "INFO"
Expand Down
10 changes: 10 additions & 0 deletions tests/sources/xml/test_marc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1349,6 +1349,16 @@ def test_get_literary_form_transforms_correctly_if_char_positions_blank():
assert Marc.get_literary_form(source_record) is None


def test_get_literary_form_returns_none_if_control_field_too_short(caplog):
    """Check that a too-short 008 control field gives None and a log message."""
    # Capture DEBUG-level records so the caplog.text assertion below can see them.
    caplog.set_level("DEBUG")
    short_control_field = (
        '<controlfield tag="008">220613s '
        "|||||o||||||||||||d</controlfield>"
    )
    record = create_marc_source_record_stub(control_field_insert=short_control_field)

    literary_form = Marc.get_literary_form(record)

    assert literary_form is None
    assert "could not parse literary form" in caplog.text


def test_get_links_success():
source_record = create_marc_source_record_stub(
datafield_insert=(
Expand Down
10 changes: 10 additions & 0 deletions transmogrifier/sources/xml/marc.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,10 +586,20 @@ def get_literary_form(cls, source_record: Tag) -> str | None:
and Leader/07 (Bibliographic level) contains code
a (Monographic component part), c (Collection), d (Subunit),
or m (Monograph).
If control field 008 is shorter than 34 characters, return None as we cannot
accurately determine.
"""
leader_field = cls._get_leader_field(source_record)
control_field = cls._get_control_field(source_record)
if leader_field[6] in "at" and leader_field[7] in "acdm":
if len(control_field) <= 33: # noqa: PLR2004
message = (
f"Record ID '{cls.get_source_record_id(source_record)}' has less than"
"34 characters for control field 008, could not parse literary form."
)
logger.debug(message)
return None
if control_field[33] in "0se":
return "Nonfiction"
return "Fiction"
Expand Down

0 comments on commit 3f77c7c

Please sign in to comment.