From bb647e88a0c2f6508b0485fc06d05e94c1a41e76 Mon Sep 17 00:00:00 2001 From: esinsj <72222648+esinsj@users.noreply.github.com> Date: Fri, 22 Nov 2024 12:07:51 +0100 Subject: [PATCH] feature/mx-1664-improve-wikidata-convenience-function-for-use-in-extractors (#339) # Changes wikidata helper optionally accepts wikidata primary source --- CHANGELOG.md | 1 + mex/common/wikidata/helpers.py | 7 +++++-- tests/wikidata/test_helpers.py | 15 +++++++++++---- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 000c0e02..9d7a1f1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### Changes +- wikidata helper now optionally accepts wikidata primary source ### Deprecated diff --git a/mex/common/wikidata/helpers.py b/mex/common/wikidata/helpers.py index 2253113a..f634433a 100644 --- a/mex/common/wikidata/helpers.py +++ b/mex/common/wikidata/helpers.py @@ -2,6 +2,7 @@ from mex.common.exceptions import MExError from mex.common.models.organization import ExtractedOrganization +from mex.common.models.primary_source import ExtractedPrimarySource from mex.common.primary_source.helpers import get_extracted_primary_source_by_name from mex.common.wikidata.extract import search_organization_by_label from mex.common.wikidata.transform import ( @@ -12,6 +13,7 @@ @cache def get_extracted_organization_from_wikidata( query_string: str, + wikidata_primary_source: ExtractedPrimarySource | None = None, ) -> ExtractedOrganization | None: """Get extracted organization matching the query string. @@ -19,7 +21,7 @@ def get_extracted_organization_from_wikidata( Args: query_string: query string to search in wikidata - wikidata_primary_source: wikidata primary source + wikidata_primary_source: optional wikidata primary source Returns: ExtractedOrganization if one matching organization is found in @@ -31,7 +33,8 @@ def get_extracted_organization_from_wikidata( if found_organization is None: return None - wikidata_primary_source = get_extracted_primary_source_by_name("wikidata") + if not wikidata_primary_source: + wikidata_primary_source = get_extracted_primary_source_by_name("wikidata") if not wikidata_primary_source: msg = "Primary source for wikidata not found" raise MExError(msg) diff --git a/tests/wikidata/test_helpers.py b/tests/wikidata/test_helpers.py index a41860a0..7a1f20af 100644 --- a/tests/wikidata/test_helpers.py +++ b/tests/wikidata/test_helpers.py @@ -21,9 +21,16 @@ def test_get_extracted_organization_from_wikidata( ) ) - # organization found and transformed - returned = get_extracted_organization_from_wikidata("Robert Koch-Institut") - assert returned == extracted_wikidata_organization + # test with passing the wikidata primary source: organization found and transformed + assert extracted_wikidata_organization == get_extracted_organization_from_wikidata( + "Robert Koch-Institut", + wikidata_primary_source, + ) + + # test w/o passing the wikidata primary source: organization found and transformed + assert extracted_wikidata_organization == get_extracted_organization_from_wikidata( + "Robert Koch-Institut", + ) @pytest.mark.integration @@ -37,4 +44,4 @@ def test_get_extracted_organization_from_wikidata_for_nonsensequery_and_exceptio assert returned is None except MExError: - pytest.fail("Primary Source Wikidata not found") + pytest.fail("Primary source for wikidata not found")