Skip to content

Commit

Permalink
feature/mx-1664-improve-wikidata-convenience-function-for-use-in-extr…
Browse files Browse the repository at this point in the history
…actors (#339)

# Changes
wikidata helper optionally accepts wikidata primary source
  • Loading branch information
esinsj authored Nov 22, 2024
1 parent 280aecb commit bb647e8
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added

### Changes
+- wikidata helper now optionally accepts wikidata primary source

### Deprecated

Expand Down
7 changes: 5 additions & 2 deletions mex/common/wikidata/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from mex.common.exceptions import MExError
from mex.common.models.organization import ExtractedOrganization
+from mex.common.models.primary_source import ExtractedPrimarySource
from mex.common.primary_source.helpers import get_extracted_primary_source_by_name
from mex.common.wikidata.extract import search_organization_by_label
from mex.common.wikidata.transform import (
Expand All @@ -12,14 +13,15 @@
@cache
def get_extracted_organization_from_wikidata(
query_string: str,
+wikidata_primary_source: ExtractedPrimarySource | None = None,
) -> ExtractedOrganization | None:
"""Get extracted organization matching the query string.
Search wikidata for organization and transform it into an ExtractedOrganization.
Args:
query_string: query string to search in wikidata
-wikidata_primary_source: wikidata primary source
+wikidata_primary_source: optional wikidata primary source
Returns:
ExtractedOrganization if one matching organization is found in
Expand All @@ -31,7 +33,8 @@ def get_extracted_organization_from_wikidata(
if found_organization is None:
return None

-wikidata_primary_source = get_extracted_primary_source_by_name("wikidata")
+if not wikidata_primary_source:
+wikidata_primary_source = get_extracted_primary_source_by_name("wikidata")
if not wikidata_primary_source:
msg = "Primary source for wikidata not found"
raise MExError(msg)
Expand Down
15 changes: 11 additions & 4 deletions tests/wikidata/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,16 @@ def test_get_extracted_organization_from_wikidata(
)
)

-# organization found and transformed
-returned = get_extracted_organization_from_wikidata("Robert Koch-Institut")
-assert returned == extracted_wikidata_organization
+# test with passing the wikidata primary source: organization found and transformed
+assert extracted_wikidata_organization == get_extracted_organization_from_wikidata(
+"Robert Koch-Institut",
+wikidata_primary_source,
+)
+
+# test w/o passing the wikidata primary source: organization found and transformed
+assert extracted_wikidata_organization == get_extracted_organization_from_wikidata(
+"Robert Koch-Institut",
+)


@pytest.mark.integration
Expand All @@ -37,4 +44,4 @@ def test_get_extracted_organization_from_wikidata_for_nonsensequery_and_exceptio
assert returned is None

except MExError:
-pytest.fail("Primary Source Wikidata not found")
+pytest.fail("Primary source for wikidata not found")

0 comments on commit bb647e8

Please sign in to comment.