Skip to content

Commit

Permalink
Added scoring to the query filter
Browse files Browse the repository at this point in the history
  • Loading branch information
antolinos committed Jan 26, 2023
1 parent d5d5dba commit 7cb0747
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 1 deletion.
4 changes: 4 additions & 0 deletions datagateway_api/src/common/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@ class SearchAPI(BaseModel):
mechanism: StrictStr
username: StrictStr
password: StrictStr
scoring_enabled: StrictBool
scoring_server: StrictStr
scoring_group: StrictStr
scoring_limit: StrictInt

_validate_extension = validator("extension", allow_reuse=True)(validate_extension)

Expand Down
6 changes: 6 additions & 0 deletions datagateway_api/src/common/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,9 @@ class SearchAPIError(ApiError):
# Build the error with "Search API error" as the default message, forward the
# message and any extra arguments to the parent initialiser, then set
# ``status_code`` to 500 on the instance.
def __init__(self, msg="Search API error", *args, **kwargs):
super().__init__(msg, *args, **kwargs)
self.status_code = 500


class ScoringAPIError(ApiError):
    """
    Error used to report a failure while talking to the scoring service.

    The message defaults to "Scoring API error" and the instance carries a
    ``status_code`` of 500.
    """

    def __init__(self, msg="Scoring API error", *args, **kwargs):
        # The parent initialiser receives the message and any extra arguments
        # first; the status code is assigned afterwards.
        super().__init__(msg, *args, **kwargs)
        self.status_code = 500
6 changes: 6 additions & 0 deletions datagateway_api/src/resources/search_api_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@

from flask_restful import Resource

from datagateway_api.src.common.config import Config
from datagateway_api.src.common.helpers import get_filters_from_query_string
from datagateway_api.src.search_api.helpers import (
add_scores_to_entities,
get_count,
get_files,
get_files_count,
get_score,
get_search,
get_search_api_query_filter_list,
get_with_pid,
Expand Down Expand Up @@ -43,6 +46,9 @@ def get(self):
"LOWER(o.summary) like '%" + query.lower() + "%'",
)

if Config.config.search_api.scoring_enabled:
scores = get_score(entities, query)
entities = add_scores_to_entities(entities, scores)
return entities, 200

get.__doc__ = f"""
Expand Down
64 changes: 64 additions & 0 deletions datagateway_api/src/search_api/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@
import logging

from pydantic import ValidationError
import requests

from datagateway_api.src.common.config import Config
from datagateway_api.src.common.exceptions import (
BadRequestError,
MissingRecordError,
ScoringAPIError,
SearchAPIError,
)
from datagateway_api.src.common.filter_order_handler import FilterOrderHandler
Expand Down Expand Up @@ -75,6 +78,67 @@ def create_error_message(e):
return wrapper_error_handling


def get_score(entities, query):
    """
    Ask the scoring server to score the given query term.

    The request body carries the query together with the configured scoring
    group and limit. The ``scores`` member of the JSON reply is returned as is.

    :param entities: List of entities that have been retrieved from one ICAT query.
        They are currently not sent to the scoring server (see the note on
        ``itemIds`` in the request body below).
    :type entities: :class:`list`
    :param query: String with the term to be searched by
    :type query: :class:`str`
    :return: The value stored under ``scores`` in the scoring server's JSON reply
    :raises ScoringAPIError: If the scoring server replies with a status code of
        400 or above
    :raises ValueError: If the reply body is not valid JSON
    :raises requests.exceptions.ConnectionError: If the scoring server cannot be
        reached
    """
    try:
        scoring_config = Config.config.search_api
        data = {
            "query": query,
            "group": scoring_config.scoring_group,
            "limit": scoring_config.scoring_limit,
            # With itemIds, scoring server returns a 400 error. No idea why.
            # "itemIds": list(map(lambda entity: (entity["pid"]), entities)), #
        }
        response = requests.post(
            scoring_config.scoring_server,
            json=data,
            timeout=5,  # Could this be a configuration parameter?
        )
        if response.status_code >= 400:
            # Pass a plain string as the message, like the default message of
            # the exception class, rather than wrapping another Exception.
            raise ScoringAPIError(f"Score API returned {response.status_code}")
        return response.json()["scores"]
    except ValueError:
        log.error("Response is not a valid json")
        raise
    except (ConnectionError, requests.exceptions.ConnectionError):
        # requests raises its own ConnectionError, which does not inherit from
        # the builtin of the same name, so both have to be listed for this
        # branch to log connection failures.
        log.error("ConnectionError to %s ", scoring_config.scoring_server)
        raise
    except Exception:
        # Also reached for the ScoringAPIError raised above; log and re-raise
        # unchanged so callers see the original exception.
        log.error("Error on scoring")
        raise


def add_scores_to_entities(entities, scores):
    """
    Add a ``score`` key to every entity, in place, and return the same list.

    A score item belongs to an entity when ``"pid:" + itemId`` of the item equals
    the string form of the entity's ``pid``. The item's ``score`` is copied only
    when exactly one item matches; with no match, or with several matches, the
    score is filled with -1 (arbitrarily chosen).

    :param entities: List of entities that have been retrieved from one ICAT query.
    :type entities: :class:`list`
    :param scores: List of items retrieved from the scoring application
    :type scores: :class:`list`
    :return: The ``entities`` list that was passed in, with scores filled in
    :rtype: :class:`list`
    """
    if not entities:
        # Nothing to annotate, so the score items are not inspected at all.
        return entities

    # Index the score items once by the pid they refer to, instead of scanning
    # the whole score list again for every entity.
    items_by_pid = {}
    for item in scores:
        items_by_pid.setdefault(f"pid:{item['itemId']}", []).append(item)

    for entity in entities:
        matches = items_by_pid.get(str(entity["pid"]), ())
        # Only an unambiguous (single) match provides a score.
        entity["score"] = matches[0]["score"] if len(matches) == 1 else -1
    return entities


@client_manager
def get_search(entity_name, filters, str_conditions=None):
"""
Expand Down
2 changes: 1 addition & 1 deletion datagateway_api/src/search_api/panosc_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def get_icat_mapping(self, panosc_entity_name, field_name):

try:
icat_mapping = self.mappings[panosc_entity_name][field_name]
log.debug("ICAT mapping/translation found: %s", icat_mapping)
# Disabled because it was too verbose: log.debug("ICAT mapping/translation found: %s", icat_mapping)
except KeyError as e:
raise FilterError(f"Bad PaNOSC to ICAT mapping: {e.args}")

Expand Down

0 comments on commit 7cb0747

Please sign in to comment.