diff --git a/common/constants.py b/common/constants.py index 3c213a4f..8dbdb7c3 100644 --- a/common/constants.py +++ b/common/constants.py @@ -4,4 +4,5 @@ class Constants: DATABASE_URL = config.get_db_url() ACCEPTED_DATE_FORMAT = "%Y-%m-%d %H:%M:%S" + PYTHON_ICAT_DISTNCT_CONDITION = "!= null" ICAT_PROPERTIES = config.get_icat_properties() diff --git a/common/filters.py b/common/filters.py index 8ab93a0d..346f76f1 100644 --- a/common/filters.py +++ b/common/filters.py @@ -1,6 +1,8 @@ from abc import ABC, abstractmethod import logging +from common.exceptions import BadRequestError + log = logging.getLogger() @@ -27,6 +29,13 @@ def __init__(self, field, value, operation): self.value = value self.operation = operation + if self.operation == "in": + if not isinstance(self.value, list): + raise BadRequestError( + "When using the 'in' operation for a WHERE filter, the values must" + " be in a list format e.g. [1, 2, 3]" + ) + def _extract_filter_fields(self, field): fields = field.split(".") include_depth = len(fields) diff --git a/common/icat/filters.py b/common/icat/filters.py index d990a797..8420ef3a 100644 --- a/common/icat/filters.py +++ b/common/icat/filters.py @@ -23,8 +23,10 @@ def apply_filter(self, query): log.info("Creating condition for ICAT where filter") if self.operation == "eq": where_filter = self.create_condition(self.field, "=", self.value) + elif self.operation == "ne": + where_filter = self.create_condition(self.field, "!=", self.value) elif self.operation == "like": - where_filter = self.create_condition(self.field, "like", self.value) + where_filter = self.create_condition(self.field, "like", f"%{self.value}%") elif self.operation == "lt": where_filter = self.create_condition(self.field, "<", self.value) elif self.operation == "lte": @@ -34,7 +36,11 @@ def apply_filter(self, query): elif self.operation == "gte": where_filter = self.create_condition(self.field, ">=", self.value) elif self.operation == "in": - where_filter = 
self.create_condition(self.field, "in", tuple(self.value)) + # Convert self.value into a string with brackets equivalent to tuple format. + # Cannot convert straight to tuple as single element tuples contain a + # trailing comma which Python ICAT/JPQL doesn't accept + self.value = str(self.value).replace("[", "(").replace("]", ")") + where_filter = self.create_condition(self.field, "in", self.value) else: raise FilterError(f"Bad operation given to where filter: {self.operation}") @@ -64,10 +70,13 @@ def create_condition(attribute_name, operator, value): """ conditions = {} - # Removing quote marks when doing conditions with IN expressions - jpql_value = f"{value}" if isinstance(value, tuple) else f"'{value}'" + # Removing quote marks when doing conditions with IN expressions or when a + # distinct filter is used in a request + jpql_value = ( + f"{value}" if operator == "in" or operator == "!=" else f"'{value}'" + ) conditions[attribute_name] = f"{operator} {jpql_value}" - + log.debug("Conditions in ICAT where filter, %s", conditions) return conditions @@ -76,7 +85,18 @@ def __init__(self, fields): super().__init__(fields) def apply_filter(self, query): - pass + try: + log.info("Adding ICAT distinct filter to ICAT query") + query.setAggregate("DISTINCT") + + # Using where filters to identify which fields to apply distinct to + for field in self.fields: + where_filter = PythonICATWhereFilter(field, "null", "ne") + where_filter.apply_filter(query) + + log.debug("Fields for distinct filter: %s", self.fields) + except ValueError as e: + raise FilterError(e) class PythonICATOrderFilter(OrderFilter): diff --git a/common/icat/helpers.py b/common/icat/helpers.py index 97e90138..210d30ce 100644 --- a/common/icat/helpers.py +++ b/common/icat/helpers.py @@ -2,7 +2,6 @@ import logging from datetime import datetime, timedelta -from icat.query import Query from icat.exception import ICATSessionError, ICATValidationError from common.exceptions import ( AuthenticationError, @@ 
-18,6 +17,7 @@ PythonICATSkipFilter, PythonICATOrderFilter, ) +from common.icat.query import ICATQuery log = logging.getLogger() @@ -94,87 +94,6 @@ def refresh_client_session(client): client.refresh() -def construct_icat_query( - client, entity_name, conditions=None, aggregate=None, includes=None -): - """ - Create a Query object within Python ICAT - - :param client: ICAT client containing an authenticated user - :type client: :class:`icat.client.Client` - :param entity_name: Name of the entity to get data from - :type entity_name: :class:`suds.sax.text.Text` - :param conditions: Constraints used when an entity is queried - :type conditions: :class:`dict` - :param aggregate: Name of the aggregate function to apply. Operations such as - counting the number of records. See `icat.query.setAggregate` for valid values. - :type aggregate: :class:`str` - :param includes: List of related entity names to add to the query so related - entities (and their data) can be returned with the query result - :type includes: :class:`str` or iterable of :class:`str` - :return: Query object from Python ICAT - :raises PythonICATError: If a ValueError is raised when creating a Query(), 500 will - be returned as a response - """ - - try: - query = Query( - client, - entity_name, - conditions=conditions, - aggregate=aggregate, - includes=includes, - ) - except ValueError: - raise PythonICATError( - "An issue has occurred while creating a Python ICAT Query object," - " suggesting an invalid argument" - ) - - return query - - -def execute_icat_query(client, query, return_json_formattable=False): - """ - Execute a previously created ICAT Query object and return in the format specified - by the return_json_formattable flag - - :param client: ICAT client containing an authenticated user - :type client: :class:`icat.client.Client` - :param query: ICAT Query object to execute within Python ICAT - :type query: :class:`icat.query.Query` - :param return_json_formattable: Flag to determine whether the 
data from the query - should be returned as a list of data ready to be converted straight to JSON - (i.e. if the data will be used as a response for an API call) or whether to - leave the data in a Python ICAT format (i.e. if it's going to be manipulated at - some point) - :type return_json_formattable_data: :class:`bool` - :return: Data (of type list) from the executed query - """ - - try: - query_result = client.search(query) - except ICATValidationError as e: - raise PythonICATError(e) - - if return_json_formattable: - data = [] - for result in query_result: - dict_result = result.as_dict() - for key, value in dict_result.items(): - # Convert datetime objects to strings so they can be JSON serialisable - if isinstance(value, datetime): - # Remove timezone data which isn't utilised in ICAT - dict_result[key] = value.replace(tzinfo=None).strftime( - Constants.ACCEPTED_DATE_FORMAT - ) - - data.append(dict_result) - return data - else: - return query_result - - def get_python_icat_entity_name(client, database_table_name): """ From the database table name, this function returns the correctly cased entity name @@ -303,17 +222,14 @@ def get_entity_by_id(client, table_name, id_, return_json_formattable_data): :raises: MissingRecordError: If Python ICAT cannot find a record of the specified ID """ + selected_entity_name = get_python_icat_entity_name(client, table_name) # Set query condition for the selected ID id_condition = PythonICATWhereFilter.create_condition("id", "=", id_) - selected_entity_name = get_python_icat_entity_name(client, table_name) - - id_query = construct_icat_query( + id_query = ICATQuery( client, selected_entity_name, conditions=id_condition, includes="1" ) - entity_by_id_data = execute_icat_query( - client, id_query, return_json_formattable_data - ) + entity_by_id_data = id_query.execute_query(client, return_json_formattable_data) if not entity_by_id_data: # Cannot find any data matching the given ID @@ -379,15 +295,15 @@ def 
get_entity_with_filters(client, table_name, filters): """ selected_entity_name = get_python_icat_entity_name(client, table_name) - query = construct_icat_query(client, selected_entity_name) + query = ICATQuery(client, selected_entity_name) filter_handler = FilterOrderHandler() filter_handler.add_filters(filters) merge_limit_skip_filters(filter_handler) clear_order_filters(filter_handler.filters) - filter_handler.apply_filters(query) + filter_handler.apply_filters(query.query) - data = execute_icat_query(client, query, True) + data = query.execute_query(client, True) if not data: raise MissingRecordError("No results found") diff --git a/common/icat/query.py b/common/icat/query.py new file mode 100644 index 00000000..e5411588 --- /dev/null +++ b/common/icat/query.py @@ -0,0 +1,141 @@ +import logging +from datetime import datetime + +from icat.query import Query +from icat.exception import ICATValidationError + +from common.exceptions import PythonICATError +from common.constants import Constants + +log = logging.getLogger() + + +class ICATQuery: + def __init__( + self, client, entity_name, conditions=None, aggregate=None, includes=None + ): + """ + Create a Query object within Python ICAT + + :param client: ICAT client containing an authenticated user + :type client: :class:`icat.client.Client` + :param entity_name: Name of the entity to get data from + :type entity_name: :class:`suds.sax.text.Text` + :param conditions: Constraints used when an entity is queried + :type conditions: :class:`dict` + :param aggregate: Name of the aggregate function to apply. Operations such as + counting the number of records. See `icat.query.setAggregate` for valid + values. 
+ :type aggregate: :class:`str` + :param includes: List of related entity names to add to the query so related + entities (and their data) can be returned with the query result + :type includes: :class:`str` or iterable of :class:`str` + :return: Query object from Python ICAT + :raises PythonICATError: If a ValueError is raised when creating a Query(), 500 + will be returned as a response + """ + + try: + self.query = Query( + client, + entity_name, + conditions=conditions, + aggregate=aggregate, + includes=includes, + ) + except ValueError: + raise PythonICATError( + "An issue has occurred while creating a Python ICAT Query object," + " suggesting an invalid argument" + ) + + def execute_query(self, client, return_json_formattable=False): + """ + Execute a previously created ICAT Query object and return in the format + specified by the return_json_formattable flag + + :param client: ICAT client containing an authenticated user + :type client: :class:`icat.client.Client` + :param return_json_formattable: Flag to determine whether the data from the + query should be returned as a list of data ready to be converted straight to + JSON (i.e. if the data will be used as a response for an API call) or + whether to leave the data in a Python ICAT format (i.e. 
if it's going to be + manipulated at some point) + :type return_json_formattable: :class:`bool` + :return: Data (of type list) from the executed query + """ + + try: + query_result = client.search(self.query) + except ICATValidationError as e: + raise PythonICATError(e) + + if self.query.aggregate == "DISTINCT": + distinct_filter_flag = True + # Check query's conditions for the ones created by the distinct filter + self.attribute_names = [] + log.debug("Query conditions: %s", self.query.conditions) + + for key, value in self.query.conditions.items(): + # Value can be a list if there's multiple WHERE filters for the same + # attribute name within an ICAT query + if isinstance(value, list): + for sub_value in value: + self.check_attribute_name_for_distinct(key, sub_value) + elif isinstance(value, str): + self.check_attribute_name_for_distinct(key, value) + log.debug( + "Attribute names used in the distinct filter, as captured by the" + " query's conditions %s", + self.attribute_names, + ) + else: + distinct_filter_flag = False + + if return_json_formattable: + data = [] + for result in query_result: + dict_result = result.as_dict() + distinct_result = {} + + for key in dict_result: + # Convert datetime objects to strings so they can be JSON + # serialisable + if isinstance(dict_result[key], datetime): + # Remove timezone data which isn't utilised in ICAT + dict_result[key] = ( + dict_result[key] + .replace(tzinfo=None) + .strftime(Constants.ACCEPTED_DATE_FORMAT) + ) + + if distinct_filter_flag: + # Add only the required data as per request's distinct filter + # fields + if key in self.attribute_names: + distinct_result[key] = dict_result[key] + + # Add to the response's data depending on whether request has a distinct + # filter + if distinct_filter_flag: + data.append(distinct_result) + else: + data.append(dict_result) + return data + else: + return query_result + + def check_attribute_name_for_distinct(self, key, value): + """ + Check the attribute name to see 
if its associated value is used to signify the + attribute is requested in a distinct filter and if so, append it to the list of + attribute names + + :param key: Name of an attribute + :type key: :class:`str` + :param value: Expression that should be applied to the associated attribute + e.g. "= 'Metadata'" + :type value: :class:`str` + """ + if value == Constants.PYTHON_ICAT_DISTNCT_CONDITION: + self.attribute_names.append(key)