diff --git a/datagateway_api/common/icat/filters.py b/datagateway_api/common/icat/filters.py index 14b986be..1b68c09b 100644 --- a/datagateway_api/common/icat/filters.py +++ b/datagateway_api/common/icat/filters.py @@ -117,22 +117,30 @@ def __init__(self, fields): def apply_filter(self, query): try: log.info("Adding ICAT distinct filter to ICAT query") - if ( - query.aggregate == "COUNT" - or query.aggregate == "AVG" - or query.aggregate == "SUM" - ): + log.debug("Fields for distinct filter: %s", self.fields) + + # These aggregate keywords are not currently used in the API, but the + # conditional is present in case they're used in the future + if query.aggregate == "AVG" or query.aggregate == "SUM": # Distinct can be combined with other aggregate functions query.setAggregate(f"{query.aggregate}:DISTINCT") + elif query.aggregate == "COUNT": + # When count and distinct keywords are used together when selecting + # multiple attributes, Python ICAT will always throw an error on query + # execution (more info: + # https://github.com/icatproject/python-icat/issues/76). This appears to + # be a JPQL limitation, something that cannot be fixed in Python ICAT. 
+ # As a result, the API will get the distinct results and manually + # perform `len()` on the list, using `manual_count` as a flag to + # recognise this situation + query.setAggregate("DISTINCT") + log.debug("Manual count flag enabled") + query.manual_count = True else: query.setAggregate("DISTINCT") - # Using where filters to identify which fields to apply distinct too - for field in self.fields: - where_filter = PythonICATWhereFilter(field, "null", "ne") - where_filter.apply_filter(query) + query.setAttributes(self.fields) - log.debug("Fields for distinct filter: %s", self.fields) except ValueError as e: raise FilterError(e) diff --git a/datagateway_api/common/icat/helpers.py b/datagateway_api/common/icat/helpers.py index 73472cc9..e7859406 100644 --- a/datagateway_api/common/icat/helpers.py +++ b/datagateway_api/common/icat/helpers.py @@ -553,6 +553,7 @@ def get_facility_cycles_for_instrument( query_aggregate = "COUNT:DISTINCT" if count_query else "DISTINCT" query = ICATQuery(client, "FacilityCycle", aggregate=query_aggregate) + query.isis_endpoint = True instrument_id_check = PythonICATWhereFilter( "facility.instruments.id", instrument_id, "eq", @@ -633,6 +634,7 @@ def get_investigations_for_instrument_in_facility_cycle( query_aggregate = "COUNT:DISTINCT" if count_query else "DISTINCT" query = ICATQuery(client, "Investigation", aggregate=query_aggregate) + query.isis_endpoint = True instrument_id_check = PythonICATWhereFilter( "facility.instruments.id", instrument_id, "eq", diff --git a/datagateway_api/common/icat/query.py b/datagateway_api/common/icat/query.py index 4721523f..97e1af41 100644 --- a/datagateway_api/common/icat/query.py +++ b/datagateway_api/common/icat/query.py @@ -5,9 +5,8 @@ from icat.exception import ICATInternalError, ICATValidationError from icat.query import Query -from datagateway_api.common.constants import Constants from datagateway_api.common.date_handler import DateHandler -from datagateway_api.common.exceptions import FilterError, 
PythonICATError +from datagateway_api.common.exceptions import PythonICATError log = logging.getLogger() @@ -37,6 +36,9 @@ def __init__( :raises PythonICATError: If a ValueError is raised when creating a Query(), 500 will be returned as a response """ + # Needed for ISIS endpoints as they use DISTINCT keyword but don't select + # multiple attributes + self.isis_endpoint = False try: log.info("Creating ICATQuery for entity: %s", entity_name) @@ -47,6 +49,8 @@ def __init__( aggregate=aggregate, includes=includes, ) + # Initialising flag for distinct filter on count endpoints + self.query.manual_count = False except ValueError: raise PythonICATError( "An issue has occurred while creating a Python ICAT Query object," @@ -77,7 +81,6 @@ def execute_query(self, client, return_json_formattable=False): raise PythonICATError(e) flat_query_includes = self.flatten_query_included_fields(self.query.includes) - mapped_distinct_fields = None # If the query has a COUNT function applied to it, some of these steps can be # skipped @@ -87,29 +90,45 @@ def execute_query(self, client, return_json_formattable=False): count_query = True log.debug("This ICATQuery is used for COUNT purposes") - if self.query.aggregate == "DISTINCT" and not count_query: + distinct_query = False + if ( + self.query.aggregate == "DISTINCT" + and not count_query + and not self.query.manual_count + and not self.isis_endpoint + ): + distinct_query = True log.info("Extracting the distinct fields from query's conditions") # Check query's conditions for the ones created by the distinct filter - distinct_attributes = self.iterate_query_conditions_for_distinctiveness() - if distinct_attributes != []: - mapped_distinct_fields = self.map_distinct_attributes_to_entity_names( - distinct_attributes, flat_query_includes, - ) - log.debug( - "Attribute names used in the distinct filter, mapped to the entity" - " they are a part of: %s", - mapped_distinct_fields, - ) + distinct_attributes = self.get_distinct_attributes() if 
return_json_formattable: log.info("Query results will be returned in a JSON format") data = [] + if self.query.manual_count: + # Manually count the number of results + data.append(len(query_result)) + return data + for result in query_result: - if not count_query: - dict_result = self.entity_to_dict( - result, flat_query_includes, mapped_distinct_fields, + if distinct_query: + # When multiple attributes are given in a distinct filter, Python + # ICAT returns the results in a nested list. This doesn't happen + # when a single attribute is given, so the result is encased in a + # list as `map_distinct_attributes_to_results()` assumes a list as + # input + if not isinstance(result, tuple): + result = [result] + + # Map distinct attributes and result + data.append( + self.map_distinct_attributes_to_results( + distinct_attributes, result, + ), ) + elif not count_query: + dict_result = self.entity_to_dict(result, flat_query_includes) data.append(dict_result) else: data.append(result) @@ -119,37 +138,10 @@ def execute_query(self, client, return_json_formattable=False): log.info("Query results will be returned as ICAT entities") return query_result - def iterate_query_conditions_for_distinctiveness(self): - distinct_attributes = [] - for attribute_name, where_statement in self.query.conditions.items(): - if isinstance(where_statement, list): - for sub_value in where_statement: - self.check_attribute_name_for_distinct( - distinct_attributes, attribute_name, sub_value, - ) - elif isinstance(where_statement, str): - self.check_attribute_name_for_distinct( - distinct_attributes, attribute_name, where_statement, - ) - - return distinct_attributes - - def check_attribute_name_for_distinct(self, attribute_list, key, value): - """ - Check the attribute name to see if its associated value is used to signify the - attribute is requested in a distinct filter and if so, append it to the list of - attribute names - - :param key: Name of an attribute - :type key: :class:`str` - :param 
value: Expression that should be applied to the associated attribute - e.g. "= 'Metadata'" - :type value: :class:`str` - """ - if value == Constants.PYTHON_ICAT_DISTNCT_CONDITION: - attribute_list.append(key) + def get_distinct_attributes(self): + return self.query.attributes - def entity_to_dict(self, entity, includes, distinct_fields=None): + def entity_to_dict(self, entity, includes): """ This expands on Python ICAT's implementation of `icat.entity.Entity.as_dict()` to use set operators to create a version of the entity as a dictionary @@ -176,13 +168,6 @@ def entity_to_dict(self, entity, includes, distinct_fields=None): include_set = (entity.InstRel | entity.InstMRel) & set(includes) for key in entity.InstAttr | entity.MetaAttr | include_set: if key in includes: - # Make a copy of distinct_fields when calling this function again later - if distinct_fields is not None: - distinct_fields_copy = self.prepare_distinct_fields( - key, distinct_fields, - ) - else: - distinct_fields_copy = None target = getattr(entity, key) # Copy and remove don't return values so must be done separately @@ -195,131 +180,92 @@ def entity_to_dict(self, entity, includes, distinct_fields=None): " cause an issue further on in the request", ) if isinstance(target, Entity): - d[key] = self.entity_to_dict( - target, includes_copy, distinct_fields_copy, - ) + d[key] = self.entity_to_dict(target, includes_copy) # Related fields with one-many relationships are stored as EntityLists elif isinstance(target, EntityList): d[key] = [] for e in target: - d[key].append( - self.entity_to_dict(e, includes_copy, distinct_fields_copy), - ) + d[key].append(self.entity_to_dict(e, includes_copy)) # Add actual piece of data to the dictionary else: - entity_data = None - - if distinct_fields is None or key in distinct_fields["base"]: - entity_data = getattr(entity, key) - # Convert datetime objects to strings ready to be outputted as JSON - if isinstance(entity_data, datetime): - # Remove timezone data which 
isn't utilised in ICAT - entity_data = DateHandler.datetime_object_to_str(entity_data) + entity_data = getattr(entity, key) + # Convert datetime objects to strings ready to be outputted as JSON + if isinstance(entity_data, datetime): + # Remove timezone data which isn't utilised in ICAT + entity_data = DateHandler.datetime_object_to_str(entity_data) - d[key] = entity_data + d[key] = entity_data return d - def map_distinct_attributes_to_entity_names(self, distinct_fields, included_fields): + def map_distinct_attributes_to_results(self, distinct_attributes, query_result): """ - This function looks at a list of dot-separated fields and maps them to which - entity they belong to - - The result of this function will be a dictionary that has a data structure - similar to the example below. The values assigned to the 'base' key are the - fields that belong to the entity the request is being sent to (e.g. the base - values of `/users` would be fields belonging to the User entity). - - Example return value: - `{'base': ['id', 'modTime'], 'userGroups': ['id', 'fullName'], - 'investigationUser': ['id', 'role']}` - - For distinct fields that are part of included entities (e.g. userGroups.id), it - is assumed that the relevant entities have been specified in an include filter. - This is checked, and a suitable exception is thrown. Without this, the query - would execute, and the user would get a 200 response, but they wouldn't receive - the data they're expecting, hence it's more sensible to raise a 400 to alert - them to their probable mistake, rather than to just log a warning. - - :param distinct_fields: List of fields that should be distinctive in the request - response, as per the distinct filters in the request - :type distinct_fields: :class:`list` - :param included_fields: List of fields that have been included in the ICAT - query. 
It is assumed each element has been checked for multiple fields - separated by dots, split them accordingly and flattened the resulting list. - Note: ICATQuery.flatten_query_included_fields performs this functionality. - :type included_fields: :class:`list` - :return: Dictionary of fields, where the key denotes which entity they belong to + Maps the attribute names from a distinct filter onto the results given by the + query constructed and executed using Python ICAT + + When selecting multiple (but not all) attributes in a JPQL query, the results + are returned in a list and not mapped to an entity object. As a result, + `entity_to_dict()` cannot be used as that function assumes an entity object + input. Within the API, selecting multiple attributes happens when a distinct + filter is applied to a request. This function is the alternative for processing + data ready for output + + :param distinct_attributes: List of distinct attributes from the distinct + filter of the incoming request + :type distinct_attributes: :class:`list` + :param query_result: Results fetched from Python ICAT + :type query_result: :class:`tuple` or :class:`list` when a single attribute is + given + :return: Dictionary of attribute names paired with the results, ready to be + returned to the user """ + result_dict = {} + for attr_name, data in zip(distinct_attributes, query_result): + # Splitting attribute names in case it's from a related entity + split_attr_name = attr_name.split(".") + + if isinstance(data, datetime): + data = DateHandler.datetime_object_to_str(data) + + # Attribute name is from the 'origin' entity (i.e. 
not a related entity) + if len(split_attr_name) == 1: + result_dict[attr_name] = data + # Attribute name is a related entity, dictionary needs to be nested + else: + result_dict.update(self.map_nested_attrs({}, split_attr_name, data)) + + return result_dict - # Mapping which entities have distinct fields - distinct_field_dict = {"base": []} - - for field in distinct_fields: - split_fields = field.split(".") - # Single element list means the field belongs to the entity which the - # request has been sent to - if len(split_fields) == 1: - # Conventional list assignment causes IndexError because -2 is out of - # range of a list with a single element - split_fields.insert(0, "base") - - # Check that only an entity name, and attribute name exist - # Code within loop is used for when `split_fields` = - # ['dataset', 'investigation', 'name'] for example - while len(split_fields) > 2: - # If a key doesn't exist in the dictionary, create it and assign an - # empty list to it - distinct_field_dict.setdefault(split_fields[0], []) - split_fields.pop(0) - - distinct_field_dict.setdefault(split_fields[0], []) - distinct_field_dict[split_fields[0]].append(split_fields[-1]) - - # Remove "base" key as this isn't a valid entity name in Python ICAT - distinct_entities = list(distinct_field_dict.keys()) - distinct_entities.remove("base") - - # Search through entity names that have distinct fields for the request and - # ensure these same entity names are in the query's includes - for entity in distinct_entities: - if entity not in included_fields: - raise FilterError( - "A distinct field that has a relationship with another entity does" - " not have the included entity within an include filter in this" - " request. 
Please add all related entities which are required for" - " the fields in the distinct filter distinct to an include filter.", - ) - - return distinct_field_dict - - def prepare_distinct_fields(self, entity_name, distinct_fields): + def map_nested_attrs(self, nested_dict, split_attr_name, query_data): """ - Copy `distinct_fields` and move the data held in `entity_name` portion of the - dictionary to the "base" section of the dictionary. This function is called in - preparation for recursive calls occurring in entity_to_dict() - - See map_distinct_attribute_to_entity_names() for an explanation regarding - `distinct_fields` and its data structure - - :param entity_name: Name of the Python ICAT entity - :type entity_name: :class:`str` - :param distinct_fields: Names of fields in Python ICAT which should be outputted - in the response, separated by which entities they belong to as the keys - :type distinct_fields: :class:`dict` - :return: A copy of `distinct_fields`, with the data from the entity name put - into the base portion of the dictionary + A function that can be called recursively to map attributes from related + entities to the associated data + + :param nested_dict: Dictionary to insert data into + :type nested_dict: :class:`dict` + :param split_attr_name: List of parts to an attribute name, that have been split + by "." 
+ :type split_attr_name: :class:`list` + :param query_data: Data to be added to the dictionary + :type query_data: :class:`str` + :return: Dictionary to be added to the result dictionary """ - log.debug("Entity Name: %s, Distinct Fields: %s", entity_name, distinct_fields) - - distinct_fields_copy = distinct_fields.copy() - - # Reset base fields - distinct_fields_copy["base"] = [] - if entity_name in distinct_fields_copy.keys(): - distinct_fields_copy["base"] = distinct_fields_copy[entity_name] + # Popping LHS of related attribute name to see if it's an attribute name or part + # of a path to a related entity + attr_name_pop = split_attr_name.pop(0) + + # Related attribute name, ready to insert data into dictionary + if len(split_attr_name) == 0: + # At the end of the attribute name path, so insert the data + nested_dict[attr_name_pop] = query_data + # Part of the path for related entity, need to recurse to get to attribute name + else: + nested_dict[attr_name_pop] = {} + self.map_nested_attrs( + nested_dict[attr_name_pop], split_attr_name, query_data, + ) - return distinct_fields_copy + return nested_dict def flatten_query_included_fields(self, includes): """ diff --git a/poetry.lock b/poetry.lock index 8b1de70c..c052da0e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -663,7 +663,7 @@ six = ">=1.5" [[package]] name = "python-icat" -version = "0.17.0" +version = "0.18.1" description = "Python interface to ICAT and IDS" category = "main" optional = false @@ -863,7 +863,7 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=1.2.3)", "pytest-flake8", "pyt [metadata] lock-version = "1.1" python-versions = "^3.6" -content-hash = "8544fcdd39d43fefe311167bd7981c5518ae5e0f39a8be5524bf924cb1510278" +content-hash = "8e6c3af6795dbb3c0d0f27e870b4d895cf399ca51418ebbb2636ccc561caf293" [metadata.files] aniso8601 = [ @@ -1230,7 +1230,7 @@ python-dateutil = [ {file = "python_dateutil-2.8.1-py2.py3-none-any.whl", hash = "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"}, ]
python-icat = [ - {file = "python-icat-0.17.0.tar.gz", hash = "sha256:92942ce5e4b4c7b7db8179b78c07c58b56091a1d275385f69dd99d19a58a9396"}, + {file = "python-icat-0.18.1.tar.gz", hash = "sha256:d8b8fc1a535a78e1aac263594851c2dada798b027c60cc9bb20411fb6835650c"}, ] pytz = [ {file = "pytz-2021.1-py2.py3-none-any.whl", hash = "sha256:eb10ce3e7736052ed3623d49975ce333bcd712c7bb19a58b9e2089d4057d0798"}, diff --git a/pyproject.toml b/pyproject.toml index 663ef326..01b9d60d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ Flask-Cors = "3.0.9" apispec = "3.3.0" flask-swagger-ui = "3.25.0" PyYAML = "5.4" -python-icat = "0.17.0" +python-icat = "0.18.1" suds-community = "^0.8.4" Flask-SQLAlchemy = "^2.4.4" requests = "^2.25.1" diff --git a/test/icat/endpoints/test_count_with_filters_icat.py b/test/icat/endpoints/test_count_with_filters_icat.py index 3f4d09c1..8b112c8e 100644 --- a/test/icat/endpoints/test_count_with_filters_icat.py +++ b/test/icat/endpoints/test_count_with_filters_icat.py @@ -2,24 +2,44 @@ class TestICATCountWithFilters: - @pytest.mark.usefixtures("single_investigation_test_data") + @pytest.mark.parametrize( + "query_params, expected_result", + [ + pytest.param( + '?where={"title": {"like": "Test data for the Python ICAT Backend on' + ' DataGateway API"}}', + 5, + id="Filter on test data", + ), + pytest.param( + '?where={"title": {"like": "Test data for the Python ICAT Backend on' + ' DataGateway API"}}&distinct=["startDate"]', + 1, + id="Distinct test data", + ), + ], + ) + @pytest.mark.usefixtures("multiple_investigation_test_data") def test_valid_count_with_filters( - self, flask_test_app_icat, valid_icat_credentials_header, + self, + flask_test_app_icat, + valid_icat_credentials_header, + query_params, + expected_result, ): test_response = flask_test_app_icat.get( - '/investigations/count?where={"title": {"like": "Test data for the Python' - ' ICAT Backend on DataGateway API"}}', + f"/investigations/count{query_params}", 
headers=valid_icat_credentials_header, ) - assert test_response.json == 1 + assert test_response.json == expected_result def test_valid_no_results_count_with_filters( self, flask_test_app_icat, valid_icat_credentials_header, ): test_response = flask_test_app_icat.get( - '/investigations/count?where={"title": {"like": "This filter should cause a' - '404 for testing purposes..."}}', + '/investigations/count?where={"title": {"like": "This filter should cause 0' + ' results to be found for testing purposes..."}}', headers=valid_icat_credentials_header, ) diff --git a/test/icat/filters/test_distinct_filter.py b/test/icat/filters/test_distinct_filter.py index 66255b3a..4adf3ae8 100644 --- a/test/icat/filters/test_distinct_filter.py +++ b/test/icat/filters/test_distinct_filter.py @@ -5,12 +5,19 @@ class TestICATDistinctFilter: - def test_valid_str_field_input(self, icat_query): - test_filter = PythonICATDistinctFieldFilter("name") + @pytest.mark.parametrize( + "attribute_name", + [ + pytest.param("name", id="Attribute for own entity"), + pytest.param("investigationUsers.role", id="Related attribute name"), + ], + ) + def test_valid_str_field_input(self, icat_query, attribute_name): + test_filter = PythonICATDistinctFieldFilter(attribute_name) test_filter.apply_filter(icat_query) assert ( - icat_query.conditions == {"name": "!= null"} + icat_query.attributes == [attribute_name] and icat_query.aggregate == "DISTINCT" ) @@ -19,8 +26,7 @@ def test_valid_list_fields_input(self, icat_query): test_filter.apply_filter(icat_query) assert ( - icat_query.conditions - == {"doi": "!= null", "name": "!= null", "title": "!= null"} + icat_query.attributes == ["doi", "name", "title"] and icat_query.aggregate == "DISTINCT" ) @@ -35,11 +41,30 @@ def test_distinct_aggregate_added(self, icat_query): assert icat_query.aggregate == "DISTINCT" - @pytest.mark.parametrize("existing_aggregate", ["COUNT", "AVG", "SUM"]) - def test_existing_aggregate_appended(self, icat_query, existing_aggregate): + 
@pytest.mark.parametrize( + "existing_aggregate, expected_instance_aggregate", + [ + pytest.param( + "COUNT", "DISTINCT", id="Existing count aggregate (count endpoints)", + ), + pytest.param("AVG", "AVG:DISTINCT", id="Existing avg aggregate"), + pytest.param("SUM", "SUM:DISTINCT", id="Existing sum aggregate"), + ], + ) + def test_existing_aggregate_on_query( + self, icat_query, existing_aggregate, expected_instance_aggregate, + ): icat_query.setAggregate(existing_aggregate) test_filter = PythonICATDistinctFieldFilter("name") test_filter.apply_filter(icat_query) - assert icat_query.aggregate == f"{existing_aggregate}:DISTINCT" + assert icat_query.aggregate == expected_instance_aggregate + + def test_manual_count_flag(self, icat_query): + icat_query.setAggregate("COUNT") + + test_filter = PythonICATDistinctFieldFilter("name") + test_filter.apply_filter(icat_query) + + assert icat_query.manual_count diff --git a/test/icat/test_query.py b/test/icat/test_query.py index d154ad7e..869c6445 100644 --- a/test/icat/test_query.py +++ b/test/icat/test_query.py @@ -1,10 +1,10 @@ -from datetime import datetime +from datetime import datetime, timezone from icat.entity import Entity import pytest from datagateway_api.common.date_handler import DateHandler -from datagateway_api.common.exceptions import FilterError, PythonICATError +from datagateway_api.common.exceptions import PythonICATError from datagateway_api.common.icat.filters import ( PythonICATSkipFilter, PythonICATWhereFilter, @@ -12,7 +12,7 @@ from datagateway_api.common.icat.query import ICATQuery -def prepare_icat_data_for_assertion(data, remove_id=False): +def prepare_icat_data_for_assertion(data, remove_id=False, remove_visit_id=False): """ Remove meta attributes from ICAT data. 
Meta attributes contain data about data creation/modification, and should be removed to ensure correct assertion values @@ -38,6 +38,8 @@ def prepare_icat_data_for_assertion(data, remove_id=False): # meta_attributes is immutable if remove_id: entity.pop("id") + if remove_visit_id: + entity.pop("visitId") assertable_data.append(entity) @@ -45,29 +47,272 @@ def prepare_icat_data_for_assertion(data, remove_id=False): class TestICATQuery: - def test_valid_query_creation(self, icat_client): - # Paramatise and add inputs for conditions, aggregate and includes - test_query = ICATQuery(icat_client, "User") + @pytest.mark.parametrize( + "input_conditions, input_aggregate, input_includes, expected_conditions," + " expected_aggregate, expected_includes", + [ + pytest.param( + {"fullName": "like Bob"}, + None, + None, + {"fullName": "like Bob"}, + None, + set(), + id="Query with condition", + ), + pytest.param( + None, + "DISTINCT", + None, + {}, + "DISTINCT", + set(), + id="Query with aggregate", + ), + pytest.param( + None, + None, + ["instrumentScientists"], + {}, + None, + {"instrumentScientists"}, + id="Query with included entity", + ), + ], + ) + def test_valid_query_creation( + self, + icat_client, + input_conditions, + input_aggregate, + input_includes, + expected_conditions, + expected_aggregate, + expected_includes, + ): + test_query = ICATQuery( + icat_client, + "User", + conditions=input_conditions, + aggregate=input_aggregate, + includes=input_includes, + ) assert test_query.query.entity == icat_client.getEntityClass("User") + assert test_query.query.conditions == expected_conditions + assert test_query.query.aggregate == expected_aggregate + assert test_query.query.includes == expected_includes + + def test_valid_manual_count_flag_init(self, icat_client): + """ + Flag required for distinct filters used on count endpoints should be initialised + in `__init__()` of ICATQuery` + """ + test_query = ICATQuery(icat_client, "User") + + assert not 
test_query.query.manual_count def test_invalid_query_creation(self, icat_client): with pytest.raises(PythonICATError): ICATQuery(icat_client, "User", conditions={"invalid": "invalid"}) + @pytest.mark.parametrize( + "query_conditions, query_aggregate, query_includes, query_attributes" + ", manual_count, return_json_format_flag, expected_query_result", + [ + pytest.param( + { + "title": "like '%Test data for the Python ICAT Backend on" + " DataGateway API%'", + }, + None, + None, + None, + False, + True, + [ + { + "doi": None, + "endDate": "2020-01-08 01:01:01+00:00", + "name": "Test Data for DataGateway API Testing 0", + "releaseDate": None, + "startDate": "2020-01-04 01:01:01+00:00", + "summary": None, + "title": "Test data for the Python ICAT Backend on DataGateway" + " API 0", + }, + ], + id="Ordinary query", + ), + pytest.param( + { + "title": "like '%Test data for the Python ICAT Backend on" + " DataGateway API%'", + }, + None, + ["facility"], + None, + False, + True, + [ + { + "doi": None, + "endDate": "2020-01-08 01:01:01+00:00", + "name": "Test Data for DataGateway API Testing 0", + "releaseDate": None, + "startDate": "2020-01-04 01:01:01+00:00", + "summary": None, + "title": "Test data for the Python ICAT Backend on DataGateway" + " API 0", + "facility": { + "createId": "user", + "createTime": "2011-01-29 06:19:43+00:00", + "daysUntilRelease": 10, + "description": "Lorem ipsum light source", + "fullName": None, + "id": 1, + "modId": "user", + "modTime": "2008-10-15 12:05:09+00:00", + "name": "LILS", + "url": None, + }, + }, + ], + id="Query with included entity", + ), + pytest.param( + { + "title": "like '%Test data for the Python ICAT Backend on" + " DataGateway API%'", + }, + "COUNT", + None, + None, + False, + True, + [1], + id="Count query", + ), + pytest.param( + { + "title": "like '%Test data for the Python ICAT Backend on" + " DataGateway API%'", + }, + None, + None, + None, + False, + False, + [ + { + "doi": None, + "endDate": "2020-01-08 
01:01:01+00:00", + "name": "Test Data for DataGateway API Testing 0", + "releaseDate": None, + "startDate": "2020-01-04 01:01:01+00:00", + "summary": None, + "title": "Test data for the Python ICAT Backend on DataGateway" + " API 0", + }, + ], + id="Data returned as entity objects", + ), + pytest.param( + { + "title": "like '%Test data for the Python ICAT Backend on" + " DataGateway API%'", + }, + "DISTINCT", + None, + "title", + False, + True, + [ + { + "title": "Test data for the Python ICAT Backend on DataGateway" + " API 0", + }, + ], + id="Single distinct field", + ), + pytest.param( + { + "title": "like '%Test data for the Python ICAT Backend on" + " DataGateway API%'", + }, + "DISTINCT", + None, + ["title", "name"], + False, + True, + [ + { + "title": "Test data for the Python ICAT Backend on DataGateway" + " API 0", + "name": "Test Data for DataGateway API Testing 0", + }, + ], + id="Multiple distinct fields", + ), + pytest.param( + { + "title": "like '%Test data for the Python ICAT Backend on" + " DataGateway API%'", + }, + "DISTINCT", + None, + ["title", "name"], + True, + True, + [1], + id="Multiple distinct fields on count query", + ), + pytest.param( + {"title": "like '%Unknown testing data for DG API%'"}, + "DISTINCT", + None, + ["title", "name"], + True, + True, + [0], + id="Multiple distinct fields on count query to return 0 matches", + ), + ], + ) + @pytest.mark.usefixtures("single_investigation_test_data") def test_valid_query_exeuction( - self, icat_client, single_investigation_test_data, + self, + icat_client, + query_conditions, + query_aggregate, + query_includes, + query_attributes, + manual_count, + return_json_format_flag, + expected_query_result, ): - test_query = ICATQuery(icat_client, "Investigation") - test_data_filter = PythonICATWhereFilter( - "title", "Test data for the Python ICAT Backend on DataGateway API", "like", + test_query = ICATQuery( + icat_client, + "Investigation", + conditions=query_conditions, + 
aggregate=query_aggregate, + includes=query_includes, + ) + test_query.query.setAttributes(query_attributes) + test_query.query.manual_count = manual_count + query_data = test_query.execute_query( + icat_client, return_json_formattable=return_json_format_flag, ) - test_data_filter.apply_filter(test_query.query) - query_data = test_query.execute_query(icat_client) - query_output_dicts = prepare_icat_data_for_assertion(query_data) + if ( + test_query.query.aggregate != "COUNT" + and test_query.query.aggregate != "DISTINCT" + ): + query_data = prepare_icat_data_for_assertion( + query_data, remove_id=True, remove_visit_id=True, + ) - assert query_output_dicts == single_investigation_test_data + assert query_data == expected_query_result def test_invalid_query_execution(self, icat_client): test_query = ICATQuery(icat_client, "Investigation") @@ -94,150 +339,71 @@ def test_json_format_execution_output( assert query_output_json == single_investigation_test_data - @pytest.mark.parametrize( - "input_distinct_fields, included_fields, expected_output", - [ - pytest.param( - ["id"], - [], - {"base": ["id"]}, - id="Base only distinct attribute, no included attributes", - ), - pytest.param( - ["id", "doi", "name", "createTime"], - [], - {"base": ["id", "doi", "name", "createTime"]}, - id="Multiple base only distinct attributes, no included attributes", - ), - pytest.param( - ["id"], - ["investigation"], - {"base": ["id"]}, - id="Base only distinct attribute, single, unnested included attributes", - ), - pytest.param( - ["id"], - ["investigation", "parameters", "type"], - {"base": ["id"]}, - id="Base only distinct attribute, multiple, unnested included" - " attributes", - ), - pytest.param( - ["dataset.investigation.name"], - ["dataset", "investigation"], - {"base": [], "dataset": [], "investigation": ["name"]}, - id="Single nested-include distinct attribute", - ), - pytest.param( - ["dataset.investigation.name", "datafileFormat.facility.url"], - ["dataset", "investigation", 
"datafileFormat", "facility"], - { - "base": [], - "dataset": [], - "investigation": ["name"], - "datafileFormat": [], - "facility": ["url"], - }, - id="Multiple nested-include distinct attributes", - ), - ], - ) - def test_valid_distinct_attribute_mapping( - self, icat_client, input_distinct_fields, included_fields, expected_output, - ): - # Entity name passed to ICATQuery is irrelevant for this test - test_query = ICATQuery(icat_client, "Datafile") - - mapped_attributes = test_query.map_distinct_attributes_to_entity_names( - input_distinct_fields, included_fields, - ) - - assert mapped_attributes == expected_output - - @pytest.mark.parametrize( - "input_distinct_fields, included_fields", - [ - pytest.param( - ["investigation.id"], - [], - id="Single nested-include distinct attribute, included entity not" - " added", - ), - ], - ) - def test_invalid_distinct_attribute_mapping( - self, icat_client, input_distinct_fields, included_fields, - ): - """ - Test that when the appropriate included fields are not present, a `FilterError` - will be raised - """ - test_query = ICATQuery(icat_client, "Datafile") + def test_valid_get_distinct_attributes(self, icat_client): + test_query = ICATQuery(icat_client, "Investigation") + test_query.query.setAttributes(["summary", "name"]) - with pytest.raises(FilterError): - test_query.map_distinct_attributes_to_entity_names( - input_distinct_fields, included_fields, - ) + assert test_query.get_distinct_attributes() == ["summary", "name"] @pytest.mark.parametrize( - "included_entity_name, input_fields, expected_fields", + "distinct_attrs, result, expected_output", [ pytest.param( - "dataset", - {"base": ["id"]}, - {"base": []}, - id="Include filter used but no included attributes on distinct filter," - " no entity name match", + ["summary"], + ["Summary 1"], + {"summary": "Summary 1"}, + id="Single attribute", ), pytest.param( - "no match", - {"base": ["id"], "dataset": ["name"]}, - {"base": [], "dataset": ["name"]}, - id="Distinct 
filter contains included attributes, no entity name match", + ["startDate"], + ( + datetime( + year=2020, + month=1, + day=4, + hour=1, + minute=1, + second=1, + tzinfo=timezone.utc, + ), + ), + {"startDate": "2020-01-04 01:01:01+00:00"}, + id="Single date attribute", ), pytest.param( - "dataset", - {"base": ["id"], "dataset": ["name"]}, - {"base": ["name"], "dataset": ["name"]}, - id="Distinct filter contains included attributes, entity name match", + ["summary", "title"], + ("Summary 1", "Title 1"), + {"summary": "Summary 1", "title": "Title 1"}, + id="Multiple attributes", ), pytest.param( - "dataset", - {"base": ["id"], "dataset": [], "investigation": ["name"]}, - {"base": [], "dataset": [], "investigation": ["name"]}, - id="Distinct filter contains nested included attributes, no entity name" - " match", + ["summary", "investigationUsers.role"], + ("Summary 1", "PI"), + {"summary": "Summary 1", "investigationUsers": {"role": "PI"}}, + id="Multiple attributes with related attribute", ), pytest.param( - "investigation", - {"base": ["id"], "dataset": [], "investigation": ["name"]}, - {"base": ["name"], "dataset": [], "investigation": ["name"]}, - id="Distinct filter contains nested included attributes, entity name" - " match", + ["summary", "investigationUsers.investigation.name"], + ("Summary 1", "Investigation Name 1"), + { + "summary": "Summary 1", + "investigationUsers": { + "investigation": {"name": "Investigation Name 1"}, + }, + }, + id="Multiple attributes with 2-level nested related attribute", ), ], ) - def test_prepare_distinct_fields( - self, icat_client, included_entity_name, input_fields, expected_fields, + def test_valid_map_distinct_attributes_to_results( + self, icat_client, distinct_attrs, result, expected_output, ): - """ - The function tested here should move the list from - `input_fields[included_entity_name]` to `input_fields["base"]` ready for when - `entity_to_dict()` is called as part of a recursive call, but the original - `input_fields` 
should not be modified. This caused a bug previously - """ - unmodded_distinct_fields = input_fields.copy() - test_query = ICATQuery(icat_client, "Datafile") - - distinct_fields_for_recursive_call = test_query.prepare_distinct_fields( - included_entity_name, input_fields, + test_query = ICATQuery(icat_client, "Investigation") + test_output = test_query.map_distinct_attributes_to_results( + distinct_attrs, result, ) - print(distinct_fields_for_recursive_call) - print(input_fields) - assert distinct_fields_for_recursive_call == expected_fields - # prepare_distinct_fields() should not modify the original `distinct_fields` - assert input_fields == unmodded_distinct_fields + assert test_output == expected_output def test_include_fields_list_flatten(self, icat_client): included_field_set = {