Skip to content

Commit

Permalink
Merge pull request #161 from ral-facilities/feature/icat-distinct-fil…
Browse files Browse the repository at this point in the history
…ter-#141

Implement Distinct Filter for Python ICAT Backend
  • Loading branch information
MRichards99 authored Oct 1, 2020
2 parents 767aef1 + acc9672 commit e419d8c
Show file tree
Hide file tree
Showing 5 changed files with 184 additions and 97 deletions.
1 change: 1 addition & 0 deletions common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
class Constants:
    """Constant values shared across the API's backends."""

    # Value returned by `config.get_db_url()`
    DATABASE_URL = config.get_db_url()
    # `strftime()` format used when datetime values are converted to strings
    ACCEPTED_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
    # Condition text used to mark an attribute as requested by a distinct filter
    PYTHON_ICAT_DISTINCT_CONDITION = "!= null"
    # Original (misspelt) name, kept as an alias so existing references still work
    PYTHON_ICAT_DISTNCT_CONDITION = PYTHON_ICAT_DISTINCT_CONDITION
    # Value returned by `config.get_icat_properties()`
    ICAT_PROPERTIES = config.get_icat_properties()
9 changes: 9 additions & 0 deletions common/filters.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from abc import ABC, abstractmethod
import logging

from common.exceptions import BadRequestError

log = logging.getLogger()


Expand All @@ -27,6 +29,13 @@ def __init__(self, field, value, operation):
self.value = value
self.operation = operation

if self.operation == "in":
if not isinstance(self.value, list):
raise BadRequestError(
"When using the 'in' operation for a WHERE filter, the values must"
" be in a list format e.g. [1, 2, 3]"
)

def _extract_filter_fields(self, field):
fields = field.split(".")
include_depth = len(fields)
Expand Down
32 changes: 26 additions & 6 deletions common/icat/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ def apply_filter(self, query):
log.info("Creating condition for ICAT where filter")
if self.operation == "eq":
where_filter = self.create_condition(self.field, "=", self.value)
elif self.operation == "ne":
where_filter = self.create_condition(self.field, "!=", self.value)
elif self.operation == "like":
where_filter = self.create_condition(self.field, "like", self.value)
where_filter = self.create_condition(self.field, "like", f"%{self.value}%")
elif self.operation == "lt":
where_filter = self.create_condition(self.field, "<", self.value)
elif self.operation == "lte":
Expand All @@ -34,7 +36,11 @@ def apply_filter(self, query):
elif self.operation == "gte":
where_filter = self.create_condition(self.field, ">=", self.value)
elif self.operation == "in":
where_filter = self.create_condition(self.field, "in", tuple(self.value))
# Convert self.value into a string with brackets equivalent to tuple format.
# Cannot convert straight to tuple as single element tuples contain a
# trailing comma which Python ICAT/JPQL doesn't accept
self.value = str(self.value).replace("[", "(").replace("]", ")")
where_filter = self.create_condition(self.field, "in", self.value)
else:
raise FilterError(f"Bad operation given to where filter: {self.operation}")

Expand Down Expand Up @@ -64,10 +70,13 @@ def create_condition(attribute_name, operator, value):
"""

conditions = {}
# Removing quote marks when doing conditions with IN expressions
jpql_value = f"{value}" if isinstance(value, tuple) else f"'{value}'"
# Removing quote marks when doing conditions with IN expressions or when a
# distinct filter is used in a request
jpql_value = (
f"{value}" if operator == "in" or operator == "!=" else f"'{value}'"
)
conditions[attribute_name] = f"{operator} {jpql_value}"

log.debug("Conditions in ICAT where filter, %s", conditions)
return conditions


Expand All @@ -76,7 +85,18 @@ def __init__(self, fields):
super().__init__(fields)

def apply_filter(self, query):
    """
    Apply the distinct filter to a Python ICAT query by setting its aggregate to
    DISTINCT and adding a "not equal to null" WHERE condition for each field in
    `self.fields`

    :param query: Query object the filter should be applied to
    :raises FilterError: If a ValueError is raised while the aggregate or the
        conditions are being added to the query
    """
    try:
        log.info("Adding ICAT distinct filter to ICAT query")
        query.setAggregate("DISTINCT")

        # Using where filters to identify which fields to apply distinct to
        for field in self.fields:
            where_filter = PythonICATWhereFilter(field, "null", "ne")
            where_filter.apply_filter(query)

        log.debug("Fields for distinct filter: %s", self.fields)
    except ValueError as e:
        # Chain the original exception so its traceback isn't lost
        raise FilterError(e) from e


class PythonICATOrderFilter(OrderFilter):
Expand Down
98 changes: 7 additions & 91 deletions common/icat/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import logging
from datetime import datetime, timedelta

from icat.query import Query
from icat.exception import ICATSessionError, ICATValidationError
from common.exceptions import (
AuthenticationError,
Expand All @@ -18,6 +17,7 @@
PythonICATSkipFilter,
PythonICATOrderFilter,
)
from common.icat.query import ICATQuery


log = logging.getLogger()
Expand Down Expand Up @@ -94,87 +94,6 @@ def refresh_client_session(client):
client.refresh()


def construct_icat_query(
    client, entity_name, conditions=None, aggregate=None, includes=None
):
    """
    Create a Query object within Python ICAT

    :param client: ICAT client containing an authenticated user
    :type client: :class:`icat.client.Client`
    :param entity_name: Name of the entity to get data from
    :type entity_name: :class:`suds.sax.text.Text`
    :param conditions: Constraints used when an entity is queried
    :type conditions: :class:`dict`
    :param aggregate: Name of the aggregate function to apply. Operations such as
        counting the number of records. See `icat.query.setAggregate` for valid values.
    :type aggregate: :class:`str`
    :param includes: List of related entity names to add to the query so related
        entities (and their data) can be returned with the query result
    :type includes: :class:`str` or iterable of :class:`str`
    :return: Query object from Python ICAT
    :raises PythonICATError: If a ValueError is raised when creating a Query(), 500 will
        be returned as a response
    """

    try:
        return Query(
            client,
            entity_name,
            conditions=conditions,
            aggregate=aggregate,
            includes=includes,
        )
    except ValueError as e:
        # Chain the ValueError so the invalid argument can still be identified
        raise PythonICATError(
            "An issue has occurred while creating a Python ICAT Query object,"
            " suggesting an invalid argument"
        ) from e


def execute_icat_query(client, query, return_json_formattable=False):
    """
    Execute a previously created ICAT Query object and return in the format specified
    by the return_json_formattable flag

    :param client: ICAT client containing an authenticated user
    :type client: :class:`icat.client.Client`
    :param query: ICAT Query object to execute within Python ICAT
    :type query: :class:`icat.query.Query`
    :param return_json_formattable: Flag to determine whether the data from the query
        should be returned as a list of data ready to be converted straight to JSON
        (i.e. if the data will be used as a response for an API call) or whether to
        leave the data in a Python ICAT format (i.e. if it's going to be manipulated at
        some point)
    :type return_json_formattable: :class:`bool`
    :return: Data (of type list) from the executed query
    :raises PythonICATError: If an ICATValidationError is raised by the search
    """

    try:
        query_result = client.search(query)
    except ICATValidationError as e:
        # Chain the original exception so its traceback isn't lost
        raise PythonICATError(e) from e

    if not return_json_formattable:
        return query_result

    data = []
    for result in query_result:
        dict_result = result.as_dict()
        for key, value in dict_result.items():
            # Convert datetime objects to strings so they can be JSON serialisable
            if isinstance(value, datetime):
                # Remove timezone data which isn't utilised in ICAT
                dict_result[key] = value.replace(tzinfo=None).strftime(
                    Constants.ACCEPTED_DATE_FORMAT
                )

        data.append(dict_result)
    return data


def get_python_icat_entity_name(client, database_table_name):
"""
From the database table name, this function returns the correctly cased entity name
Expand Down Expand Up @@ -303,17 +222,14 @@ def get_entity_by_id(client, table_name, id_, return_json_formattable_data):
:raises: MissingRecordError: If Python ICAT cannot find a record of the specified ID
"""

selected_entity_name = get_python_icat_entity_name(client, table_name)
# Set query condition for the selected ID
id_condition = PythonICATWhereFilter.create_condition("id", "=", id_)

selected_entity_name = get_python_icat_entity_name(client, table_name)

id_query = construct_icat_query(
id_query = ICATQuery(
client, selected_entity_name, conditions=id_condition, includes="1"
)
entity_by_id_data = execute_icat_query(
client, id_query, return_json_formattable_data
)
entity_by_id_data = id_query.execute_query(client, return_json_formattable_data)

if not entity_by_id_data:
# Cannot find any data matching the given ID
Expand Down Expand Up @@ -379,15 +295,15 @@ def get_entity_with_filters(client, table_name, filters):
"""

selected_entity_name = get_python_icat_entity_name(client, table_name)
query = construct_icat_query(client, selected_entity_name)
query = ICATQuery(client, selected_entity_name)

filter_handler = FilterOrderHandler()
filter_handler.add_filters(filters)
merge_limit_skip_filters(filter_handler)
clear_order_filters(filter_handler.filters)
filter_handler.apply_filters(query)
filter_handler.apply_filters(query.query)

data = execute_icat_query(client, query, True)
data = query.execute_query(client, True)

if not data:
raise MissingRecordError("No results found")
Expand Down
141 changes: 141 additions & 0 deletions common/icat/query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import logging
from datetime import datetime

from icat.query import Query
from icat.exception import ICATValidationError

from common.exceptions import PythonICATError
from common.constants import Constants

log = logging.getLogger()


class ICATQuery:
    """
    Wrapper around a Python ICAT `Query` object. The query is built when the
    wrapper is created (and stored as `self.query`) and can be executed using
    `execute_query()`
    """

    def __init__(
        self, client, entity_name, conditions=None, aggregate=None, includes=None
    ):
        """
        Create a Query object within Python ICAT and store it as `self.query`

        :param client: ICAT client containing an authenticated user
        :type client: :class:`icat.client.Client`
        :param entity_name: Name of the entity to get data from
        :type entity_name: :class:`suds.sax.text.Text`
        :param conditions: Constraints used when an entity is queried
        :type conditions: :class:`dict`
        :param aggregate: Name of the aggregate function to apply. Operations such as
            counting the number of records. See `icat.query.setAggregate` for valid
            values.
        :type aggregate: :class:`str`
        :param includes: List of related entity names to add to the query so related
            entities (and their data) can be returned with the query result
        :type includes: :class:`str` or iterable of :class:`str`
        :raises PythonICATError: If a ValueError is raised when creating a Query(), 500
            will be returned as a response
        """

        # Attribute names requested by a distinct filter. Rebuilt by
        # `execute_query()` for DISTINCT queries, but initialised here so
        # `check_attribute_name_for_distinct()` is safe to call at any time
        self.attribute_names = []

        try:
            self.query = Query(
                client,
                entity_name,
                conditions=conditions,
                aggregate=aggregate,
                includes=includes,
            )
        except ValueError as e:
            # Chain the ValueError so the invalid argument can still be identified
            raise PythonICATError(
                "An issue has occurred while creating a Python ICAT Query object,"
                " suggesting an invalid argument"
            ) from e

    def execute_query(self, client, return_json_formattable=False):
        """
        Execute a previously created ICAT Query object and return in the format
        specified by the return_json_formattable flag

        :param client: ICAT client containing an authenticated user
        :type client: :class:`icat.client.Client`
        :param return_json_formattable: Flag to determine whether the data from the
            query should be returned as a list of data ready to be converted straight to
            JSON (i.e. if the data will be used as a response for an API call) or
            whether to leave the data in a Python ICAT format (i.e. if it's going to be
            manipulated at some point)
        :type return_json_formattable: :class:`bool`
        :return: Data (of type list) from the executed query
        :raises PythonICATError: If an ICATValidationError is raised by the search
        """

        try:
            query_result = client.search(self.query)
        except ICATValidationError as e:
            # Chain the original exception so its traceback isn't lost
            raise PythonICATError(e) from e

        distinct_filter_flag = self.query.aggregate == "DISTINCT"
        if distinct_filter_flag:
            # Check query's conditions for the ones created by the distinct filter
            self.attribute_names = []
            log.debug("Query conditions: %s", self.query.conditions)

            for key, value in self.query.conditions.items():
                # Value can be a list if there's multiple WHERE filters for the same
                # attribute name within an ICAT query
                if isinstance(value, list):
                    for sub_value in value:
                        self.check_attribute_name_for_distinct(key, sub_value)
                elif isinstance(value, str):
                    self.check_attribute_name_for_distinct(key, value)
            log.debug(
                "Attribute names used in the distinct filter, as captured by the"
                " query's conditions %s",
                self.attribute_names,
            )

        if not return_json_formattable:
            return query_result

        data = []
        for result in query_result:
            dict_result = result.as_dict()

            for key, value in dict_result.items():
                # Convert datetime objects to strings so they can be JSON
                # serialisable
                if isinstance(value, datetime):
                    # Remove timezone data which isn't utilised in ICAT
                    dict_result[key] = value.replace(tzinfo=None).strftime(
                        Constants.ACCEPTED_DATE_FORMAT
                    )

            if distinct_filter_flag:
                # Add only the required data as per request's distinct filter
                # fields
                dict_result = {
                    key: value
                    for key, value in dict_result.items()
                    if key in self.attribute_names
                }

            data.append(dict_result)
        return data

    def check_attribute_name_for_distinct(self, key, value):
        """
        Check the attribute name to see if its associated value is used to signify the
        attribute is requested in a distinct filter and if so, append it to the list of
        attribute names

        :param key: Name of an attribute
        :type key: :class:`str`
        :param value: Expression that should be applied to the associated attribute
            e.g. "= 'Metadata'"
        :type value: :class:`str`
        """
        if value == Constants.PYTHON_ICAT_DISTNCT_CONDITION:
            self.attribute_names.append(key)

0 comments on commit e419d8c

Please sign in to comment.