Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Distinct Filter for Python ICAT Backend #161

Merged
merged 14 commits into from
Oct 1, 2020
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
class Constants:
    """
    Project-wide constant values shared between modules
    """

    # Database connection URL, read from the project configuration
    DATABASE_URL = config.get_db_url()
    # strftime format used when datetime values are converted to strings
    ACCEPTED_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
    # Condition expression that marks an attribute as requested by a distinct filter
    PYTHON_ICAT_DISTINCT_CONDITION = "!= null"
    # Misspelt name kept as an alias so existing references keep working
    PYTHON_ICAT_DISTNCT_CONDITION = PYTHON_ICAT_DISTINCT_CONDITION
    # ICAT properties, read from the project configuration
    ICAT_PROPERTIES = config.get_icat_properties()
9 changes: 9 additions & 0 deletions common/filters.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from abc import ABC, abstractmethod
import logging

from common.exceptions import BadRequestError

log = logging.getLogger()


Expand All @@ -27,6 +29,13 @@ def __init__(self, field, value, operation):
self.value = value
self.operation = operation

if self.operation == "in":
if not isinstance(self.value, list):
raise BadRequestError(
"When using the 'in' operation for a WHERE filter, the values must"
" be in a list format e.g. [1, 2, 3]"
)

def _extract_filter_fields(self, field):
fields = field.split(".")
include_depth = len(fields)
Expand Down
30 changes: 25 additions & 5 deletions common/icat/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ def apply_filter(self, query):
log.info("Creating condition for ICAT where filter")
if self.operation == "eq":
where_filter = self.create_condition(self.field, "=", self.value)
elif self.operation == "ne":
where_filter = self.create_condition(self.field, "!=", self.value)
elif self.operation == "like":
where_filter = self.create_condition(self.field, "like", self.value)
elif self.operation == "lt":
Expand All @@ -34,7 +36,11 @@ def apply_filter(self, query):
elif self.operation == "gte":
where_filter = self.create_condition(self.field, ">=", self.value)
elif self.operation == "in":
where_filter = self.create_condition(self.field, "in", tuple(self.value))
# Convert self.value into a string with brackets equivalent to tuple format.
# Cannot convert straight to tuple as single element tuples contain a
# trailing comma which Python ICAT/JPQL doesn't accept
self.value = str(self.value).replace("[", "(").replace("]", ")")
where_filter = self.create_condition(self.field, "in", self.value)
else:
raise FilterError(f"Bad operation given to where filter: {self.operation}")

Expand Down Expand Up @@ -64,10 +70,13 @@ def create_condition(attribute_name, operator, value):
"""

conditions = {}
# Removing quote marks when doing conditions with IN expressions
jpql_value = f"{value}" if isinstance(value, tuple) else f"'{value}'"
# Removing quote marks when doing conditions with IN expressions or when a
# distinct filter is used in a request
jpql_value = (
f"{value}" if operator == "in" or operator == "!=" else f"'{value}'"
)
conditions[attribute_name] = f"{operator} {jpql_value}"

log.debug("Conditions in ICAT where filter, %s", conditions)
return conditions


Expand All @@ -76,7 +85,18 @@ def __init__(self, fields):
super().__init__(fields)

def apply_filter(self, query):
    """
    Apply this distinct filter to a Python ICAT query

    The query's aggregate is set to DISTINCT and, for every field held by this
    filter, a where filter with the "ne" operation and the value "null" is applied
    to the query. Those conditions are how the requested fields are identified
    later on.

    :param query: Query the distinct filter should be applied to. It must provide
        `setAggregate()` and be accepted by `PythonICATWhereFilter.apply_filter()`
    :raises FilterError: If a ValueError is raised while the aggregate or the
        where filters are being applied
    """
    try:
        log.info("Adding ICAT distinct filter to ICAT query")
        query.setAggregate("DISTINCT")

        # Using where filters to identify which fields to apply distinct to
        for field in self.fields:
            where_filter = PythonICATWhereFilter(field, "null", "ne")
            where_filter.apply_filter(query)

        log.debug("Fields for distinct filter: %s", self.fields)
    except ValueError as e:
        # Chain the original error so the root cause stays in the traceback
        raise FilterError(e) from e


class PythonICATOrderFilter(OrderFilter):
Expand Down
98 changes: 7 additions & 91 deletions common/icat/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import logging
from datetime import datetime, timedelta

from icat.query import Query
from icat.exception import ICATSessionError, ICATValidationError
from common.exceptions import (
AuthenticationError,
Expand All @@ -18,6 +17,7 @@
PythonICATSkipFilter,
PythonICATOrderFilter,
)
from common.icat.query import ICATQuery


log = logging.getLogger()
Expand Down Expand Up @@ -94,87 +94,6 @@ def refresh_client_session(client):
client.refresh()


def construct_icat_query(
    client, entity_name, conditions=None, aggregate=None, includes=None
):
    """
    Create a Query object within Python ICAT

    :param client: ICAT client containing an authenticated user
    :type client: :class:`icat.client.Client`
    :param entity_name: Name of the entity to get data from
    :type entity_name: :class:`suds.sax.text.Text`
    :param conditions: Constraints used when an entity is queried
    :type conditions: :class:`dict`
    :param aggregate: Name of the aggregate function to apply. Operations such as
        counting the number of records. See `icat.query.setAggregate` for valid values.
    :type aggregate: :class:`str`
    :param includes: List of related entity names to add to the query so related
        entities (and their data) can be returned with the query result
    :type includes: :class:`str` or iterable of :class:`str`
    :return: Query object from Python ICAT
    :raises PythonICATError: If a ValueError is raised when creating a Query(), 500 will
        be returned as a response
    """

    try:
        query = Query(
            client,
            entity_name,
            conditions=conditions,
            aggregate=aggregate,
            includes=includes,
        )
    except ValueError as e:
        # Chain the ValueError so the offending argument can still be diagnosed
        raise PythonICATError(
            "An issue has occurred while creating a Python ICAT Query object,"
            " suggesting an invalid argument"
        ) from e

    return query


def execute_icat_query(client, query, return_json_formattable=False):
    """
    Execute a previously created ICAT Query object and return in the format specified
    by the return_json_formattable flag

    :param client: ICAT client containing an authenticated user
    :type client: :class:`icat.client.Client`
    :param query: ICAT Query object to execute within Python ICAT
    :type query: :class:`icat.query.Query`
    :param return_json_formattable: Flag to determine whether the data from the query
        should be returned as a list of data ready to be converted straight to JSON
        (i.e. if the data will be used as a response for an API call) or whether to
        leave the data in a Python ICAT format (i.e. if it's going to be manipulated at
        some point)
    :type return_json_formattable: :class:`bool`
    :return: Data (of type list) from the executed query
    :raises PythonICATError: If Python ICAT raises an ICATValidationError while the
        query is being executed
    """

    try:
        query_result = client.search(query)
    except ICATValidationError as e:
        raise PythonICATError(e) from e

    if not return_json_formattable:
        return query_result

    data = []
    for result in query_result:
        # Build a fresh dictionary rather than rewriting the one being iterated
        dict_result = {}
        for key, value in result.as_dict().items():
            # Convert datetime objects to strings so they can be JSON serialisable
            if isinstance(value, datetime):
                # Remove timezone data which isn't utilised in ICAT
                value = value.replace(tzinfo=None).strftime(
                    Constants.ACCEPTED_DATE_FORMAT
                )
            dict_result[key] = value

        data.append(dict_result)
    return data


def get_python_icat_entity_name(client, database_table_name):
"""
From the database table name, this function returns the correctly cased entity name
Expand Down Expand Up @@ -303,17 +222,14 @@ def get_entity_by_id(client, table_name, id_, return_json_formattable_data):
:raises: MissingRecordError: If Python ICAT cannot find a record of the specified ID
"""

selected_entity_name = get_python_icat_entity_name(client, table_name)
# Set query condition for the selected ID
id_condition = PythonICATWhereFilter.create_condition("id", "=", id_)

selected_entity_name = get_python_icat_entity_name(client, table_name)

id_query = construct_icat_query(
id_query = ICATQuery(
client, selected_entity_name, conditions=id_condition, includes="1"
)
entity_by_id_data = execute_icat_query(
client, id_query, return_json_formattable_data
)
entity_by_id_data = id_query.execute_query(client, return_json_formattable_data)

if not entity_by_id_data:
# Cannot find any data matching the given ID
Expand Down Expand Up @@ -379,15 +295,15 @@ def get_entity_with_filters(client, table_name, filters):
"""

selected_entity_name = get_python_icat_entity_name(client, table_name)
query = construct_icat_query(client, selected_entity_name)
query = ICATQuery(client, selected_entity_name)

filter_handler = FilterOrderHandler()
filter_handler.add_filters(filters)
merge_limit_skip_filters(filter_handler)
clear_order_filters(filter_handler.filters)
filter_handler.apply_filters(query)
filter_handler.apply_filters(query.query)

data = execute_icat_query(client, query, True)
data = query.execute_query(client, True)

if not data:
raise MissingRecordError("No results found")
Expand Down
141 changes: 141 additions & 0 deletions common/icat/query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import logging
from datetime import datetime

from icat.query import Query
from icat.exception import ICATValidationError

from common.exceptions import PythonICATError
from common.constants import Constants

log = logging.getLogger()


class ICATQuery:
    """
    Wrapper around a Python ICAT `Query` object which can execute the query and,
    when the query uses a DISTINCT aggregate, trim each result down to the attributes
    requested by the distinct filter
    """

    def __init__(
        self, client, entity_name, conditions=None, aggregate=None, includes=None
    ):
        """
        Create a Query object within Python ICAT

        :param client: ICAT client containing an authenticated user
        :type client: :class:`icat.client.Client`
        :param entity_name: Name of the entity to get data from
        :type entity_name: :class:`suds.sax.text.Text`
        :param conditions: Constraints used when an entity is queried
        :type conditions: :class:`dict`
        :param aggregate: Name of the aggregate function to apply. Operations such as
            counting the number of records. See `icat.query.setAggregate` for valid
            values.
        :type aggregate: :class:`str`
        :param includes: List of related entity names to add to the query so related
            entities (and their data) can be returned with the query result
        :type includes: :class:`str` or iterable of :class:`str`
        :raises PythonICATError: If a ValueError is raised when creating a Query(), 500
            will be returned as a response
        """

        # Defined up front so check_attribute_name_for_distinct() can be used before
        # execute_query() has been called; execute_query() resets it for each
        # DISTINCT query it runs
        self.attribute_names = []

        try:
            self.query = Query(
                client,
                entity_name,
                conditions=conditions,
                aggregate=aggregate,
                includes=includes,
            )
        except ValueError as e:
            # Chain the ValueError so the offending argument can still be diagnosed
            raise PythonICATError(
                "An issue has occurred while creating a Python ICAT Query object,"
                " suggesting an invalid argument"
            ) from e

    def execute_query(self, client, return_json_formattable=False):
        """
        Execute a previously created ICAT Query object and return in the format
        specified by the return_json_formattable flag

        :param client: ICAT client containing an authenticated user
        :type client: :class:`icat.client.Client`
        :param return_json_formattable: Flag to determine whether the data from the
            query should be returned as a list of data ready to be converted straight to
            JSON (i.e. if the data will be used as a response for an API call) or
            whether to leave the data in a Python ICAT format (i.e. if it's going to be
            manipulated at some point)
        :type return_json_formattable: :class:`bool`
        :return: Data (of type list) from the executed query
        :raises PythonICATError: If Python ICAT raises an ICATValidationError while
            the query is being executed
        """

        try:
            query_result = client.search(self.query)
        except ICATValidationError as e:
            raise PythonICATError(e) from e

        distinct_filter_flag = self.query.aggregate == "DISTINCT"
        if distinct_filter_flag:
            # Check query's conditions for the ones created by the distinct filter
            self.attribute_names = []
            log.debug("Query conditions: %s", self.query.conditions)

            for key, value in self.query.conditions.items():
                # Value can be a list if there's multiple WHERE filters for the same
                # attribute name within an ICAT query
                if isinstance(value, list):
                    for sub_value in value:
                        self.check_attribute_name_for_distinct(key, sub_value)
                elif isinstance(value, str):
                    self.check_attribute_name_for_distinct(key, value)
            log.debug(
                "Attribute names used in the distinct filter, as captured by the"
                " query's conditions %s",
                self.attribute_names,
            )

        if not return_json_formattable:
            return query_result

        data = []
        for result in query_result:
            dict_result = {
                key: self._to_json_value(value)
                for key, value in result.as_dict().items()
            }

            if distinct_filter_flag:
                # Add only the required data as per request's distinct filter fields
                dict_result = {
                    key: value
                    for key, value in dict_result.items()
                    if key in self.attribute_names
                }

            data.append(dict_result)
        return data

    @staticmethod
    def _to_json_value(value):
        """
        Convert datetime objects to strings so they can be JSON serialisable; any
        other value is returned unchanged

        :param value: Value of a single attribute from a query result
        :return: The formatted date string for datetime input, otherwise `value`
        """
        if isinstance(value, datetime):
            # Remove timezone data which isn't utilised in ICAT
            return value.replace(tzinfo=None).strftime(Constants.ACCEPTED_DATE_FORMAT)
        return value

    def check_attribute_name_for_distinct(self, key, value):
        """
        Check the attribute name to see if its associated value is used to signify the
        attribute is requested in a distinct filter and if so, append it to the list of
        attribute names

        :param key: Name of an attribute
        :type key: :class:`str`
        :param value: Expression that should be applied to the associated attribute
            e.g. "= 'Metadata'"
        :type value: :class:`str`
        """
        if value == Constants.PYTHON_ICAT_DISTNCT_CONDITION:
            self.attribute_names.append(key)