Skip to content

Commit

Permalink
Code cleanup
Browse files Browse the repository at this point in the history
- Add docstrings
- Remove client_pool kwargs
- Add type hints
- Fix any outstanding linting issues
  • Loading branch information
MRichards99 committed Oct 2, 2024
1 parent 74185f9 commit a36b607
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 25 deletions.
8 changes: 2 additions & 6 deletions datagateway_api/src/datagateway_api/icat/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,7 @@ def logout(self, session_id, **kwargs):
@requires_session_id
@queries_records
def get_with_filters(self, session_id, entity_type, filters, **kwargs):
return get_entity_with_filters(
kwargs.get("client"), entity_type, filters, kwargs.get("client_pool"),
)
return get_entity_with_filters(kwargs.get("client"), entity_type, filters)

@requires_session_id
@queries_records
Expand All @@ -110,9 +108,7 @@ def get_one_with_filters(self, session_id, entity_type, filters, **kwargs):
@requires_session_id
@queries_records
def count_with_filters(self, session_id, entity_type, filters, **kwargs):
return get_count_with_filters(
kwargs.get("client"), entity_type, filters, kwargs.get("client_pool"),
)
return get_count_with_filters(kwargs.get("client"), entity_type, filters)

@requires_session_id
@queries_records
Expand Down
33 changes: 23 additions & 10 deletions datagateway_api/src/datagateway_api/icat/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ def update_entity_by_id(client, entity_type, id_, new_data):
return get_entity_by_id(client, entity_type, id_, True)


def get_entity_with_filters(client, entity_type, filters, client_pool=None):
def get_entity_with_filters(client, entity_type, filters):
"""
Gets all the records of a given entity, based on the filters provided in the request
Expand All @@ -302,10 +302,10 @@ def get_entity_with_filters(client, entity_type, filters, client_pool=None):
result of the query
"""
log.info("Getting entity using request's filters")
return get_data_with_filters(client, entity_type, filters, client_pool=client_pool)
return get_data_with_filters(client, entity_type, filters)


def get_count_with_filters(client, entity_type, filters, client_pool=None):
def get_count_with_filters(client, entity_type, filters):
"""
Get the number of results of a given entity, based on the filters provided in the
request. This acts very much like `get_entity_with_filters()` but returns the number
Expand All @@ -325,16 +325,24 @@ def get_count_with_filters(client, entity_type, filters, client_pool=None):
entity_type,
)

data = get_data_with_filters(
client, entity_type, filters, aggregate="COUNT", client_pool=client_pool,
)
data = get_data_with_filters(client, entity_type, filters, aggregate="COUNT")
# Only ever 1 element in a count query result
return data[0]


def get_data_with_filters(
client, entity_type, filters, aggregate=None, client_pool=None,
):
def get_data_with_filters(client, entity_type, filters, aggregate=None):
"""
Gets all the records of a given entity, based on the filters and an optional
aggregate provided in the request. This function is called by
`get_entity_with_filters()` and `get_count_with_filters()` that deal with GET entity
and GET /count entity endpoints respectively
This function uses the reader performance query functionality IF it is enabled in
the config. Checks are done to see whether this functionality has been enabled and
whether the query is suitable to be completed with the reader account. There are
more details about the inner workings in ReaderQueryHandler
"""

if not is_use_reader_for_performance_enabled():
# just execute the query as normal
return execute_entity_query(client, entity_type, filters, aggregate=aggregate)
Expand Down Expand Up @@ -370,6 +378,11 @@ def get_data_with_filters(


def execute_entity_query(client, entity_type, filters, aggregate=None):
"""
Assemble a query object with the user's query filters and execute the query by
passing it to ICAT, returning them in this function
"""

query = ICATQuery(client, entity_type, aggregate=aggregate)

filter_handler = FilterOrderHandler()
Expand All @@ -384,7 +397,7 @@ def execute_entity_query(client, entity_type, filters, aggregate=None):
return query.execute_query(client, True)


def is_use_reader_for_performance_enabled():
def is_use_reader_for_performance_enabled() -> bool:
"""
Returns true is the 'use_reader_for_performance' section is present in the
config file and 'enabled' in that section is set to true
Expand Down
75 changes: 66 additions & 9 deletions datagateway_api/src/datagateway_api/icat/reader_query_handler.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import logging
from typing import List, Optional

from icat.exception import ICATSessionError

from datagateway_api.src.common.config import Config
from datagateway_api.src.common.exceptions import PythonICATError
from datagateway_api.src.common.filter_order_handler import FilterOrderHandler
from datagateway_api.src.common.filters import QueryFilter
from datagateway_api.src.datagateway_api.icat.filters import PythonICATWhereFilter
from datagateway_api.src.datagateway_api.icat.icat_client_pool import ICATClient
from datagateway_api.src.datagateway_api.icat.query import ICATQuery
Expand All @@ -10,14 +15,38 @@


class ReaderQueryHandler:
# TODO - better names and comments on dicts to explain what they're for
# TODO - add docstrings
"""
This class handles the mechanism that allows 'performance queries' to occur on
particular endpoints. These queries are to improve performance on requests that have
a WHERE filter on the ID of the parent entity where passing the query directly to
ICAT can cause performance issues. This is due to the complexity of the ICAT rules,
meaning a relatively simple SQL query is a long paragraph of SQL. The rules are
bypassed by performing an equivalent check to see if the user can see the parent
entity by querying for it directly. Once permissions have been verified, the user's
original query is executed using a configurable reader account.
On a production instance where this functionality is needed, the reader account will
have been setup with appropriate ICAT rules to view the entities.
Example workflow:
- User sends request to /datafiles with a WHERE filter of dataset.id = 4
- Query is determined as eligble
- Dataset query is sent to ICAT with a WHERE filter of id = 4
- If the appropriate dataset is returned, the user's original query is executed, but
as the reader account, not the user's account
- If no dataset is found (i.e. the user doesn't have permission is view the dataset)
the API responds with a 403
"""

# Lookup to determine which field to search whether a user has permission to view
entity_filter_check = {"Datafile": "dataset.id", "Dataset": "investigation.id"}
parent_entity_lookup = {"Datafile": "Dataset", "Dataset": "Investigation"}
# keep a cached reader_client for faster queries
# Keep a cached reader_client for faster queries. A reader client is created when
# the first instance of this class is created and is refreshed when a login attempt
# fails (due to an expired session ID)
reader_client = None

def __init__(self, entity_type, filters):
def __init__(self, entity_type: str, filters: List[QueryFilter]) -> None:
self.entity_type = entity_type
self.filters = filters
log.debug(
Expand All @@ -28,7 +57,13 @@ def __init__(self, entity_type, filters):
if not ReaderQueryHandler.reader_client:
self.create_reader_client()

def create_reader_client(self):
def create_reader_client(self) -> ICATClient:
"""
Create a new client (assigning it as a class variable) and login using the
reader's credentials. If the credentials aren't valid, a PythonICATError is
raised (resulting in a 500). The client object is returned
"""

log.info("Creating reader_client")
ReaderQueryHandler.reader_client = ICATClient("datagateway_api")
reader_config = Config.config.datagateway_api.use_reader_for_performance
Expand All @@ -45,18 +80,33 @@ def create_reader_client(self):
raise PythonICATError("Internal error with reader account configuration")
return ReaderQueryHandler.reader_client

def check_eligibility(self):
def check_eligibility(self) -> bool:
"""
This function checks whether the input query can be executed as a 'reader
performance query'. The entity of the query needs to be in `entity_filter_check`
and an appropriate WHERE filter needs to be sought
(using `get_where_filter_for_entity_id_check()`)
"""
log.info("Checking whether query is eligible to go via reader account")
if self.entity_type in ReaderQueryHandler.entity_filter_check.keys():
if self.get_where_filter_for_entity_id_check():
return True

return False

def is_query_eligible_for_reader_performance(self):
def is_query_eligible_for_reader_performance(self) -> bool:
"""
Getter that returns a boolean regarding query eligibility
"""
return self.reader_query_eligible

def get_where_filter_for_entity_id_check(self):
def get_where_filter_for_entity_id_check(self) -> Optional[PythonICATWhereFilter]:
"""
Iterate through the instance's query filters and return a WHERE filter for a
relevant parent entity (e.g. dataset.id or datafile.id). The WHERE filter must
use the 'eq' operator
"""

for query_filter in self.filters:
if (
isinstance(query_filter, PythonICATWhereFilter)
Expand All @@ -72,7 +122,14 @@ def get_where_filter_for_entity_id_check(self):

return None

def is_user_authorised_to_see_entity_id(self, client):
def is_user_authorised_to_see_entity_id(self, client) -> bool:
"""
This function checks whether the user is authorised to see a parent entity (e.g.
if they query /datafiles, whether they can see a particular dataset). A query is
created and sent to ICAT for execution - the query is performed using the
session ID provided in the request
"""

log.info(
"Checking to see if user '%s' can see '%s' = %s",
client.getUserName(),
Expand Down

0 comments on commit a36b607

Please sign in to comment.