From cbef7b9274a931dd023e034233961fe67c052519 Mon Sep 17 00:00:00 2001 From: Matthew Richards Date: Mon, 19 Jul 2021 14:23:54 +0000 Subject: [PATCH 1/7] #238: Implement join specs on queries with one-to-many ordering - This feature is currently still in PR (therefore unreleased) so I haven't updated the dependency version, I will do so when Python ICAT is released for 0.19.0 --- .../common/filter_order_handler.py | 1 + datagateway_api/common/icat/filters.py | 30 +++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/datagateway_api/common/filter_order_handler.py b/datagateway_api/common/filter_order_handler.py index 01254da8..02f55ccc 100644 --- a/datagateway_api/common/filter_order_handler.py +++ b/datagateway_api/common/filter_order_handler.py @@ -82,6 +82,7 @@ def clear_python_icat_order_filters(self): for icat_filter in self.filters ): PythonICATOrderFilter.result_order = [] + PythonICATOrderFilter.join_specs = {} def manage_icat_filters(self, filters, query): """ diff --git a/datagateway_api/common/icat/filters.py b/datagateway_api/common/icat/filters.py index 7dd5d806..811a67c8 100644 --- a/datagateway_api/common/icat/filters.py +++ b/datagateway_api/common/icat/filters.py @@ -148,6 +148,9 @@ def apply_filter(self, query): class PythonICATOrderFilter(OrderFilter): # Used to append the order tuples across all filters in a single request result_order = [] + # When 1-many relationships occur, entities should be joined using a LEFT JOIN to + # prevent disappearing results when sorting on DataGateway's table view + join_specs = {} def __init__(self, field, direction): # Python ICAT doesn't automatically uppercase the direction, errors otherwise @@ -165,6 +168,33 @@ def apply_filter(self, query): # relationship raise FilterError(e) + split_fields = self.field.split(".") + for field_pointer in range(len(split_fields)): + # Looking for plural entities but not field names + # This is to avoid adding JOINs to field names such as job's argument field + if ( + split_fields[field_pointer].endswith("s") + and split_fields[field_pointer] != split_fields[-1] + ): + # Length minus 1 is used to omit field names, same reason as above + for join_field_pointer in range(field_pointer, len(split_fields) - 1): + join_field_list = split_fields[ + field_pointer : join_field_pointer + 1 + ] + join_field_str = ".".join(join_field_list) + + PythonICATOrderFilter.join_specs[join_field_str] = "LEFT JOIN" + + log.debug( + "Setting query join specs: %s", PythonICATOrderFilter.join_specs, + ) + try: + query.setJoinSpecs(PythonICATOrderFilter.join_specs) + except (TypeError, ValueError) as e: + raise FilterError(e) + + break + class PythonICATSkipFilter(SkipFilter): def __init__(self, skip_value): From 6cd0d8d8cb8b5365297dd2ab67cb5dbb030fc2b6 Mon Sep 17 00:00:00 2001 From: Matthew Richards Date: Mon, 19 Jul 2021 14:25:39 +0000 Subject: [PATCH 2/7] #238: Edit expected data on query test - Not too sure why this is happening, perhaps the generator script fixes? Would've thought it would've brought this up in the relevant PR though but I'll monitor this via CI on this branch --- test/icat/test_query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/icat/test_query.py b/test/icat/test_query.py index 47df71f6..c72ae934 100644 --- a/test/icat/test_query.py +++ b/test/icat/test_query.py @@ -166,13 +166,13 @@ def test_invalid_query_creation(self, icat_client): " API 0", "facility": { "createId": "user", - "createTime": "2011-01-29 06:19:43+00:00", + "createTime": "2002-11-27 06:20:36+00:00", "daysUntilRelease": 10, "description": "Lorem ipsum light source", "fullName": None, "id": 1, "modId": "user", - "modTime": "2008-10-15 12:05:09+00:00", + "modTime": "2005-04-30 19:41:49+00:00", "name": "LILS", "url": None, }, From ad3f52d1ce006c03232ecbd498dfc71cce566f47 Mon Sep 17 00:00:00 2001 From: Matthew Richards Date: Mon, 19 Jul 2021 14:39:35 +0000 Subject: [PATCH 3/7] #238: Update comment for accuracy - 1-many related ordering is now supported as of 0.19.0 of Python ICAT --- datagateway_api/common/icat/filters.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datagateway_api/common/icat/filters.py b/datagateway_api/common/icat/filters.py index 811a67c8..6d9ad19c 100644 --- a/datagateway_api/common/icat/filters.py +++ b/datagateway_api/common/icat/filters.py @@ -164,8 +164,7 @@ def apply_filter(self, query): log.info("Adding order filter (for %s)", self.field) query.setOrder(PythonICATOrderFilter.result_order) except ValueError as e: - # Typically either invalid attribute(s) or attribute(s) contains 1-many - # relationship + # Typically invalid attribute(s) raise FilterError(e) split_fields = self.field.split(".") From 1b17a29925eaf7312a150c82d4232fec4a4709f8 Mon Sep 17 00:00:00 2001 From: Matthew Richards Date: Mon, 19 Jul 2021 15:02:58 +0000 Subject: [PATCH 4/7] #238: Add test cases for one-to-many related ordering --- test/icat/filters/test_order_filter.py | 44 ++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/test/icat/filters/test_order_filter.py b/test/icat/filters/test_order_filter.py index 75c7c311..2b45e366 100644 --- a/test/icat/filters/test_order_filter.py +++ b/test/icat/filters/test_order_filter.py @@ -30,6 +30,50 @@ def test_result_order_appended(self, icat_query): filter_handler.clear_python_icat_order_filters() + def test_join_specs_added(self, icat_query): + pid_filter = PythonICATOrderFilter("studyInvestigations.study.pid", "ASC") + name_filter = PythonICATOrderFilter( + "investigationInstruments.instrument.name", "DESC", + ) + + filter_handler = FilterOrderHandler() + filter_handler.add_filters([pid_filter, name_filter]) + filter_handler.apply_filters(icat_query) + + assert PythonICATOrderFilter.join_specs == { + "studyInvestigations": "LEFT JOIN", + "studyInvestigations.study": "LEFT JOIN", + "investigationInstruments": "LEFT JOIN", + "investigationInstruments.instrument": "LEFT JOIN", + } + + filter_handler.clear_python_icat_order_filters() + + def test_valid_one_many_related_ordering(self, icat_query): + pid_filter = PythonICATOrderFilter("studyInvestigations.study.pid", "DESC") + filter_handler = FilterOrderHandler() + filter_handler.add_filter(pid_filter) + filter_handler.apply_filters(icat_query) + + assert icat_query.join_specs == { + "studyInvestigations": "LEFT JOIN", + "studyInvestigations.study": "LEFT JOIN", + } + + filter_handler.clear_python_icat_order_filters() + + def test_invalid_one_many_related_ordering(self, icat_query): + pid_filter = PythonICATOrderFilter("studyInvestigations.study.pid", "DESC") + filter_handler = FilterOrderHandler() + filter_handler.add_filter(pid_filter) + + PythonICATOrderFilter.join_specs["testEntities"] = "LEFT JOIN" + + with pytest.raises(FilterError): + filter_handler.apply_filters(icat_query) + + filter_handler.clear_python_icat_order_filters() + def test_filter_applied_to_query(self, icat_query): test_filter = PythonICATOrderFilter("id", "DESC") From 9bb24f3a015b33e7c55afed604f711d4c9c77e2e Mon Sep 17 00:00:00 2001 From: Matthew Richards Date: Tue, 20 Jul 2021 19:31:37 +0000 Subject: [PATCH 5/7] #238: Update Python ICAT to 0.19.0 --- poetry.lock | 6 +++--- pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index dc11b31f..c55070d0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -679,7 +679,7 @@ six = ">=1.5" [[package]] name = "python-icat" -version = "0.18.1" +version = "0.19.0" description = "Python interface to ICAT and IDS" category = "main" optional = false @@ -881,7 +881,7 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=1.2.3)", "pytest-flake8", "pyt [metadata] lock-version = "1.1" python-versions = "^3.6" -content-hash = "9d1b52bdef90cf69a288bc94d490d68709398e1e96194e669b04992bd1db0ccb" +content-hash = "50007daac3ccc714a1b88bf352ca296d7aa7d8c2a4bb84996f3a4f3dc083dec1" [metadata.files] aniso8601 = [ @@ -1256,7 +1256,7 @@ python-dateutil = [ {file = "python_dateutil-2.8.1-py2.py3-none-any.whl", hash = "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"}, ] python-icat = [ - {file = "python-icat-0.18.1.tar.gz", hash = "sha256:d8b8fc1a535a78e1aac263594851c2dada798b027c60cc9bb20411fb6835650c"}, + {file = "python-icat-0.19.0.tar.gz", hash = "sha256:16d64bc8a9fa3d0e3861d38c8fb094a2eeee0b5343d33135b49e9743d9dcfc63"}, ] pytz = [ {file = "pytz-2021.1-py2.py3-none-any.whl", hash = "sha256:eb10ce3e7736052ed3623d49975ce333bcd712c7bb19a58b9e2089d4057d0798"}, diff --git a/pyproject.toml b/pyproject.toml index 33cfb6de..9a954592 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ Flask-Cors = "3.0.9" apispec = "3.3.0" flask-swagger-ui = "3.25.0" PyYAML = "5.4" -python-icat = "0.18.1" +python-icat = "0.19.0" suds-community = "^0.8.4" py-object-pool = "^1.1" cachetools = "^4.2.1" From 724acdf39a83dd0524b16ce6f8e8535c6d8743a6 Mon Sep 17 00:00:00 2001 From: Matthew Richards Date: Wed, 21 Jul 2021 10:38:12 +0000 Subject: [PATCH 6/7] #238: Use default generator args for CI - This will line up the CI's data with what I have locally and what is on SciGateway preprod's icatdb - This means the failing query test from a commit or two ago will work on CI --- .github/workflows/ci-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-build.yml b/.github/workflows/ci-build.yml index e80d1617..044d65a6 100644 --- a/.github/workflows/ci-build.yml +++ b/.github/workflows/ci-build.yml @@ -81,7 +81,7 @@ jobs: - name: Add dummy data to icatdb run: | - poetry run python -m util.icat_db_generator -s 4 -y 3 + poetry run python -m util.icat_db_generator # Run Nox tests session, saves and uploads a coverage report to codecov - name: Run Nox tests session From 33f895f0488d494b71e65cead5917aa3bfa640fa Mon Sep 17 00:00:00 2001 From: Matthew Richards Date: Thu, 22 Jul 2021 09:00:39 +0000 Subject: [PATCH 7/7] #238: Update README with info about generator script --- README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 5c77a581..96bc76d6 100644 --- a/README.md +++ b/README.md @@ -764,11 +764,13 @@ Within the repository, there are some useful files which can help with using the ## Database Generator There is a tool to generate mock data into ICAT's database. It is located in `util/icat_db_generator.py`. By default it will generate 20 years worth of data (approx -70,000 entities). The script makes use of `random` and `Faker` and is seeded with a seed -of 1. The seed and number of years of data generated can be changed by using the arg -flags `-s` or `--seed` for the seed, and `-y` or `--years` for the number of years. For -example: `python -m util.icat_db_generator -s 4 -y 10` Would set the seed to 4 and -generate 10 years of data. +70,000 entities). The default arguments will match the data on SciGateway Preprod and +therefore this is usually a good starting point. The script makes use of `random` and +`Faker` and is seeded with a seed of 1. The seed and number of years of data generated +can be changed by using the arg flags `-s` or `--seed` for the seed, and `-y` or +`--years` for the number of years. For example: +`python -m util.icat_db_generator -s 4 -y 10` Would set the seed to 4 and generate 10 +years of data. This uses code from the API's Database Backend, so a suitable `db_url` should be configured in `config.json`.