From 3ed223c10faf6199c4438f66076e6fe898504fe4 Mon Sep 17 00:00:00 2001 From: ylamgarchal Date: Tue, 21 Jan 2025 14:55:26 +0100 Subject: [PATCH] query_es_dsl: ability to query sub nested fields - (tests.name='junit_e2e.xml') and (tests.testsuites.name='my_testsuite_1') and (tests.testsuites.testscases.name='my_testcase_1') Change-Id: I03a4b8de817741f97d50d957de4924b2338f7a76 --- dci/analytics/query_es_dsl.py | 53 ++++++++++++-------- tests/analytics/test_query_es_dsl.py | 73 +++++++++++++++++++++++----- 2 files changed, 96 insertions(+), 30 deletions(-) diff --git a/dci/analytics/query_es_dsl.py b/dci/analytics/query_es_dsl.py index 6c3e3a3a7..f9f1a7684 100644 --- a/dci/analytics/query_es_dsl.py +++ b/dci/analytics/query_es_dsl.py @@ -87,11 +87,15 @@ def parse(q): _op_to_es_op = {"<": "lt", "<=": "lte", ">": "gt", ">=": "gte"} +def _get_prefix(operand): + return ".".join(operand.split(".")[:-1]) + + def _handle_comparison_operator(handle_nested, operator, operand_1, operand_2): if handle_nested and "." in operand_1: return { "nested": { - "path": operand_1.split(".")[0], + "path": _get_prefix(operand_1), "query": {"range": {operand_1: {_op_to_es_op[operator]: operand_2}}}, } } @@ -107,7 +111,7 @@ def _generate_from_operators(parsed_query, handle_nested=False): if handle_nested and "." in operand_1: return { "nested": { - "path": operand_1.split(".")[0], + "path": _get_prefix(operand_1), "query": {"term": {operand_1: operand_2}}, } } @@ -127,13 +131,13 @@ def _generate_from_operators(parsed_query, handle_nested=False): } } if handle_nested and "." in operand_1: - return {"nested": {"path": operand_1.split(".")[0], "query": _regexp}} + return {"nested": {"path": _get_prefix(operand_1), "query": _regexp}} return _regexp elif operator == "not_in": if handle_nested and "." in operand_1: return { "nested": { - "path": operand_1.split(".")[0], + "path": _get_prefix(operand_1), "query": {"bool": {"must_not": {"terms": {operand_1: operand_2}}}}, } } @@ -142,7 +146,7 @@ def _generate_from_operators(parsed_query, handle_nested=False): if handle_nested and "." in operand_1: return { "nested": { - "path": operand_1.split(".")[0], + "path": _get_prefix(operand_1), "query": {"terms": {operand_1: operand_2}}, } } @@ -177,11 +181,8 @@ def _is_nested_query(operands_1, operands_2=None): and isinstance(operands_1[0][0], str) and "." in operands_1[0][0] ): - path = operands_1[0][0].split(".")[0] + path = _get_prefix(operands_1[0][0]) if path: - for o in operands_1: - if o[0].split(".")[0] != path: - return None if operands_2: for o in operands_2: if o[0].split(".")[0] != path: @@ -190,9 +191,17 @@ def _is_nested_query(operands_1, operands_2=None): def _generate_es_query(parsed_query, handle_nested=True): - if isinstance(parsed_query[0], str): + if ( + len(parsed_query) > 0 + and isinstance(parsed_query, list) + and isinstance(parsed_query[0], str) + ): return _generate_from_operators(parsed_query, handle_nested) - if len(parsed_query) == 1: + if ( + len(parsed_query) == 1 + and isinstance(parsed_query, list) + and isinstance(parsed_query[0], list) + ): return _generate_es_query(parsed_query[0], handle_nested) if "or" in parsed_query: @@ -223,17 +232,23 @@ def _generate_es_query(parsed_query, handle_nested=True): operands = _get_logical_operands(parsed_query) path = _is_nested_query(operands) if path: + first_element = operands[0] + _filter = [_generate_es_query(first_element, handle_nested=False)] + operands = operands[1:] + i = 0 + while i < len(operands): + if path == _get_prefix(operands[i][0]): + _filter.append(_generate_es_query(operands[i], handle_nested=False)) + else: + break + i += 1 + if i < len(operands): + _filter.append(_generate_es_query(operands[i:], handle_nested=True)) + return { "nested": { "path": path, - "query": { - "bool": { - "filter": [ - _generate_es_query(o, handle_nested=False) - for o in operands - ] - } - }, + "query": {"bool": {"filter": _filter}}, } } else: diff --git a/tests/analytics/test_query_es_dsl.py b/tests/analytics/test_query_es_dsl.py index e23df55fb..80e66ed34 100644 --- a/tests/analytics/test_query_es_dsl.py +++ b/tests/analytics/test_query_es_dsl.py @@ -390,35 +390,42 @@ def test_query_build_comparison_operator(): def test_nrt_query_build_nested_regexp(): ret = qed.build( - "((components.type=ocp) and (name=~.*upgrade.*) and (team.name=~Intel.+) and (topic.name=OCP-4.16) and (tags in [daily]))" + "(name=~.*upgrade.*) and ((components.type=ocp) and (components.name=openshift)) and (team.name=~Intel.+) and (topic.name=OCP-4.16) and (tags in [daily])" ) assert ret == { "bool": { "filter": [ - { - "nested": { - "path": "components", - "query": {"term": {"components.type": "ocp"}}, - } - }, { "regexp": { "name": { - "value": ".*upgrade.*", - "flags": "ALL", "case_insensitive": True, + "flags": "ALL", + "value": ".*upgrade.*", } } }, + { + "nested": { + "path": "components", + "query": { + "bool": { + "filter": [ + {"term": {"components.type": "ocp"}}, + {"term": {"components.name": "openshift"}}, + ] + } + }, + } + }, { "nested": { "path": "team", "query": { "regexp": { "team.name": { - "value": "Intel.+", - "flags": "ALL", "case_insensitive": True, + "flags": "ALL", + "value": "Intel.+", } } }, @@ -461,3 +468,47 @@ def test_nrt_query_build_quoted_values(): ] } } + + +def test_query_build_nested_field(): + ret = qed.build( + "(tests.name='junit_e2e.xml') and (tests.testsuites.name='my_testsuite_1') and (tests.testsuites.testscases.name='my_testcase_1')" + ) + assert ret == { + "nested": { + "path": "tests", + "query": { + "bool": { + "filter": [ + {"term": {"tests.name": "junit_e2e.xml"}}, + { + "nested": { + "path": "tests.testsuites", + "query": { + "bool": { + "filter": [ + { + "term": { + "tests.testsuites.name": "my_testsuite_1" + } + }, + { + "nested": { + "path": "tests.testsuites.testscases", + "query": { + "term": { + "tests.testsuites.testscases.name": "my_testcase_1" + } + }, + } + }, + ] + } + }, + } + }, + ] + } + }, + } + }