From 86f8468745fab1869d8aa9414b6f69c853a88651 Mon Sep 17 00:00:00 2001 From: ylamgarchal Date: Fri, 25 Oct 2024 12:02:00 +0200 Subject: [PATCH] analytics: add search from regexp with operator =~ - field=~1\.2\.* also adding more tests Change-Id: I9de39751c29fbfc9cc8c26679a0e6d26c0eb4a3e --- dci/analytics/query_es_dsl.py | 16 +- dci/api/v1/analytics.py | 62 ++-- tests/analytics/test_query_es_dsl.py | 407 +++++++++++++-------------- tests/api/v1/test_analytics.py | 41 +++ 4 files changed, 292 insertions(+), 234 deletions(-) diff --git a/dci/analytics/query_es_dsl.py b/dci/analytics/query_es_dsl.py index 0112e6177..b370e6d2d 100644 --- a/dci/analytics/query_es_dsl.py +++ b/dci/analytics/query_es_dsl.py @@ -17,7 +17,7 @@ import pyparsing as pp _field = pp.Word(pp.alphanums + "_" + ".") -_value = pp.Word(pp.alphanums + "_" + "-" + "%" + "." + ":") +_value = pp.Word(pp.alphanums + "_" + "-" + "%" + "." + ":" + "\\" + "*" + "?") _word = pp.Word(pp.alphanums + "_" + "-" + "." + " " + ":") _comma = pp.Suppress(pp.Literal(",")) _lp = pp.Suppress(pp.Literal("(")) @@ -29,7 +29,7 @@ _comma_string = _comma + _word _list = _lb + _word + pp.ZeroOrMore(_comma_string) + _rb -_comparison_operators = {"=", "!=", "<=" "<", ">=", ">"} +_comparison_operators = {"=", "!=", "<=" "<", ">=", ">", "=~"} _comparison_operators = pp.oneOf(" ".join(_comparison_operators)) _comparison = _field + _comparison_operators + _value @@ -72,6 +72,16 @@ def _generate_from_operators(parsed_query, handle_nested=False): } } return {"term": {operand_1: operand_2}} + elif operator == "=~": + return { + "regexp": { + operand_1: { + "value": operand_2, + "flags": "ALL", + "case_insensitive": True, + } + } + } elif operator == "not_in": if handle_nested and "." in operand_1: return { @@ -185,4 +195,4 @@ def _generate_es_query(parsed_query, handle_nested=True): def build(query): parsed_query = parse(query) - return {"query": _generate_es_query(parsed_query)} + return _generate_es_query(parsed_query) diff --git a/dci/api/v1/analytics.py b/dci/api/v1/analytics.py index 33e39e03c..d9dfce3b9 100644 --- a/dci/api/v1/analytics.py +++ b/dci/api/v1/analytics.py @@ -40,7 +40,7 @@ logger = logging.getLogger(__name__) -def _handle_pagination(args): +def handle_pagination(args): limit_max = 200 default_limit = 20 default_offset = 0 @@ -63,7 +63,7 @@ def tasks_duration_cumulated(user): export_control.verify_access_to_topic(user, topic) query = "q=topic_id:%s AND remoteci_id:%s" % (args["topic_id"], args["remoteci_id"]) - offset, limit = _handle_pagination(args) + offset, limit = handle_pagination(args) try: res = requests.get( "%s/elasticsearch/tasks_duration_cumulated/_search?%s" @@ -248,33 +248,33 @@ def tasks_pipelines_status(user): ) -@api.route("/analytics/jobs", methods=["GET", "POST"]) -@decorators.login_required -def tasks_jobs(user): - if user.is_not_super_admin() and user.is_not_epm() and user.is_not_read_only_user(): - raise dci_exc.Unauthorized() +def handle_es_sort(args): + field = args.get("sort") + if not field: + return [ + {"created_at": {"order": "desc", "format": "strict_date_optional_time"}} + ] + if field.startswith("-"): + return [{field[1:]: {"order": "desc", "format": "strict_date_optional_time"}}] + else: + return [{field: {"order": "asc", "format": "strict_date_optional_time"}}] - args = flask.request.args.to_dict() - offset, limit = _handle_pagination(args) - query_string = args.get("query") - es_query = qed.build(query_string) - es_query["sort"] = [ - {"created_at": {"order": "desc", "format": "strict_date_optional_time"}} - ] - es_query["from"] = offset - es_query["size"] = limit + +def handle_es_timeframe(query, args): from_date = args.get("from") to_date = args.get("to") if from_date and to_date: - es_query["query"] = { + return { "bool": { "filter": [ {"range": {"created_at": {"gte": from_date, "lte": to_date}}}, - es_query["query"], + query, ] } } + +def handle_includes_excludes(args): _source = {} excludes = args.get("excludes") if excludes: @@ -284,10 +284,36 @@ def tasks_jobs(user): if includes: includes = includes.split(",") _source["includes"] = includes + return _source + +def build_es_query(args): + es_query = {} + + offset, limit = handle_pagination(args) + es_query["from"] = offset + es_query["size"] = limit + + query_string = args.get("query") + es_query["query"] = qed.build(query_string) + + es_query["sort"] = handle_es_sort(args) + + es_query["query"] = handle_es_timeframe(es_query["query"], args) + + _source = handle_includes_excludes(args) if _source: es_query["_source"] = _source + +@api.route("/analytics/jobs", methods=["GET", "POST"]) +@decorators.login_required +def tasks_jobs(user): + if user.is_not_super_admin() and user.is_not_epm() and user.is_not_read_only_user(): + raise dci_exc.Unauthorized() + + es_query = build_es_query() + try: res = requests.get( "%s/analytics/jobs" % (CONFIG["ANALYTICS_URL"]), diff --git a/tests/analytics/test_query_es_dsl.py b/tests/analytics/test_query_es_dsl.py index 7579b8b95..069f593cc 100644 --- a/tests/analytics/test_query_es_dsl.py +++ b/tests/analytics/test_query_es_dsl.py @@ -87,79 +87,55 @@ def test_parse_query_valid(): def test_build(): ret = qed.build("f1=v1") - assert ret == {"query": {"term": {"f1": "v1"}}} + assert ret == {"term": {"f1": "v1"}} ret = qed.build("(f1=v1)") - assert ret == {"query": {"term": {"f1": "v1"}}} + assert ret == {"term": {"f1": "v1"}} ret = qed.build("(f1=v1) and (f2=v2)") - assert ret == { - "query": {"bool": {"filter": [{"term": {"f1": "v1"}}, {"term": {"f2": "v2"}}]}} - } + assert ret == {"bool": {"filter": [{"term": {"f1": "v1"}}, {"term": {"f2": "v2"}}]}} ret = qed.build("((f1=v1) and (f2=v2)) or (f3=v3)") assert ret == { - "query": { - "bool": { - "should": [ - { - "bool": { - "filter": [{"term": {"f1": "v1"}}, {"term": {"f2": "v2"}}] - } - }, - {"term": {"f3": "v3"}}, - ] - } + "bool": { + "should": [ + {"bool": {"filter": [{"term": {"f1": "v1"}}, {"term": {"f2": "v2"}}]}}, + {"term": {"f3": "v3"}}, + ] } } ret = qed.build("((f1=v1) and (f2=v2)) or ((f3=v3) and (f4=v4))") assert ret == { - "query": { - "bool": { - "should": [ - { - "bool": { - "filter": [{"term": {"f1": "v1"}}, {"term": {"f2": "v2"}}] - } - }, - { - "bool": { - "filter": [{"term": {"f3": "v3"}}, {"term": {"f4": "v4"}}] - } - }, - ] - } + "bool": { + "should": [ + {"bool": {"filter": [{"term": {"f1": "v1"}}, {"term": {"f2": "v2"}}]}}, + {"bool": {"filter": [{"term": {"f3": "v3"}}, {"term": {"f4": "v4"}}]}}, + ] } } ret = qed.build("((f1=v1) and ((f2=v2) or (f2=v22))) or ((f3=v3) and (f4=v4))") assert ret == { - "query": { - "bool": { - "should": [ - { - "bool": { - "filter": [ - {"term": {"f1": "v1"}}, - { - "bool": { - "should": [ - {"term": {"f2": "v2"}}, - {"term": {"f2": "v22"}}, - ] - } - }, - ] - } - }, - { - "bool": { - "filter": [{"term": {"f3": "v3"}}, {"term": {"f4": "v4"}}] - } - }, - ] - } + "bool": { + "should": [ + { + "bool": { + "filter": [ + {"term": {"f1": "v1"}}, + { + "bool": { + "should": [ + {"term": {"f2": "v2"}}, + {"term": {"f2": "v22"}}, + ] + } + }, + ] + } + }, + {"bool": {"filter": [{"term": {"f3": "v3"}}, {"term": {"f4": "v4"}}]}}, + ] } } @@ -167,41 +143,39 @@ def test_build(): "((f1=v1) and ((f2=v2) or (f2=v22))) or ((f3=v3) and ((f4=v4) or (f4=v44)))" ) assert ret == { - "query": { - "bool": { - "should": [ - { - "bool": { - "filter": [ - {"term": {"f1": "v1"}}, - { - "bool": { - "should": [ - {"term": {"f2": "v2"}}, - {"term": {"f2": "v22"}}, - ] - } - }, - ] - } - }, - { - "bool": { - "filter": [ - {"term": {"f3": "v3"}}, - { - "bool": { - "should": [ - {"term": {"f4": "v4"}}, - {"term": {"f4": "v44"}}, - ] - } - }, - ] - } - }, - ] - } + "bool": { + "should": [ + { + "bool": { + "filter": [ + {"term": {"f1": "v1"}}, + { + "bool": { + "should": [ + {"term": {"f2": "v2"}}, + {"term": {"f2": "v22"}}, + ] + } + }, + ] + } + }, + { + "bool": { + "filter": [ + {"term": {"f3": "v3"}}, + { + "bool": { + "should": [ + {"term": {"f4": "v4"}}, + {"term": {"f4": "v44"}}, + ] + } + }, + ] + } + }, + ] } } @@ -209,60 +183,54 @@ def test_build(): "(name=vcp) and (((components.type=ocp) and (components.version=4.14.27)) and ((components.type=aspenmesh) and (components.version=1.18.7-am1)))" ) assert ret == { - "query": { - "bool": { - "filter": [ - {"term": {"name": "vcp"}}, - { - "bool": { - "filter": [ - { - "nested": { - "path": "components", - "query": { - "bool": { - "filter": [ - { - "term": { - "components.type": "ocp" - } - }, - { - "term": { - "components.version": "4.14.27" - } - }, - ] - } - }, - } - }, - { - "nested": { - "path": "components", - "query": { - "bool": { - "filter": [ - { - "term": { - "components.type": "aspenmesh" - } - }, - { - "term": { - "components.version": "1.18.7-am1" - } - }, - ] - } - }, - } - }, - ] - } - }, - ] - } + "bool": { + "filter": [ + {"term": {"name": "vcp"}}, + { + "bool": { + "filter": [ + { + "nested": { + "path": "components", + "query": { + "bool": { + "filter": [ + {"term": {"components.type": "ocp"}}, + { + "term": { + "components.version": "4.14.27" + } + }, + ] + } + }, + } + }, + { + "nested": { + "path": "components", + "query": { + "bool": { + "filter": [ + { + "term": { + "components.type": "aspenmesh" + } + }, + { + "term": { + "components.version": "1.18.7-am1" + } + }, + ] + } + }, + } + }, + ] + } + }, + ] } } @@ -270,31 +238,29 @@ def test_build(): "((components.type=cnf-certification-test)) and ((team.name not_in [telcoci, RedHat]))" ) assert ret == { - "query": { - "bool": { - "filter": [ - { - "nested": { - "path": "components", - "query": { - "term": {"components.type": "cnf-certification-test"} - }, - } - }, - { - "nested": { - "path": "team", - "query": { - "bool": { - "must_not": { - "terms": {"team.name": ["telcoci", "RedHat"]} - } + "bool": { + "filter": [ + { + "nested": { + "path": "components", + "query": { + "term": {"components.type": "cnf-certification-test"} + }, + } + }, + { + "nested": { + "path": "team", + "query": { + "bool": { + "must_not": { + "terms": {"team.name": ["telcoci", "RedHat"]} } - }, - } - }, - ] - } + } + }, + } + }, + ] } } @@ -304,26 +270,22 @@ def test_query_1(): "(components.type=cnf-certification-test) and (components.name not_in [telcoci, RedHat])" ) assert ret == { - "query": { - "nested": { - "path": "components", - "query": { - "bool": { - "filter": [ - {"term": {"components.type": "cnf-certification-test"}}, - { - "bool": { - "must_not": { - "terms": { - "components.name": ["telcoci", "RedHat"] - } - } + "nested": { + "path": "components", + "query": { + "bool": { + "filter": [ + {"term": {"components.type": "cnf-certification-test"}}, + { + "bool": { + "must_not": { + "terms": {"components.name": ["telcoci", "RedHat"]} } - }, - ] - } - }, - } + } + }, + ] + } + }, } } @@ -332,35 +294,54 @@ def test_query_2(): ret = qed.build("components.type=cpt_type") assert ret == { - "query": { - "nested": { - "path": "components", - "query": {"term": {"components.type": "cpt_type"}}, - } + "nested": { + "path": "components", + "query": {"term": {"components.type": "cpt_type"}}, } } -def not_yet_test_query_3(): - ret = qed.build("created_at>2024-06-01 and created_at<2024-06-30") +def test_query_build_regex(): + ret = qed.build( + "(((components.name=openshift-vanilla) and (components.type=ocp)) and ((components.type=netapp-trident) and (components.version=~v24\\.02.*)))" + ) assert ret == { - "query": { - "range": { - "created_at": { - "gte": "2024-06-01T00:00:00", - "lte": "2024-06-30T23:59:59", - "format": "strict_date_optional_time", - } - } + "bool": { + "filter": [ + { + "nested": { + "path": "components", + "query": { + "bool": { + "filter": [ + {"term": {"components.name": "openshift-vanilla"}}, + {"term": {"components.type": "ocp"}}, + ] + } + }, + } + }, + { + "nested": { + "path": "components", + "query": { + "bool": { + "filter": [ + {"term": {"components.type": "netapp-trident"}}, + { + "regexp": { + "components.version": { + "case_insensitive": True, + "flags": "ALL", + "value": "v24\\.02.*", + } + } + }, + ] + } + }, + } + }, + ] } } - - -def not_yet_test_query_4(): - """ - { - "size": 50, - "_source": ["created_at","team.name","remoteci.name","pipeline.name","name"], - "query": {} - } - """ diff --git a/tests/api/v1/test_analytics.py b/tests/api/v1/test_analytics.py index d4912abf9..2bb4423aa 100644 --- a/tests/api/v1/test_analytics.py +++ b/tests/api/v1/test_analytics.py @@ -19,6 +19,9 @@ import mock from requests.exceptions import ConnectionError +from dci.api.v1 import analytics +from dci.analytics import query_es_dsl as qed + @mock.patch("dci.api.v1.analytics.requests.get") def test_elasticsearch_ressource_not_found( @@ -78,3 +81,41 @@ def test_tasks_jobs(user): data={"query": "my-query"}, ) assert res.status_code == 401 + + +def test_handle_es_sort(): + res = analytics.handle_es_sort({"sort": "titi"}) + assert res == [{"titi": {"order": "asc", "format": "strict_date_optional_time"}}] + + res = analytics.handle_es_sort({"sort": "-titi"}) + assert res == [{"titi": {"order": "desc", "format": "strict_date_optional_time"}}] + + res = analytics.handle_es_sort({}) + assert res == [ + {"created_at": {"order": "desc", "format": "strict_date_optional_time"}} + ] + + +def test_handle_es_timeframe(): + query = qed.build("name=titi") + res = analytics.handle_es_timeframe( + query, {"from": "2024-01-01", "to": "2024-02-01"} + ) + assert res == { + "bool": { + "filter": [ + {"range": {"created_at": {"gte": "2024-01-01", "lte": "2024-02-01"}}}, + query, + ] + } + } + + +def test_handle_includes_excludes(): + ret = analytics.handle_includes_excludes( + {"includes": "titi,tata", "excludes": "toto"} + ) + assert ret == {"excludes": ["toto"], "includes": ["titi", "tata"]} + + ret = analytics.handle_includes_excludes({}) + assert ret == {}