Skip to content

Commit

Permalink
Knowledge graph query: graph query endpoints (#2941)
Browse files Browse the repository at this point in the history
* Query parsers in a folder

* Start iterating over graph HTTP API

* Move graph to fixture

* Same relations as in Rust tests

* First iteration on path graph queries

* Don't include endpoint in API docs

* First iteration on /nodes and /relations endpoints

* Move things around

* Split graph tests in files and subtests

* Parsers don't need to be async

* Add fuzzy tests

* Connect fuzzy from HTTP to proto

* Refactor fuzzy for request model

* Prepare path requests for filter expressions

* Boolean expressions for path queries

* Add some tests for boolean expressions on paths

* Boolean expressions everywhere!

* Add test for /relations endpoint

* Parser per endpoint and avoid hacky typing stuff

* Fix test after rebase
  • Loading branch information
jotare authored Mar 10, 2025
1 parent 1e8c69c commit 37c331d
Show file tree
Hide file tree
Showing 27 changed files with 1,621 additions and 24 deletions.
4 changes: 4 additions & 0 deletions charts/nucliadb_search/templates/search.vs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ spec:
regex: '^/api/v\d+/kb/[^/]+/suggest'
method:
regex: "GET|OPTIONS"
- uri:
regex: '^/api/v\d+/kb/[^/]+/graph'
method:
regex: "POST|OPTIONS"
retries:
attempts: 3
retryOn: connect-failure
Expand Down
1 change: 1 addition & 0 deletions nucliadb/src/nucliadb/common/nidx.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ def __init__(self, api_client, searcher_client):
# Searcher methods
self.Search = searcher_client.Search
self.Suggest = searcher_client.Suggest
self.GraphSearch = searcher_client.GraphSearch
self.Paragraphs = searcher_client.Paragraphs
self.Documents = searcher_client.Documents

Expand Down
29 changes: 16 additions & 13 deletions nucliadb/src/nucliadb/search/api/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,19 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from . import ask # noqa
from . import catalog # noqa
from . import feedback # noqa
from . import find # noqa
from . import knowledgebox # noqa
from . import predict_proxy # noqa
from . import search # noqa
from . import suggest # noqa
from . import summarize # noqa
from .resource import ask as ask_resource # noqa
from .resource import search as search_resource # noqa
from .resource import ingestion_agents as ingestion_agents_resource # noqa
from .router import api # noqa
from . import ( # noqa: F401
ask,
catalog,
feedback,
find,
graph,
knowledgebox,
predict_proxy,
search,
suggest,
summarize,
)
from .resource import ask as ask_resource # noqa: F401
from .resource import ingestion_agents as ingestion_agents_resource # noqa: F401
from .resource import search as search_resource # noqa: F401
from .router import api # noqa: F401
2 changes: 1 addition & 1 deletion nucliadb/src/nucliadb/search/api/v1/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from nucliadb.search.search.exceptions import InvalidQueryError
from nucliadb.search.search.merge import fetch_resources
from nucliadb.search.search.pgcatalog import pgcatalog_search
from nucliadb.search.search.query_parser.catalog import parse_catalog
from nucliadb.search.search.query_parser.parsers import parse_catalog
from nucliadb.search.search.utils import (
maybe_log_request_payload,
)
Expand Down
130 changes: 130 additions & 0 deletions nucliadb/src/nucliadb/search/api/v1/graph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# Copyright (C) 2021 Bosutech XXI S.L.
#
# nucliadb is offered under the AGPL v3.0 and as commercial software.
# For commercial licensing, contact us at info@nuclia.com.
#
# AGPL:
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from fastapi import Header, Request, Response
from fastapi_versioning import version

from nucliadb.search.api.v1.router import KB_PREFIX, api
from nucliadb.search.requesters.utils import Method, node_query
from nucliadb.search.search.graph_merge import (
build_graph_nodes_response,
build_graph_relations_response,
build_graph_response,
)
from nucliadb.search.search.query_parser.parsers import (
parse_graph_node_search,
parse_graph_relation_search,
parse_graph_search,
)
from nucliadb_models.graph.requests import (
GraphNodesSearchRequest,
GraphRelationsSearchRequest,
GraphSearchRequest,
)
from nucliadb_models.graph.responses import (
GraphNodesSearchResponse,
GraphRelationsSearchResponse,
GraphSearchResponse,
)
from nucliadb_models.resource import NucliaDBRoles
from nucliadb_models.search import (
NucliaDBClientType,
)
from nucliadb_utils.authentication import requires


@api.post(
    f"/{KB_PREFIX}/{{kbid}}/graph",
    status_code=200,
    summary="Search Knowledge Box graph",
    description="Search on the Knowledge Box graph and retrieve triplets of vertex-edge-vertex",
    response_model_exclude_unset=True,
    include_in_schema=False,
    tags=["Search"],
)
@requires(NucliaDBRoles.READER)
@version(1)
async def graph_search_knowledgebox(
    request: Request,
    response: Response,
    kbid: str,
    item: GraphSearchRequest,
    x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
    x_nucliadb_user: str = Header(""),
    x_forwarded_for: str = Header(""),
) -> GraphSearchResponse:
    """Run a graph path search on the Knowledge Box identified by `kbid`.

    The request body is turned into a protobuf query with `parse_graph_search`,
    sent through `node_query` with `Method.GRAPH`, and the returned results are
    converted into the HTTP response model by `build_graph_response`.
    """
    # node_query returns a 3-tuple; only its first element (the results) is used
    graph_results, _, _ = await node_query(kbid, Method.GRAPH, parse_graph_search(item))
    return build_graph_response(graph_results)


@api.post(
    f"/{KB_PREFIX}/{{kbid}}/graph/nodes",
    status_code=200,
    summary="Search Knowledge Box graph nodes",
    description="Search on the Knowledge Box graph and retrieve nodes (vertices)",
    response_model_exclude_unset=True,
    include_in_schema=False,
    tags=["Search"],
)
@requires(NucliaDBRoles.READER)
@version(1)
async def graph_nodes_search_knowledgebox(
    request: Request,
    response: Response,
    kbid: str,
    item: GraphNodesSearchRequest,
    x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
    x_nucliadb_user: str = Header(""),
    x_forwarded_for: str = Header(""),
) -> GraphNodesSearchResponse:
    """Run a graph node search on the Knowledge Box identified by `kbid`.

    The request body is turned into a protobuf query with
    `parse_graph_node_search`, sent through `node_query` with `Method.GRAPH`,
    and the returned results are converted into the HTTP response model by
    `build_graph_nodes_response`.
    """
    # node_query returns a 3-tuple; only its first element (the results) is used
    graph_results, _, _ = await node_query(kbid, Method.GRAPH, parse_graph_node_search(item))
    return build_graph_nodes_response(graph_results)


@api.post(
    f"/{KB_PREFIX}/{{kbid}}/graph/relations",
    status_code=200,
    summary="Search Knowledge Box graph relations",
    description="Search on the Knowledge Box graph and retrieve relations (edges)",
    response_model_exclude_unset=True,
    include_in_schema=False,
    tags=["Search"],
)
@requires(NucliaDBRoles.READER)
@version(1)
async def graph_relations_search_knowledgebox(
    request: Request,
    response: Response,
    kbid: str,
    item: GraphRelationsSearchRequest,
    x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
    x_nucliadb_user: str = Header(""),
    x_forwarded_for: str = Header(""),
) -> GraphRelationsSearchResponse:
    """Run a graph relation search on the Knowledge Box identified by `kbid`.

    The request body is turned into a protobuf query with
    `parse_graph_relation_search`, sent through `node_query` with
    `Method.GRAPH`, and the returned results are converted into the HTTP
    response model by `build_graph_relations_response`.
    """
    # node_query returns a 3-tuple; only its first element (the results) is used
    graph_results, _, _ = await node_query(kbid, Method.GRAPH, parse_graph_relation_search(item))
    return build_graph_relations_response(graph_results)
17 changes: 16 additions & 1 deletion nucliadb/src/nucliadb/search/requesters/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,14 @@
from nucliadb.common.cluster.utils import get_shard_manager
from nucliadb.search import logger
from nucliadb.search.search.shards import (
graph_search_shard,
query_shard,
suggest_shard,
)
from nucliadb.search.settings import settings
from nucliadb_protos.nodereader_pb2 import (
GraphSearchRequest,
GraphSearchResponse,
SearchRequest,
SearchResponse,
SuggestRequest,
Expand All @@ -50,19 +53,22 @@
class Method(Enum):
    """Query kinds accepted by `node_query`; each member is a key of `METHODS`."""

    SEARCH = auto()
    SUGGEST = auto()
    GRAPH = auto()


# Maps every query method to the shard-level function that serves it
METHODS = {
    Method.SEARCH: query_shard,
    Method.SUGGEST: suggest_shard,
    Method.GRAPH: graph_search_shard,
}

# NOTE(review): REQUEST_TYPE is assigned twice in a row; the second assignment
# (the one that includes GraphSearchRequest) replaces the first.
REQUEST_TYPE = Union[SuggestRequest, SearchRequest]
REQUEST_TYPE = Union[SuggestRequest, SearchRequest, GraphSearchRequest]

# Type variable constrained to the supported response protobufs
T = TypeVar(
    "T",
    SuggestResponse,
    SearchResponse,
    GraphSearchResponse,
)


Expand All @@ -84,6 +90,15 @@ async def node_query(
) -> tuple[list[SearchResponse], bool, list[tuple[AbstractIndexNode, str]]]: ...


# Typing-only overload: a GraphSearchRequest query yields a list of
# GraphSearchResponse as the first element of the returned tuple.
@overload
async def node_query(
    kbid: str,
    method: Method,
    pb_query: GraphSearchRequest,
    timeout: Optional[float] = None,
) -> tuple[list[GraphSearchResponse], bool, list[tuple[AbstractIndexNode, str]]]: ...


async def node_query(
kbid: str,
method: Method,
Expand Down
2 changes: 1 addition & 1 deletion nucliadb/src/nucliadb/search/search/find.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
)
from nucliadb.search.search.query import QueryParser
from nucliadb.search.search.query_parser.old_filters import OldFilterParams
from nucliadb.search.search.query_parser.parser import parse_find
from nucliadb.search.search.query_parser.parsers import parse_find
from nucliadb.search.search.rank_fusion import (
RankFusionAlgorithm,
get_rank_fusion,
Expand Down
90 changes: 90 additions & 0 deletions nucliadb/src/nucliadb/search/search/graph_merge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Copyright (C) 2021 Bosutech XXI S.L.
#
# nucliadb is offered under the AGPL v3.0 and as commercial software.
# For commercial licensing, contact us at info@nuclia.com.
#
# AGPL:
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#


from nucliadb.common.models_utils.from_proto import RelationNodeTypePbMap
from nucliadb_models.graph import responses as graph_responses
from nucliadb_models.graph.responses import (
GraphNodesSearchResponse,
GraphRelationsSearchResponse,
GraphSearchResponse,
)
from nucliadb_protos import nodereader_pb2


def build_graph_response(results: list[nodereader_pb2.GraphSearchResponse]) -> GraphSearchResponse:
    """Convert protobuf graph search results into a `GraphSearchResponse`.

    Each entry in a result's `graph` refers to its source, relation and
    destination by index into that same result's `nodes` and `relations`
    lists. The indexes are resolved here and one `GraphPath` is emitted per
    entry, in the order the results and their entries are given.
    """

    def to_graph_node(pb_node) -> graph_responses.GraphNode:
        # Map a protobuf node onto the HTTP model
        return graph_responses.GraphNode(
            value=pb_node.value,
            type=RelationNodeTypePbMap[pb_node.ntype],
            group=pb_node.subtype,
        )

    collected = []
    for result in results:
        for triplet in result.graph:
            # Resolve all three indexes before building any model object
            pb_source = result.nodes[triplet.source]
            pb_relation = result.relations[triplet.relation]
            pb_destination = result.nodes[triplet.destination]

            collected.append(
                graph_responses.GraphPath(
                    source=to_graph_node(pb_source),
                    relation=graph_responses.GraphRelation(label=pb_relation.label),
                    destination=to_graph_node(pb_destination),
                )
            )

    return GraphSearchResponse(paths=collected)


def build_graph_nodes_response(
    results: list[nodereader_pb2.GraphSearchResponse],
) -> GraphNodesSearchResponse:
    """Convert protobuf graph search results into a `GraphNodesSearchResponse`.

    The `nodes` of every result are flattened into a single list, keeping the
    order in which results and nodes are given. No deduplication is applied.
    """
    return GraphNodesSearchResponse(
        nodes=[
            graph_responses.GraphNode(
                value=pb_node.value,
                type=RelationNodeTypePbMap[pb_node.ntype],
                group=pb_node.subtype,
            )
            for result in results
            for pb_node in result.nodes
        ]
    )


def build_graph_relations_response(
    results: list[nodereader_pb2.GraphSearchResponse],
) -> GraphRelationsSearchResponse:
    """Convert protobuf graph search results into a `GraphRelationsSearchResponse`.

    The `relations` of every result are flattened into a single list, keeping
    the order in which results and relations are given. Only the label of each
    relation is carried over. No deduplication is applied.
    """
    return GraphRelationsSearchResponse(
        relations=[
            graph_responses.GraphRelation(label=pb_relation.label)
            for result in results
            for pb_relation in result.relations
        ]
    )
9 changes: 9 additions & 0 deletions nucliadb/src/nucliadb/search/search/query_parser/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
)

from nucliadb_models import search as search_models
from nucliadb_protos import nodereader_pb2

### Retrieval

Expand Down Expand Up @@ -101,3 +102,11 @@ class CatalogQuery(BaseModel):
faceted: list[str]
page_size: int
page_number: int


### Graph


# Right now, we don't need a more generic model for graph queries; we can
# use the protobuffer directly
GraphRetrieval = nodereader_pb2.GraphSearchRequest
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Copyright (C) 2021 Bosutech XXI S.L.
#
# nucliadb is offered under the AGPL v3.0 and as commercial software.
# For commercial licensing, contact us at info@nuclia.com.
#
# AGPL:
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from .catalog import parse_catalog # noqa: F401
from .find import parse_find # noqa: F401
from .graph import parse_graph_node_search, parse_graph_relation_search, parse_graph_search # noqa: F401
Loading

0 comments on commit 37c331d

Please sign in to comment.