diff --git a/nucliadb/src/nucliadb/writer/api/v1/export_import.py b/nucliadb/src/nucliadb/writer/api/v1/export_import.py index c6d3be7dc0..97e89225e5 100644 --- a/nucliadb/src/nucliadb/writer/api/v1/export_import.py +++ b/nucliadb/src/nucliadb/writer/api/v1/export_import.py @@ -112,7 +112,7 @@ async def kb_create_and_import_endpoint(request: Request): now = datetime.now().strftime("%Y-%m-%d %H:%M:%S") import_kb_config = KnowledgeBoxConfig( title=f"Imported KB - {now}", - learning_configuration=learning_config.dict(), + learning_configuration=learning_config.model_dump(), ) kbid, slug = await create_kb(import_kb_config) diff --git a/nucliadb/tests/conftest.py b/nucliadb/tests/conftest.py index f994cf5068..057a73fa45 100644 --- a/nucliadb/tests/conftest.py +++ b/nucliadb/tests/conftest.py @@ -46,6 +46,9 @@ "tests.ndbfixtures.processing", # useful resources for tests (KBs, resources, ...) "tests.ndbfixtures.resources", + "tests.nucliadb.knowledgeboxes", + # legacy fixtures waiting for a better place + "tests.ndbfixtures.legacy", ] ) diff --git a/nucliadb/tests/fixtures.py b/nucliadb/tests/fixtures.py deleted file mode 100644 index cb3ca6ab04..0000000000 --- a/nucliadb/tests/fixtures.py +++ /dev/null @@ -1,417 +0,0 @@ -# Copyright (C) 2021 Bosutech XXI S.L. -# -# nucliadb is offered under the AGPL v3.0 and as commercial software. -# For commercial licensing, contact us at info@nuclia.com. -# -# AGPL: -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. 
-# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# -import logging -from unittest.mock import AsyncMock, Mock - -import pytest -from grpc import aio -from httpx import AsyncClient - -from nucliadb.common.cluster import manager as cluster_manager -from nucliadb.standalone.settings import Settings -from nucliadb.writer import API_PREFIX -from nucliadb_protos.train_pb2_grpc import TrainStub -from nucliadb_protos.utils_pb2 import Relation, RelationNode -from nucliadb_protos.writer_pb2 import BrokerMessage -from nucliadb_protos.writer_pb2_grpc import WriterStub -from nucliadb_utils.aiopynecone.models import QueryResponse -from nucliadb_utils.utilities import ( - Utility, - clean_pinecone, - clean_utility, - get_pinecone, - get_utility, - set_utility, -) -from tests.utils import inject_message -from tests.utils.dirty_index import mark_dirty, wait_for_sync - -logger = logging.getLogger(__name__) - - -@pytest.fixture(scope="function") -async def nucliadb_reader(nucliadb: Settings): - async with AsyncClient( - headers={"X-NUCLIADB-ROLES": "READER"}, - base_url=f"http://localhost:{nucliadb.http_port}/{API_PREFIX}/v1", - timeout=None, - event_hooks={"request": [wait_for_sync]}, - ) as client: - yield client - - -@pytest.fixture(scope="function") -async def nucliadb_writer(nucliadb: Settings): - async with AsyncClient( - headers={"X-NUCLIADB-ROLES": "WRITER"}, - base_url=f"http://localhost:{nucliadb.http_port}/{API_PREFIX}/v1", - timeout=None, - event_hooks={"request": [mark_dirty]}, - ) as client: - yield client - - -@pytest.fixture(scope="function") -async def nucliadb_manager(nucliadb: Settings): - async with AsyncClient( - headers={"X-NUCLIADB-ROLES": "MANAGER"}, - base_url=f"http://localhost:{nucliadb.http_port}/{API_PREFIX}/v1", - timeout=None, - event_hooks={"request": [mark_dirty]}, - ) as client: - yield client - - -@pytest.fixture(scope="function") -async def knowledgebox(nucliadb_manager: 
AsyncClient): - resp = await nucliadb_manager.post("/kbs", json={"slug": "knowledgebox"}) - assert resp.status_code == 201 - uuid = resp.json().get("uuid") - - yield uuid - - resp = await nucliadb_manager.delete(f"/kb/{uuid}") - assert resp.status_code == 200 - - -@pytest.fixture(scope="function") -def pinecone_data_plane(): - dp = Mock() - dp.upsert = AsyncMock(return_value=None) - dp.query = AsyncMock( - return_value=QueryResponse( - matches=[], - ) - ) - return dp - - -@pytest.fixture(scope="function") -def pinecone_control_plane(): - cp = Mock() - cp.create_index = AsyncMock(return_value="pinecone-host") - cp.delete_index = AsyncMock(return_value=None) - return cp - - -@pytest.fixture(scope="function") -def pinecone_mock(pinecone_data_plane, pinecone_control_plane): - pinecone_session = get_pinecone() - pinecone_session.data_plane = Mock(return_value=pinecone_data_plane) - pinecone_session.control_plane = Mock(return_value=pinecone_control_plane) - yield - clean_pinecone() - - -@pytest.fixture(scope="function") -async def pinecone_knowledgebox(nucliadb_manager: AsyncClient, pinecone_mock): - resp = await nucliadb_manager.post( - "/kbs", - json={ - "slug": "pinecone_knowledgebox", - "external_index_provider": { - "type": "pinecone", - "api_key": "my-pinecone-api-key", - "serverless_cloud": "aws_us_east_1", - }, - }, - ) - assert resp.status_code == 201 - uuid = resp.json().get("uuid") - - yield uuid - - resp = await nucliadb_manager.delete(f"/kb/{uuid}") - assert resp.status_code == 200 - - -@pytest.fixture(scope="function") -async def nucliadb_grpc(nucliadb: Settings): - stub = WriterStub(aio.insecure_channel(f"localhost:{nucliadb.ingest_grpc_port}")) - return stub - - -@pytest.fixture(scope="function") -async def nucliadb_train(nucliadb: Settings): - stub = TrainStub(aio.insecure_channel(f"localhost:{nucliadb.train_grpc_port}")) - return stub - - -@pytest.fixture(scope="function") -async def knowledge_graph(nucliadb_writer: AsyncClient, nucliadb_grpc: 
WriterStub, knowledgebox): - resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", - json={ - "title": "Knowledge graph", - "slug": "knowledgegraph", - "summary": "Test knowledge graph", - }, - ) - assert resp.status_code == 201 - rid = resp.json()["uuid"] - - nodes = { - "Animal": RelationNode(value="Animal", ntype=RelationNode.NodeType.ENTITY, subtype=""), - "Batman": RelationNode(value="Batman", ntype=RelationNode.NodeType.ENTITY, subtype=""), - "Becquer": RelationNode(value="Becquer", ntype=RelationNode.NodeType.ENTITY, subtype=""), - "Cat": RelationNode(value="Cat", ntype=RelationNode.NodeType.ENTITY, subtype=""), - "Catwoman": RelationNode(value="Catwoman", ntype=RelationNode.NodeType.ENTITY, subtype=""), - "Eric": RelationNode(value="Eric", ntype=RelationNode.NodeType.ENTITY, subtype=""), - "Fly": RelationNode(value="Fly", ntype=RelationNode.NodeType.ENTITY, subtype=""), - "Gravity": RelationNode(value="Gravity", ntype=RelationNode.NodeType.ENTITY, subtype=""), - "Joan Antoni": RelationNode(value="Joan Antoni", ntype=RelationNode.NodeType.ENTITY, subtype=""), - "Joker": RelationNode(value="Joker", ntype=RelationNode.NodeType.ENTITY, subtype=""), - "Newton": RelationNode(value="Newton", ntype=RelationNode.NodeType.ENTITY, subtype="science"), - "Isaac Newsome": RelationNode( - value="Isaac Newsome", ntype=RelationNode.NodeType.ENTITY, subtype="science" - ), - "Physics": RelationNode(value="Physics", ntype=RelationNode.NodeType.ENTITY, subtype="science"), - "Poetry": RelationNode(value="Poetry", ntype=RelationNode.NodeType.ENTITY, subtype=""), - "Swallow": RelationNode(value="Swallow", ntype=RelationNode.NodeType.ENTITY, subtype=""), - } - - edges = [ - Relation( - relation=Relation.RelationType.ENTITY, - source=nodes["Batman"], - to=nodes["Catwoman"], - relation_label="love", - ), - Relation( - relation=Relation.RelationType.ENTITY, - source=nodes["Batman"], - to=nodes["Joker"], - relation_label="fight", - ), - Relation( - 
relation=Relation.RelationType.ENTITY, - source=nodes["Joker"], - to=nodes["Physics"], - relation_label="enjoy", - ), - Relation( - relation=Relation.RelationType.ENTITY, - source=nodes["Catwoman"], - to=nodes["Cat"], - relation_label="imitate", - ), - Relation( - relation=Relation.RelationType.ENTITY, - source=nodes["Cat"], - to=nodes["Animal"], - relation_label="species", - ), - Relation( - relation=Relation.RelationType.ENTITY, - source=nodes["Newton"], - to=nodes["Physics"], - relation_label="study", - ), - Relation( - relation=Relation.RelationType.ENTITY, - source=nodes["Newton"], - to=nodes["Gravity"], - relation_label="formulate", - ), - Relation( - relation=Relation.RelationType.ENTITY, - source=nodes["Isaac Newsome"], - to=nodes["Physics"], - relation_label="study", - ), - Relation( - relation=Relation.RelationType.ENTITY, - source=nodes["Isaac Newsome"], - to=nodes["Gravity"], - relation_label="formulate", - ), - Relation( - relation=Relation.RelationType.ENTITY, - source=nodes["Eric"], - to=nodes["Cat"], - relation_label="like", - ), - Relation( - relation=Relation.RelationType.ENTITY, - source=nodes["Eric"], - to=nodes["Joan Antoni"], - relation_label="collaborate", - ), - Relation( - relation=Relation.RelationType.ENTITY, - source=nodes["Joan Antoni"], - to=nodes["Eric"], - relation_label="collaborate", - ), - Relation( - relation=Relation.RelationType.ENTITY, - source=nodes["Joan Antoni"], - to=nodes["Becquer"], - relation_label="read", - ), - Relation( - relation=Relation.RelationType.ENTITY, - source=nodes["Becquer"], - to=nodes["Poetry"], - relation_label="write", - ), - Relation( - relation=Relation.RelationType.ENTITY, - source=nodes["Becquer"], - to=nodes["Poetry"], - relation_label="like", - ), - Relation( - relation=Relation.RelationType.ABOUT, - source=nodes["Poetry"], - to=nodes["Swallow"], - relation_label="about", - ), - Relation( - relation=Relation.RelationType.ENTITY, - source=nodes["Swallow"], - to=nodes["Animal"], - 
relation_label="species", - ), - Relation( - relation=Relation.RelationType.ENTITY, - source=nodes["Swallow"], - to=nodes["Fly"], - relation_label="can", - ), - Relation( - relation=Relation.RelationType.ENTITY, - source=nodes["Fly"], - to=nodes["Gravity"], - relation_label="defy", - ), - ] - - bm = BrokerMessage() - bm.uuid = rid - bm.kbid = knowledgebox - bm.relations.extend(edges) - await inject_message(nucliadb_grpc, bm) - await wait_for_sync() - - resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/entitiesgroups", - json={ - "title": "scientist", - "color": "", - "entities": { - "Isaac": {"value": "Isaac"}, - "Isaac Newton": {"value": "Isaac Newton", "represents": ["Newton"]}, - "Isaac Newsome": {"value": "Isaac Newsome"}, - }, - "custom": True, - "group": "scientist", - }, - ) - assert resp.status_code == 200, resp.content - resp = await nucliadb_writer.patch( - f"/kb/{knowledgebox}/entitiesgroup/scientist", - json={"add": {}, "update": {}, "delete": ["Isaac Newsome"]}, - ) - assert resp.status_code == 200, resp.content - resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/entitiesgroups", - json={ - "title": "poet", - "color": "", - "entities": { - "Becquer": { - "value": "Becquer", - "represents": ["Gustavo Adolfo Bécquer"], - }, - "Gustavo Adolfo Bécquer": {"value": "Gustavo Adolfo Bécquer"}, - }, - "custom": True, - "group": "poet", - }, - ) - assert resp.status_code == 200, resp.content - - return (nodes, edges) - - -# TODO: remove after migrating tests/nucliadb/ to ndbfixtures. 
fixture already -# moved to ndbfixtures.common -@pytest.fixture(scope="function") -async def stream_audit(natsd: str, mocker): - from nucliadb_utils.audit.stream import StreamAuditStorage - from nucliadb_utils.settings import audit_settings - - audit = StreamAuditStorage( - [natsd], - audit_settings.audit_jetstream_target, # type: ignore - audit_settings.audit_partitions, - audit_settings.audit_hash_seed, - ) - await audit.initialize() - - mocker.spy(audit, "send") - mocker.spy(audit.js, "publish") - mocker.spy(audit, "search") - mocker.spy(audit, "chat") - - set_utility(Utility.AUDIT, audit) - yield audit - await audit.finalize() - - -@pytest.fixture(scope="function") -def predict_mock() -> Mock: # type: ignore - predict = get_utility(Utility.PREDICT) - mock = Mock() - set_utility(Utility.PREDICT, mock) - - yield mock - - if predict is None: - clean_utility(Utility.PREDICT) - else: - set_utility(Utility.PREDICT, predict) - - -@pytest.fixture(scope="function") -def metrics_registry(): - import prometheus_client.registry - - for collector in prometheus_client.registry.REGISTRY._names_to_collectors.values(): - if not hasattr(collector, "_metrics"): - continue - collector._metrics.clear() - yield prometheus_client.registry.REGISTRY - - -@pytest.fixture(scope="function") -async def txn(maindb_driver): - async with maindb_driver.transaction() as txn: - yield txn - await txn.abort() - - -@pytest.fixture(scope="function") -async def shard_manager(storage, maindb_driver): - mng = cluster_manager.KBShardManager() - set_utility(Utility.SHARD_MANAGER, mng) - yield mng - clean_utility(Utility.SHARD_MANAGER) diff --git a/nucliadb/tests/ingest/conftest.py b/nucliadb/tests/ingest/conftest.py index afd1b1582e..bf0f56b222 100644 --- a/nucliadb/tests/ingest/conftest.py +++ b/nucliadb/tests/ingest/conftest.py @@ -21,7 +21,6 @@ "pytest_docker_fixtures", "nucliadb_utils.tests.nats", "tests.ingest.fixtures", - "tests.fixtures", "tests.ndbfixtures.maindb", "tests.ndbfixtures.processing", 
"tests.ndbfixtures.standalone", diff --git a/nucliadb/tests/ndbfixtures/common.py b/nucliadb/tests/ndbfixtures/common.py index 13e110d567..d51b61ccfc 100644 --- a/nucliadb/tests/ndbfixtures/common.py +++ b/nucliadb/tests/ndbfixtures/common.py @@ -18,20 +18,27 @@ # along with this program. If not, see . # from os.path import dirname -from typing import AsyncIterator, Iterator +from typing import AsyncIterable, AsyncIterator, Iterator +from unittest.mock import Mock, patch import pytest from pytest_mock import MockerFixture from nucliadb.common.cluster.manager import KBShardManager from nucliadb.common.maindb.driver import Driver +from nucliadb.search.predict import DummyPredictEngine from nucliadb_utils.audit.audit import AuditStorage from nucliadb_utils.audit.basic import BasicAuditStorage from nucliadb_utils.audit.stream import StreamAuditStorage -from nucliadb_utils.settings import audit_settings +from nucliadb_utils.settings import ( + audit_settings, + nuclia_settings, +) from nucliadb_utils.storages.settings import settings as storage_settings from nucliadb_utils.storages.storage import Storage -from nucliadb_utils.utilities import Utility, clean_utility, set_utility +from nucliadb_utils.utilities import ( + Utility, +) from tests.ndbfixtures.utils import global_utility # Audit @@ -80,14 +87,33 @@ async def local_files(): storage_settings.local_testing_files = f"{dirname(__file__)}" +# Predict + + +@pytest.fixture(scope="function") +def predict_mock() -> Mock: # type: ignore + mock = Mock() + with global_utility(Utility.PREDICT, mock): + yield mock + + +@pytest.fixture(scope="function") +async def dummy_predict() -> AsyncIterable[DummyPredictEngine]: + with ( + patch.object(nuclia_settings, "dummy_predict", True), + ): + predict_util = DummyPredictEngine() + await predict_util.initialize() + + with global_utility(Utility.PREDICT, predict_util): + yield predict_util + + # Shard manager @pytest.fixture(scope="function") async def shard_manager(storage: Storage, 
maindb_driver: Driver) -> AsyncIterator[KBShardManager]: sm = KBShardManager() - set_utility(Utility.SHARD_MANAGER, sm) - - yield sm - - clean_utility(Utility.SHARD_MANAGER) + with global_utility(Utility.SHARD_MANAGER, sm): + yield sm diff --git a/nucliadb/tests/ndbfixtures/ingest.py b/nucliadb/tests/ndbfixtures/ingest.py index f78284bf48..a397f8c691 100644 --- a/nucliadb/tests/ndbfixtures/ingest.py +++ b/nucliadb/tests/ndbfixtures/ingest.py @@ -70,6 +70,12 @@ async def standalone_nucliadb_ingest_grpc(nucliadb: Settings) -> AsyncIterator[W await channel.close(grace=None) +# alias to ease migration to new ndbfixtures +@pytest.fixture(scope="function") +async def standalone_nucliadb_grpc(standalone_nucliadb_ingest_grpc): + yield standalone_nucliadb_ingest_grpc + + # Utils diff --git a/nucliadb/tests/nucliadb/conftest.py b/nucliadb/tests/ndbfixtures/legacy.py similarity index 61% rename from nucliadb/tests/nucliadb/conftest.py rename to nucliadb/tests/ndbfixtures/legacy.py index 729a27e908..c350580bd6 100644 --- a/nucliadb/tests/nucliadb/conftest.py +++ b/nucliadb/tests/ndbfixtures/legacy.py @@ -17,20 +17,18 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
# -pytest_plugins = [ - "pytest_docker_fixtures", - "tests.fixtures", - "tests.ndbfixtures.maindb", - "tests.ndbfixtures.processing", - "tests.ndbfixtures.standalone", - "tests.nucliadb.knowledgeboxes", - "nucliadb_utils.tests.asyncbenchmark", - "nucliadb_utils.tests.nats", - "tests.ndbfixtures.nidx", - "tests.ingest.fixtures", - "nucliadb_utils.tests.fixtures", - "nucliadb_utils.tests.gcs", - "nucliadb_utils.tests.azure", - "nucliadb_utils.tests.s3", - "nucliadb_utils.tests.local", -] + +from typing import AsyncIterator + +import pytest + +from nucliadb.common.maindb.driver import Driver, Transaction + +# Dependents: search, nucliadb + + +@pytest.fixture(scope="function") +async def txn(maindb_driver: Driver) -> AsyncIterator[Transaction]: + async with maindb_driver.transaction() as txn: + yield txn + await txn.abort() diff --git a/nucliadb/tests/ndbfixtures/magic.py b/nucliadb/tests/ndbfixtures/magic.py index be95b4d03c..540b1f5c7b 100644 --- a/nucliadb/tests/ndbfixtures/magic.py +++ b/nucliadb/tests/ndbfixtures/magic.py @@ -43,6 +43,7 @@ ], "nucliadb_train_grpc": [ "component", + "standalone", ], "nucliadb_ingest_grpc": [ "component", diff --git a/nucliadb/tests/ndbfixtures/resources.py b/nucliadb/tests/ndbfixtures/resources.py index d85e90dfe8..12415b6d73 100644 --- a/nucliadb/tests/ndbfixtures/resources.py +++ b/nucliadb/tests/ndbfixtures/resources.py @@ -18,6 +18,7 @@ # along with this program. If not, see . 
# import asyncio +import logging import time import uuid from typing import AsyncIterator @@ -35,8 +36,15 @@ from nucliadb.writer.api.v1.router import KB_PREFIX, KBS_PREFIX from nucliadb_protos import utils_pb2 as upb from nucliadb_protos.knowledgebox_pb2 import SemanticModelMetadata +from nucliadb_protos.utils_pb2 import Relation, RelationNode +from nucliadb_protos.writer_pb2 import BrokerMessage +from nucliadb_protos.writer_pb2_grpc import WriterStub from nucliadb_utils.storages.storage import Storage +from tests.utils import inject_message from tests.utils.broker_messages import BrokerMessageBuilder +from tests.utils.dirty_index import wait_for_sync + +logger = logging.getLogger(__name__) @pytest.fixture(scope="function") @@ -60,6 +68,18 @@ async def knowledgebox( # await KnowledgeBox.purge(maindb_driver, kbid) +@pytest.fixture(scope="function") +async def standalone_knowledgebox(nucliadb_writer_manager: AsyncClient): + resp = await nucliadb_writer_manager.post("/kbs", json={"slug": "knowledgebox"}) + assert resp.status_code == 201 + uuid = resp.json().get("uuid") + + yield uuid + + resp = await nucliadb_writer_manager.delete(f"/kb/{uuid}") + assert resp.status_code == 200 + + # FIXME: this is a weird situation, we can use a hosted-like nucliadb while this # creates a KB as it was onprem. 
The end result should not change much but still, is # something we may want to fix @@ -164,3 +184,204 @@ async def simple_resources( await asyncio.sleep(0.1) yield knowledgebox, resource_ids + + +# Only supported for standalone (as it depends on standalone_knowledgebox fixture) +@pytest.fixture(scope="function") +async def knowledge_graph( + nucliadb_writer: AsyncClient, nucliadb_ingest_grpc: WriterStub, standalone_knowledgebox: str +): + resp = await nucliadb_writer.post( + f"/kb/{standalone_knowledgebox}/resources", + json={ + "title": "Knowledge graph", + "slug": "knowledgegraph", + "summary": "Test knowledge graph", + }, + ) + assert resp.status_code == 201 + rid = resp.json()["uuid"] + + nodes = { + "Animal": RelationNode(value="Animal", ntype=RelationNode.NodeType.ENTITY, subtype=""), + "Batman": RelationNode(value="Batman", ntype=RelationNode.NodeType.ENTITY, subtype=""), + "Becquer": RelationNode(value="Becquer", ntype=RelationNode.NodeType.ENTITY, subtype=""), + "Cat": RelationNode(value="Cat", ntype=RelationNode.NodeType.ENTITY, subtype=""), + "Catwoman": RelationNode(value="Catwoman", ntype=RelationNode.NodeType.ENTITY, subtype=""), + "Eric": RelationNode(value="Eric", ntype=RelationNode.NodeType.ENTITY, subtype=""), + "Fly": RelationNode(value="Fly", ntype=RelationNode.NodeType.ENTITY, subtype=""), + "Gravity": RelationNode(value="Gravity", ntype=RelationNode.NodeType.ENTITY, subtype=""), + "Joan Antoni": RelationNode(value="Joan Antoni", ntype=RelationNode.NodeType.ENTITY, subtype=""), + "Joker": RelationNode(value="Joker", ntype=RelationNode.NodeType.ENTITY, subtype=""), + "Newton": RelationNode(value="Newton", ntype=RelationNode.NodeType.ENTITY, subtype="science"), + "Isaac Newsome": RelationNode( + value="Isaac Newsome", ntype=RelationNode.NodeType.ENTITY, subtype="science" + ), + "Physics": RelationNode(value="Physics", ntype=RelationNode.NodeType.ENTITY, subtype="science"), + "Poetry": RelationNode(value="Poetry", 
ntype=RelationNode.NodeType.ENTITY, subtype=""), + "Swallow": RelationNode(value="Swallow", ntype=RelationNode.NodeType.ENTITY, subtype=""), + } + + edges = [ + Relation( + relation=Relation.RelationType.ENTITY, + source=nodes["Batman"], + to=nodes["Catwoman"], + relation_label="love", + ), + Relation( + relation=Relation.RelationType.ENTITY, + source=nodes["Batman"], + to=nodes["Joker"], + relation_label="fight", + ), + Relation( + relation=Relation.RelationType.ENTITY, + source=nodes["Joker"], + to=nodes["Physics"], + relation_label="enjoy", + ), + Relation( + relation=Relation.RelationType.ENTITY, + source=nodes["Catwoman"], + to=nodes["Cat"], + relation_label="imitate", + ), + Relation( + relation=Relation.RelationType.ENTITY, + source=nodes["Cat"], + to=nodes["Animal"], + relation_label="species", + ), + Relation( + relation=Relation.RelationType.ENTITY, + source=nodes["Newton"], + to=nodes["Physics"], + relation_label="study", + ), + Relation( + relation=Relation.RelationType.ENTITY, + source=nodes["Newton"], + to=nodes["Gravity"], + relation_label="formulate", + ), + Relation( + relation=Relation.RelationType.ENTITY, + source=nodes["Isaac Newsome"], + to=nodes["Physics"], + relation_label="study", + ), + Relation( + relation=Relation.RelationType.ENTITY, + source=nodes["Isaac Newsome"], + to=nodes["Gravity"], + relation_label="formulate", + ), + Relation( + relation=Relation.RelationType.ENTITY, + source=nodes["Eric"], + to=nodes["Cat"], + relation_label="like", + ), + Relation( + relation=Relation.RelationType.ENTITY, + source=nodes["Eric"], + to=nodes["Joan Antoni"], + relation_label="collaborate", + ), + Relation( + relation=Relation.RelationType.ENTITY, + source=nodes["Joan Antoni"], + to=nodes["Eric"], + relation_label="collaborate", + ), + Relation( + relation=Relation.RelationType.ENTITY, + source=nodes["Joan Antoni"], + to=nodes["Becquer"], + relation_label="read", + ), + Relation( + relation=Relation.RelationType.ENTITY, + source=nodes["Becquer"], + 
to=nodes["Poetry"], + relation_label="write", + ), + Relation( + relation=Relation.RelationType.ENTITY, + source=nodes["Becquer"], + to=nodes["Poetry"], + relation_label="like", + ), + Relation( + relation=Relation.RelationType.ABOUT, + source=nodes["Poetry"], + to=nodes["Swallow"], + relation_label="about", + ), + Relation( + relation=Relation.RelationType.ENTITY, + source=nodes["Swallow"], + to=nodes["Animal"], + relation_label="species", + ), + Relation( + relation=Relation.RelationType.ENTITY, + source=nodes["Swallow"], + to=nodes["Fly"], + relation_label="can", + ), + Relation( + relation=Relation.RelationType.ENTITY, + source=nodes["Fly"], + to=nodes["Gravity"], + relation_label="defy", + ), + ] + + bm = BrokerMessage() + bm.uuid = rid + bm.kbid = standalone_knowledgebox + bm.relations.extend(edges) + await inject_message(nucliadb_ingest_grpc, bm) + await wait_for_sync() + + resp = await nucliadb_writer.post( + f"/kb/{standalone_knowledgebox}/entitiesgroups", + json={ + "title": "scientist", + "color": "", + "entities": { + "Isaac": {"value": "Isaac"}, + "Isaac Newton": {"value": "Isaac Newton", "represents": ["Newton"]}, + "Isaac Newsome": {"value": "Isaac Newsome"}, + }, + "custom": True, + "group": "scientist", + }, + ) + assert resp.status_code == 200, resp.content + resp = await nucliadb_writer.patch( + f"/kb/{standalone_knowledgebox}/entitiesgroup/scientist", + json={"add": {}, "update": {}, "delete": ["Isaac Newsome"]}, + ) + assert resp.status_code == 200, resp.content + resp = await nucliadb_writer.post( + f"/kb/{standalone_knowledgebox}/entitiesgroups", + json={ + "title": "poet", + "color": "", + "entities": { + "Becquer": { + "value": "Becquer", + "represents": ["Gustavo Adolfo Bécquer"], + }, + "Gustavo Adolfo Bécquer": {"value": "Gustavo Adolfo Bécquer"}, + }, + "custom": True, + "group": "poet", + }, + ) + assert resp.status_code == 200, resp.content + + return (nodes, edges) diff --git a/nucliadb/tests/ndbfixtures/search.py 
b/nucliadb/tests/ndbfixtures/search.py index 559fb46c1d..eae1b614ee 100644 --- a/nucliadb/tests/ndbfixtures/search.py +++ b/nucliadb/tests/ndbfixtures/search.py @@ -18,7 +18,6 @@ # along with this program. If not, see . import asyncio -from typing import AsyncIterable from unittest.mock import patch import pytest @@ -31,7 +30,6 @@ from nucliadb.ingest.cache import clear_ingest_cache from nucliadb.ingest.settings import settings as ingest_settings from nucliadb.search.app import application -from nucliadb.search.predict import DummyPredictEngine from nucliadb_models.resource import NucliaDBRoles from nucliadb_protos.nodereader_pb2 import GetShardRequest from nucliadb_protos.noderesources_pb2 import Shard @@ -45,11 +43,10 @@ from nucliadb_utils.tests import free_port from nucliadb_utils.transaction import TransactionUtility from nucliadb_utils.utilities import ( - Utility, clear_global_cache, ) from tests.ingest.fixtures import broker_resource -from tests.ndbfixtures.utils import create_api_client_factory, global_utility +from tests.ndbfixtures.utils import create_api_client_factory # Main fixtures @@ -88,18 +85,6 @@ async def cluster_nucliadb_search( # Rest, TODO keep cleaning -@pytest.fixture(scope="function") -async def dummy_predict() -> AsyncIterable[DummyPredictEngine]: - with ( - patch.object(nuclia_settings, "dummy_predict", True), - ): - predict_util = DummyPredictEngine() - await predict_util.initialize() - - with global_utility(Utility.PREDICT, predict_util): - yield predict_util - - @pytest.fixture(scope="function") async def test_search_resource( indexing_utility_registered, @@ -190,13 +175,3 @@ async def wait_for_shard(knowledgebox_ingest: str, count: int) -> str: # Wait an extra couple of seconds for reader/searcher to catch up await asyncio.sleep(2) return knowledgebox_ingest - - -# Dependencies from tests/fixtures.py - - -@pytest.fixture(scope="function") -async def txn(maindb_driver): - async with maindb_driver.transaction() as txn: - yield txn - 
await txn.abort() diff --git a/nucliadb/tests/ndbfixtures/train.py b/nucliadb/tests/ndbfixtures/train.py index 4b13ee10af..b24a4be975 100644 --- a/nucliadb/tests/ndbfixtures/train.py +++ b/nucliadb/tests/ndbfixtures/train.py @@ -79,7 +79,16 @@ class TrainGrpcServer: @pytest.fixture(scope="function") async def component_nucliadb_train_grpc(train_grpc_server: TrainGrpcServer) -> AsyncIterator[TrainStub]: channel = aio.insecure_channel(f"localhost:{train_grpc_server.port}") - yield TrainStub(channel) + stub = TrainStub(channel) + yield stub + await channel.close(grace=None) + + +@pytest.fixture(scope="function") +async def standalone_nucliadb_train_grpc(nucliadb: Settings) -> AsyncIterator[TrainStub]: + channel = aio.insecure_channel(f"localhost:{nucliadb.train_grpc_port}") + stub = TrainStub(channel) + yield stub await channel.close(grace=None) diff --git a/nucliadb/tests/nucliadb/benchmarks/test_search.py b/nucliadb/tests/nucliadb/benchmarks/test_search.py index 8edf2ad1e3..5f5971ef98 100644 --- a/nucliadb/tests/nucliadb/benchmarks/test_search.py +++ b/nucliadb/tests/nucliadb/benchmarks/test_search.py @@ -41,19 +41,20 @@ disable_gc=True, warmup=False, ) +@pytest.mark.deploy_modes("standalone") async def test_search_returns_labels( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox, asyncbenchmark: AsyncBenchmarkFixture, ): - bm = broker_resource_with_classifications(knowledgebox) - await inject_message(nucliadb_grpc, bm) + bm = broker_resource_with_classifications(standalone_knowledgebox) + await inject_message(nucliadb_ingest_grpc, bm) resp = await asyncbenchmark( nucliadb_reader.get, - f"/kb/{knowledgebox}/search?query=Some&show=extracted&extracted=metadata", + f"/kb/{standalone_knowledgebox}/search?query=Some&show=extracted&extracted=metadata", ) assert resp.status_code == 200 @@ -67,11 +68,12 @@ async def test_search_returns_labels( 
disable_gc=True, warmup=False, ) +@pytest.mark.deploy_modes("standalone") async def test_search_relations( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox, knowledge_graph, asyncbenchmark: AsyncBenchmarkFixture, ): @@ -86,7 +88,7 @@ async def test_search_relations( resp = await asyncbenchmark( nucliadb_reader.get, - f"/kb/{knowledgebox}/search", + f"/kb/{standalone_knowledgebox}/search", params={ "features": "relations", "query": "What relates Newton and Becquer?", diff --git a/nucliadb/tests/nucliadb/integration/common/cluster/test_rebalance.py b/nucliadb/tests/nucliadb/integration/common/cluster/test_rebalance.py index 4399f47571..27da7e56ce 100644 --- a/nucliadb/tests/nucliadb/integration/common/cluster/test_rebalance.py +++ b/nucliadb/tests/nucliadb/integration/common/cluster/test_rebalance.py @@ -37,16 +37,17 @@ async def app_context(natsd, storage, nucliadb): await ctx.finalize() +@pytest.mark.deploy_modes("standalone") async def test_rebalance_kb_shards( app_context, - knowledgebox, + standalone_knowledgebox, nucliadb_writer: AsyncClient, - nucliadb_manager: AsyncClient, + nucliadb_reader_manager: AsyncClient, ): count = 10 for i in range(count): resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "slug": f"myresource-{i}", "title": f"My Title {i}", @@ -60,20 +61,20 @@ async def test_rebalance_kb_shards( ) assert resp.status_code == 201 - counters1_resp = await nucliadb_manager.get(f"/kb/{knowledgebox}/counters") - shards1_resp = await nucliadb_manager.get(f"/kb/{knowledgebox}/shards") + counters1_resp = await nucliadb_reader_manager.get(f"/kb/{standalone_knowledgebox}/counters") + shards1_resp = await nucliadb_reader_manager.get(f"/kb/{standalone_knowledgebox}/shards") counters1 = counters1_resp.json() shards1 = shards1_resp.json() assert len(shards1["shards"]) == 1 with 
patch.object(settings, "max_shard_paragraphs", counters1["paragraphs"] / 2): - await rebalance.rebalance_kb(app_context, knowledgebox) + await rebalance.rebalance_kb(app_context, standalone_knowledgebox) - shards2_resp = await nucliadb_manager.get(f"/kb/{knowledgebox}/shards") + shards2_resp = await nucliadb_reader_manager.get(f"/kb/{standalone_knowledgebox}/shards") shards2 = shards2_resp.json() assert len(shards2["shards"]) == 2 # if we run it again, we should get another shard with patch.object(settings, "max_shard_paragraphs", counters1["paragraphs"] / 2): - await rebalance.rebalance_kb(app_context, knowledgebox) + await rebalance.rebalance_kb(app_context, standalone_knowledgebox) diff --git a/nucliadb/tests/nucliadb/integration/common/cluster/test_rollover.py b/nucliadb/tests/nucliadb/integration/common/cluster/test_rollover.py index 2bfb23b487..379cab114a 100644 --- a/nucliadb/tests/nucliadb/integration/common/cluster/test_rollover.py +++ b/nucliadb/tests/nucliadb/integration/common/cluster/test_rollover.py @@ -39,27 +39,33 @@ async def app_context(natsd, storage, nucliadb): await ctx.finalize() +@pytest.mark.deploy_modes("standalone") async def test_rollover_kb_index( app_context: ApplicationContext, - knowledgebox, + standalone_knowledgebox, nucliadb_writer: AsyncClient, nucliadb_reader: AsyncClient, - nucliadb_manager: AsyncClient, + nucliadb_reader_manager: AsyncClient, ): await _test_rollover_kb_index( - app_context, knowledgebox, nucliadb_writer, nucliadb_reader, nucliadb_manager + app_context, standalone_knowledgebox, nucliadb_writer, nucliadb_reader, nucliadb_reader_manager ) +@pytest.mark.deploy_modes("standalone") async def test_rollover_kb_index_with_vectorsets( app_context: ApplicationContext, knowledgebox_with_vectorsets: str, nucliadb_writer: AsyncClient, nucliadb_reader: AsyncClient, - nucliadb_manager: AsyncClient, + nucliadb_reader_manager: AsyncClient, ): await _test_rollover_kb_index( - app_context, knowledgebox_with_vectorsets, 
nucliadb_writer, nucliadb_reader, nucliadb_manager + app_context, + knowledgebox_with_vectorsets, + nucliadb_writer, + nucliadb_reader, + nucliadb_reader_manager, ) @@ -68,7 +74,7 @@ async def _test_rollover_kb_index( kbid: str, nucliadb_writer: AsyncClient, nucliadb_reader: AsyncClient, - nucliadb_manager: AsyncClient, + nucliadb_reader_manager: AsyncClient, ): count = 20 for i in range(count): @@ -83,13 +89,13 @@ async def _test_rollover_kb_index( ) assert resp.status_code == 201 - resp = await nucliadb_manager.get(f"/kb/{kbid}/shards") + resp = await nucliadb_reader_manager.get(f"/kb/{kbid}/shards") assert resp.status_code == 200, resp.text shards_body1 = resp.json() await rollover.rollover_kb_index(app_context, kbid) - resp = await nucliadb_manager.get(f"/kb/{kbid}/shards") + resp = await nucliadb_reader_manager.get(f"/kb/{kbid}/shards") assert resp.status_code == 200, resp.text shards_body2 = resp.json() # check that shards have changed @@ -107,6 +113,7 @@ async def _test_rollover_kb_index( assert len(body["resources"]) == count +@pytest.mark.deploy_modes("standalone") async def test_rollover_kb_index_does_a_clean_cutover( app_context, knowledgebox, @@ -124,12 +131,13 @@ async def get_kb_shards(kbid: str): assert shards2.extra == {} +@pytest.mark.deploy_modes("standalone") async def test_rollover_kb_index_handles_changes_in_between( app_context, knowledgebox, nucliadb_writer: AsyncClient, nucliadb_reader: AsyncClient, - nucliadb_manager: AsyncClient, + nucliadb_reader_manager: AsyncClient, ): count = 50 resources = [] diff --git a/nucliadb/tests/nucliadb/integration/migrator/test_migrator.py b/nucliadb/tests/nucliadb/integration/migrator/test_migrator.py index 950213bae0..414477a2e3 100644 --- a/nucliadb/tests/nucliadb/integration/migrator/test_migrator.py +++ b/nucliadb/tests/nucliadb/integration/migrator/test_migrator.py @@ -20,6 +20,7 @@ import uuid import pytest +from httpx import AsyncClient from nucliadb.migrator import migrator from 
nucliadb.migrator.context import ExecutionContext @@ -38,12 +39,13 @@ async def execution_context(natsd, storage, nucliadb): await context.finalize() -async def test_migrate_kb(execution_context: ExecutionContext, knowledgebox): +@pytest.mark.deploy_modes("standalone") +async def test_migrate_kb(execution_context: ExecutionContext, standalone_knowledgebox): # this will test run all available migrations - await execution_context.data_manager.update_kb_info(kbid=knowledgebox, current_version=-1) + await execution_context.data_manager.update_kb_info(kbid=standalone_knowledgebox, current_version=-1) await execution_context.data_manager.update_global_info(current_version=0) - kb_info = await execution_context.data_manager.get_kb_info(kbid=knowledgebox) + kb_info = await execution_context.data_manager.get_kb_info(kbid=standalone_knowledgebox) assert kb_info is not None assert kb_info.current_version == -1 global_info = await execution_context.data_manager.get_global_info() @@ -53,7 +55,7 @@ async def test_migrate_kb(execution_context: ExecutionContext, knowledgebox): # other tests can be so slow and cumbersome to maintain await migrator.run(execution_context, target_version=1) - kb_info = await execution_context.data_manager.get_kb_info(kbid=knowledgebox) + kb_info = await execution_context.data_manager.get_kb_info(kbid=standalone_knowledgebox) assert kb_info is not None assert kb_info.current_version == 1 global_info = await execution_context.data_manager.get_global_info() @@ -61,20 +63,21 @@ async def test_migrate_kb(execution_context: ExecutionContext, knowledgebox): @pytest.fixture(scope="function") -async def two_knowledgeboxes(nucliadb_manager): +async def two_knowledgeboxes(nucliadb_writer_manager: AsyncClient): kbs = [] for _ in range(2): - resp = await nucliadb_manager.post("/kbs", json={"slug": uuid.uuid4().hex}) + resp = await nucliadb_writer_manager.post("/kbs", json={"slug": uuid.uuid4().hex}) assert resp.status_code == 201 
kbs.append(resp.json().get("uuid")) yield kbs for kb in kbs: - resp = await nucliadb_manager.delete(f"/kb/{kb}") + resp = await nucliadb_writer_manager.delete(f"/kb/{kb}") assert resp.status_code == 200 +@pytest.mark.deploy_modes("standalone") async def test_run_all_kb_migrations(execution_context: ExecutionContext, two_knowledgeboxes): # Set migration version to -1 for all knowledgeboxes for kbid in two_knowledgeboxes: @@ -100,6 +103,7 @@ async def test_run_all_kb_migrations(execution_context: ExecutionContext, two_kn assert global_info.current_version == 1 +@pytest.mark.deploy_modes("standalone") async def test_run_kb_rollovers(execution_context: ExecutionContext, two_knowledgeboxes): # Set migration version to -1 for all knowledgeboxes for kbid in two_knowledgeboxes: diff --git a/nucliadb/tests/nucliadb/integration/search/post_retrieval/test_rank_fusion.py b/nucliadb/tests/nucliadb/integration/search/post_retrieval/test_rank_fusion.py index df0f234b2c..911fdfa7ae 100644 --- a/nucliadb/tests/nucliadb/integration/search/post_retrieval/test_rank_fusion.py +++ b/nucliadb/tests/nucliadb/integration/search/post_retrieval/test_rank_fusion.py @@ -39,6 +39,7 @@ (ReciprocalRankFusion().model_dump(), {SCORE_TYPE.BM25}), ], ) +@pytest.mark.deploy_modes("standalone") async def test_rank_fusion( nucliadb_reader: AsyncClient, philosophy_books_kb: str, @@ -91,6 +92,7 @@ def get_score_types(results: KnowledgeboxFindResults) -> set[SCORE_TYPE]: return score_types +@pytest.mark.deploy_modes("standalone") async def test_reciprocal_rank_fusion_requests_more_results( nucliadb_reader: AsyncClient, philosophy_books_kb: str, diff --git a/nucliadb/tests/nucliadb/integration/search/post_retrieval/test_reranker.py b/nucliadb/tests/nucliadb/integration/search/post_retrieval/test_reranker.py index d592b59919..c5f76f74fe 100644 --- a/nucliadb/tests/nucliadb/integration/search/post_retrieval/test_reranker.py +++ b/nucliadb/tests/nucliadb/integration/search/post_retrieval/test_reranker.py @@ 
-37,6 +37,7 @@ PredictReranker(window=50).model_dump(), ], ) +@pytest.mark.deploy_modes("standalone") async def test_reranker( nucliadb_reader: AsyncClient, philosophy_books_kb: str, @@ -80,6 +81,7 @@ async def test_reranker( (PredictReranker(window=5 * 2).model_dump(), 5 * 2), ], ) +@pytest.mark.deploy_modes("standalone") async def test_predict_reranker_requests_more_results( nucliadb_reader: AsyncClient, philosophy_books_kb: str, diff --git a/nucliadb/tests/nucliadb/integration/search/test_autofilters.py b/nucliadb/tests/nucliadb/integration/search/test_autofilters.py index 3789a2a037..efaa83a3ba 100644 --- a/nucliadb/tests/nucliadb/integration/search/test_autofilters.py +++ b/nucliadb/tests/nucliadb/integration/search/test_autofilters.py @@ -29,15 +29,16 @@ from tests.utils.predict import predict_query_hook +@pytest.mark.deploy_modes("standalone") async def test_autofilters_are_returned( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox: str, knowledge_graph, mocked_predict, ): resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/search", + f"/kb/{standalone_knowledgebox}/search", params={ "query": "What relates Newton and Becquer?", }, @@ -46,7 +47,7 @@ async def test_autofilters_are_returned( assert resp.json()["autofilters"] == [] resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/find", + f"/kb/{standalone_knowledgebox}/find", params={ "autofilter": True, "query": "What relates Newton and Becquer?", diff --git a/nucliadb/tests/nucliadb/integration/search/test_filters.py b/nucliadb/tests/nucliadb/integration/search/test_filters.py index c05064285c..b16a59f6fc 100644 --- a/nucliadb/tests/nucliadb/integration/search/test_filters.py +++ b/nucliadb/tests/nucliadb/integration/search/test_filters.py @@ -262,7 +262,7 @@ def broker_message_with_labels(kbid): return bm -async def create_test_labelsets(nucliadb_writer, kbid: str): +async def create_test_labelsets(nucliadb_writer: AsyncClient, kbid: str): for 
kind, _label in ( (LabelSetKind.RESOURCES, ClassificationLabels.RESOURCE_ANNOTATED), (LabelSetKind.RESOURCES, ClassificationLabels.FIELD_DETECTED), @@ -282,16 +282,17 @@ async def create_test_labelsets(nucliadb_writer, kbid: str): @pytest.fixture(scope="function") async def kbid( - nucliadb_grpc: WriterStub, - nucliadb_writer, - knowledgebox, + nucliadb_ingest_grpc: WriterStub, + nucliadb_writer: AsyncClient, + standalone_knowledgebox, ): - await create_test_labelsets(nucliadb_writer, knowledgebox) - await inject_message(nucliadb_grpc, broker_message_with_entities(knowledgebox)) - await inject_message(nucliadb_grpc, broker_message_with_labels(knowledgebox)) - return knowledgebox + await create_test_labelsets(nucliadb_writer, standalone_knowledgebox) + await inject_message(nucliadb_ingest_grpc, broker_message_with_entities(standalone_knowledgebox)) + await inject_message(nucliadb_ingest_grpc, broker_message_with_labels(standalone_knowledgebox)) + return standalone_knowledgebox +@pytest.mark.deploy_modes("standalone") async def test_filtering_before_and_after_reindexing( app_context, nucliadb_reader: AsyncClient, kbid: str ): diff --git a/nucliadb/tests/nucliadb/integration/search/test_filters_expression.py b/nucliadb/tests/nucliadb/integration/search/test_filters_expression.py index 72e6972049..caa6c2c35f 100644 --- a/nucliadb/tests/nucliadb/integration/search/test_filters_expression.py +++ b/nucliadb/tests/nucliadb/integration/search/test_filters_expression.py @@ -18,9 +18,15 @@ # along with this program. If not, see . 
# +import pytest +from httpx import AsyncClient -async def test_filtering_expression(nucliadb_reader, nucliadb_writer, knowledgebox): - kbid = knowledgebox + +@pytest.mark.deploy_modes("standalone") +async def test_filtering_expression( + nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, standalone_knowledgebox: str +): + kbid = standalone_knowledgebox slug_to_uuid = {} # Create 3 resources in different folders @@ -91,7 +97,10 @@ async def test_filtering_expression(nucliadb_reader, nucliadb_writer, knowledgeb assert found_uuids == expected_uuids -async def test_filtering_expression_validation(nucliadb_reader, nucliadb_writer): +@pytest.mark.deploy_modes("standalone") +async def test_filtering_expression_validation( + nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient +): # Make sure we only allow one operator per filter resp = await nucliadb_reader.post( f"/kb/foobar/find", diff --git a/nucliadb/tests/nucliadb/integration/search/test_hidden.py b/nucliadb/tests/nucliadb/integration/search/test_hidden.py index 3017f2452f..be3ac5a6cf 100644 --- a/nucliadb/tests/nucliadb/integration/search/test_hidden.py +++ b/nucliadb/tests/nucliadb/integration/search/test_hidden.py @@ -27,68 +27,75 @@ from tests.utils import broker_resource_with_title_paragraph, inject_message -async def create_resource(kbid, nucliadb_grpc): +async def create_resource(kbid: str, nucliadb_ingest_grpc: WriterStub): message = broker_resource_with_title_paragraph(kbid) - await inject_message(nucliadb_grpc, message) + await inject_message(nucliadb_ingest_grpc, message) return message.uuid +@pytest.mark.deploy_modes("standalone") async def test_hidden_search( app_context, nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - nucliadb_manager: AsyncClient, - knowledgebox: str, + nucliadb_ingest_grpc: WriterStub, + nucliadb_writer_manager: AsyncClient, + standalone_knowledgebox: str, ): - resp = await nucliadb_manager.patch(f"/kb/{knowledgebox}", 
json={"hidden_resources_enabled": True}) + resp = await nucliadb_writer_manager.patch( + f"/kb/{standalone_knowledgebox}", json={"hidden_resources_enabled": True} + ) assert resp.status_code == 200 - r1 = await create_resource(knowledgebox, nucliadb_grpc) - r2 = await create_resource(knowledgebox, nucliadb_grpc) + r1 = await create_resource(standalone_knowledgebox, nucliadb_ingest_grpc) + r2 = await create_resource(standalone_knowledgebox, nucliadb_ingest_grpc) # Both resources appear in searches - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/search") assert resp.status_code == 200 assert resp.json()["resources"].keys() == {r1, r2} - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=title") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=title") assert resp.status_code == 200 assert set([r["rid"] for r in resp.json()["paragraphs"]["results"]]) == {r1, r2} # Hide r1 - resp = await nucliadb_writer.patch(f"/kb/{knowledgebox}/resource/{r1}", json={"hidden": True}) + resp = await nucliadb_writer.patch( + f"/kb/{standalone_knowledgebox}/resource/{r1}", json={"hidden": True} + ) assert resp.status_code == 200 await asyncio.sleep(0.5) # Only r2 appears on search - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/search") assert resp.status_code == 200 assert resp.json()["resources"].keys() == {r2} - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=title") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=title") assert resp.status_code == 200 assert set([r["rid"] for r in resp.json()["paragraphs"]["results"]]) == {r2} # Unless show_hidden is passed, then both resources are returned - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?show_hidden=true") + resp = await 
nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/search?show_hidden=true") assert resp.status_code == 200 assert resp.json()["resources"].keys() == {r1, r2} - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=title&show_hidden=true") + resp = await nucliadb_reader.get( + f"/kb/{standalone_knowledgebox}/suggest?query=title&show_hidden=true" + ) assert resp.status_code == 200 assert set([r["rid"] for r in resp.json()["paragraphs"]["results"]]) == {r1, r2} # Test catalog ternary filter - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/catalog") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/catalog") assert resp.status_code == 200 assert resp.json()["resources"].keys() == {r1, r2} - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/catalog?hidden=true") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/catalog?hidden=true") assert resp.status_code == 200 assert resp.json()["resources"].keys() == {r1} - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/catalog?hidden=false") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/catalog?hidden=false") assert resp.status_code == 200 assert resp.json()["resources"].keys() == {r2} diff --git a/nucliadb/tests/nucliadb/integration/search/test_search.py b/nucliadb/tests/nucliadb/integration/search/test_search.py index 8194d3f945..6f6d91fca8 100644 --- a/nucliadb/tests/nucliadb/integration/search/test_search.py +++ b/nucliadb/tests/nucliadb/integration/search/test_search.py @@ -50,17 +50,18 @@ from tests.utils import broker_resource, inject_message +@pytest.mark.deploy_modes("standalone") async def test_simple_search_sc_2062( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox, ): # PUBLIC API - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}") assert 
resp.status_code == 200 resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "slug": "myresource", "title": "My Title", @@ -71,19 +72,19 @@ async def test_simple_search_sc_2062( assert resp.status_code == 201 rid = resp.json()["uuid"] - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}") assert resp.status_code == 200 - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?query=title") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/search?query=title") assert resp.status_code == 200 - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?query=summary") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/search?query=summary") assert resp.status_code == 200 assert len(resp.json()["paragraphs"]["results"]) == 1 -def broker_resource_with_duplicates(knowledgebox, sentence): - bm = broker_resource(kbid=knowledgebox) +def broker_resource_with_duplicates(standalone_knowledgebox, sentence): + bm = broker_resource(kbid=standalone_knowledgebox) paragraph = sentence text = f"{paragraph}{paragraph}" etw = rpb.ExtractedTextWrapper() @@ -131,52 +132,62 @@ def broker_resource_with_duplicates(knowledgebox, sentence): return bm -async def create_resource_with_duplicates(knowledgebox, writer: WriterStub, sentence: str): - bm = broker_resource_with_duplicates(knowledgebox, sentence=sentence) +async def create_resource_with_duplicates(standalone_knowledgebox, writer: WriterStub, sentence: str): + bm = broker_resource_with_duplicates(standalone_knowledgebox, sentence=sentence) await inject_message(writer, bm) return bm.uuid +@pytest.mark.deploy_modes("standalone") async def test_search_filters_out_duplicate_paragraphs( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox, + nucliadb_ingest_grpc: WriterStub, + 
standalone_knowledgebox, ): - await create_resource_with_duplicates(knowledgebox, nucliadb_grpc, sentence="My own text Ramon. ") await create_resource_with_duplicates( - knowledgebox, nucliadb_grpc, sentence="Another different paragraph with text" + standalone_knowledgebox, nucliadb_ingest_grpc, sentence="My own text Ramon. " + ) + await create_resource_with_duplicates( + standalone_knowledgebox, nucliadb_ingest_grpc, sentence="Another different paragraph with text" ) query = "text" # It should filter out duplicates by default - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?query={query}") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/search?query={query}") assert resp.status_code == 200 content = resp.json() assert len(content["paragraphs"]["results"]) == 2 # It should filter out duplicates if specified - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?query={query}&with_duplicates=false") + resp = await nucliadb_reader.get( + f"/kb/{standalone_knowledgebox}/search?query={query}&with_duplicates=false" + ) assert resp.status_code == 200 content = resp.json() assert len(content["paragraphs"]["results"]) == 2 # It should return duplicates if specified - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?query={query}&with_duplicates=true") + resp = await nucliadb_reader.get( + f"/kb/{standalone_knowledgebox}/search?query={query}&with_duplicates=true" + ) assert resp.status_code == 200 content = resp.json() assert len(content["paragraphs"]["results"]) == 4 +@pytest.mark.deploy_modes("standalone") async def test_search_returns_paragraph_positions( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox, ): sentence = "My own text Ramon." 
- await create_resource_with_duplicates(knowledgebox, nucliadb_grpc, sentence=sentence) - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?query=Ramon") + await create_resource_with_duplicates( + standalone_knowledgebox, nucliadb_ingest_grpc, sentence=sentence + ) + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/search?query=Ramon") assert resp.status_code == 200 content = resp.json() position = content["paragraphs"]["results"][0]["position"] @@ -186,8 +197,8 @@ async def test_search_returns_paragraph_positions( assert position["page_number"] is not None -def broker_resource_with_classifications(knowledgebox): - bm = broker_resource(kbid=knowledgebox) +def broker_resource_with_classifications(standalone_knowledgebox): + bm = broker_resource(kbid=standalone_knowledgebox) text = "Some text" etw = rpb.ExtractedTextWrapper() @@ -239,17 +250,18 @@ def broker_resource_with_classifications(knowledgebox): return bm +@pytest.mark.deploy_modes("standalone") async def test_search_returns_labels( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox, ): - bm = broker_resource_with_classifications(knowledgebox) - await inject_message(nucliadb_grpc, bm) + bm = broker_resource_with_classifications(standalone_knowledgebox) + await inject_message(nucliadb_ingest_grpc, bm) resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/search?query=Some", + f"/kb/{standalone_knowledgebox}/search?query=Some", ) assert resp.status_code == 200 content = resp.json() @@ -258,39 +270,41 @@ async def test_search_returns_labels( assert par["labels"] == ["labelset1/label2", "labelset1/label1"] +@pytest.mark.deploy_modes("standalone") async def test_search_with_filters( nucliadb_reader: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox, ): # Inject a resource with a pdf icon - bm = 
broker_resource(knowledgebox) + bm = broker_resource(standalone_knowledgebox) bm.basic.icon = "application/pdf" - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) # Check that filtering by pdf icon returns it resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/search?show=basic&filters=/icon/application/pdf" + f"/kb/{standalone_knowledgebox}/search?show=basic&filters=/icon/application/pdf" ) assert resp.status_code == 200 assert len(resp.json()["resources"]) == 1 # With a different icon should return no results resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/search?show=basic&filters=/icon/application/docx" + f"/kb/{standalone_knowledgebox}/search?show=basic&filters=/icon/application/docx" ) assert resp.status_code == 200 assert len(resp.json()["resources"]) == 0 +@pytest.mark.deploy_modes("standalone") async def test_paragraph_search_with_filters( - nucliadb_writer, - nucliadb_reader, - nucliadb_grpc, - knowledgebox, + nucliadb_writer: AsyncClient, + nucliadb_reader: AsyncClient, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox, ): - kbid = knowledgebox + kbid = standalone_knowledgebox # Create a resource with two fields (title and summary) resp = await nucliadb_writer.post( f"/kb/{kbid}/resources", @@ -321,13 +335,14 @@ async def test_paragraph_search_with_filters( @pytest.mark.skip(reason="Needs sc-5626") +@pytest.mark.deploy_modes("standalone") async def test_( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "Rust for dummies", }, @@ -336,7 +351,7 @@ async def test_( rust_for_dummies = resp.json()["uuid"] resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "Introduction to Python", }, @@ -345,7 +360,7 @@ async def test_( 
intro_to_python = resp.json()["uuid"] resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/catalog", + f"/kb/{standalone_knowledgebox}/catalog", params={ "query": "Rust", }, @@ -356,7 +371,7 @@ async def test_( assert rust_for_dummies in resources resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/catalog", + f"/kb/{standalone_knowledgebox}/catalog", params={ "query": "Intro", }, @@ -367,15 +382,16 @@ async def test_( assert intro_to_python in resources +@pytest.mark.deploy_modes("standalone") async def test_search_returns_sentence_positions( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox, ): - await inject_resource_with_a_sentence(knowledgebox, nucliadb_grpc) + await inject_resource_with_a_sentence(standalone_knowledgebox, nucliadb_ingest_grpc) resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/search", json=dict(query="my own text", min_score=-1) + f"/kb/{standalone_knowledgebox}/search", json=dict(query="my own text", min_score=-1) ) assert resp.status_code == 200 content = resp.json() @@ -386,8 +402,8 @@ async def test_search_returns_sentence_positions( assert "page_number" not in position -def get_resource_with_a_sentence(knowledgebox): - bm = broker_resource(knowledgebox) +def get_resource_with_a_sentence(standalone_knowledgebox): + bm = broker_resource(standalone_knowledgebox) bm.files["file"].file.uri = "http://nofile" bm.files["file"].file.size = 0 @@ -431,16 +447,17 @@ def get_resource_with_a_sentence(knowledgebox): return bm -async def inject_resource_with_a_sentence(knowledgebox, writer): - bm = get_resource_with_a_sentence(knowledgebox) +async def inject_resource_with_a_sentence(standalone_knowledgebox, writer): + bm = get_resource_with_a_sentence(standalone_knowledgebox) await inject_message(writer, bm) +@pytest.mark.deploy_modes("standalone") async def test_search_relations( nucliadb_reader: AsyncClient, 
nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox, knowledge_graph, ): relation_nodes, relation_edges = knowledge_graph @@ -453,7 +470,7 @@ async def test_search_relations( ) resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/search", + f"/kb/{standalone_knowledgebox}/search", params={ "features": "relations", "query": "What relates Newton and Becquer?", @@ -528,7 +545,7 @@ async def test_search_relations( predict_mock.detect_entities = AsyncMock(return_value=[relation_nodes["Animal"]]) resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/search", + f"/kb/{standalone_knowledgebox}/search", params={ "features": "relations", "query": "Do you like animals?", @@ -570,14 +587,15 @@ async def test_search_relations( assert expected_relation in entities[entity]["related_to"] +@pytest.mark.deploy_modes("standalone") async def test_search_automatic_relations( - nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, knowledgebox + nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, standalone_knowledgebox ): predict_mock = Mock() set_utility(Utility.PREDICT, predict_mock) resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "My resource", "slug": "myresource", @@ -640,7 +658,7 @@ async def test_search_automatic_relations( predict_mock.detect_entities = AsyncMock(return_value=[rn]) resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/search", + f"/kb/{standalone_knowledgebox}/search", params={ "features": "relations", "query": "Relations for this resource", @@ -753,7 +771,7 @@ async def test_search_automatic_relations( predict_mock.detect_entities = AsyncMock(return_value=[rn]) resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/search", + f"/kb/{standalone_knowledgebox}/search", params={ "features": "relations", "query": "You know John?", @@ -794,14 +812,15 @@ async def 
get_audit_messages(sub): return auditreq +@pytest.mark.deploy_modes("standalone") async def test_search_sends_audit( - nucliadb_reader, - knowledgebox, + nucliadb_reader: AsyncClient, + standalone_knowledgebox, stream_audit: StreamAuditStorage, ): from nucliadb_utils.settings import audit_settings - kbid = knowledgebox + kbid = standalone_knowledgebox # Prepare a test audit stream to receive our messages partition = stream_audit.get_partition(kbid) @@ -839,13 +858,14 @@ async def test_search_sends_audit( @pytest.mark.parametrize("endpoint", ["search", "find"]) +@pytest.mark.deploy_modes("standalone") async def test_search_endpoints_handle_predict_errors( nucliadb_reader: AsyncClient, - knowledgebox, + standalone_knowledgebox, predict_mock, endpoint, ): - kbid = knowledgebox + kbid = standalone_knowledgebox for query_mock in (AsyncMock(side_effect=SendToPredictError()),): predict_mock.query = query_mock @@ -862,7 +882,7 @@ async def test_search_endpoints_handle_predict_errors( async def create_dummy_resources( - nucliadb_writer: AsyncClient, nucliadb_grpc: WriterStub, kbid, n=10, start=0 + nucliadb_writer: AsyncClient, nucliadb_ingest_grpc: WriterStub, kbid, n=10, start=0 ): payloads = [ { @@ -903,24 +923,24 @@ async def create_dummy_resources( message.field_vectors.append(ev) message.source = BrokerMessage.MessageSource.PROCESSOR - await inject_message(nucliadb_grpc, message) + await inject_message(nucliadb_ingest_grpc, message) @pytest.fixture(scope="function") async def kb_with_one_logic_shard( - nucliadb_manager: AsyncClient, + nucliadb_writer_manager: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, + nucliadb_ingest_grpc: WriterStub, ): - resp = await nucliadb_manager.post("/kbs", json={}) + resp = await nucliadb_writer_manager.post("/kbs", json={}) assert resp.status_code == 201 kbid = resp.json().get("uuid") - await create_dummy_resources(nucliadb_writer, nucliadb_grpc, kbid, n=10) + await create_dummy_resources(nucliadb_writer, 
nucliadb_ingest_grpc, kbid, n=10) yield kbid - resp = await nucliadb_manager.delete(f"/kb/{kbid}") + resp = await nucliadb_writer_manager.delete(f"/kb/{kbid}") assert resp.status_code == 200 @@ -933,37 +953,38 @@ def max_shard_paragraphs(): @pytest.fixture(scope="function") async def kb_with_two_logic_shards( max_shard_paragraphs, - nucliadb_manager: AsyncClient, + nucliadb_writer_manager: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, + nucliadb_ingest_grpc: WriterStub, ): sc = shard_creator.ShardCreatorHandler( driver=get_driver(), storage=await get_storage(), pubsub=None, # type: ignore ) - resp = await nucliadb_manager.post("/kbs", json={}) + resp = await nucliadb_writer_manager.post("/kbs", json={}) assert resp.status_code == 201 kbid = resp.json().get("uuid") - await create_dummy_resources(nucliadb_writer, nucliadb_grpc, kbid, n=8) + await create_dummy_resources(nucliadb_writer, nucliadb_ingest_grpc, kbid, n=8) # trigger creating new shard manually here sc.shard_manager.should_create_new_shard = Mock(return_value=True) # type: ignore await sc.process_kb(kbid) - await create_dummy_resources(nucliadb_writer, nucliadb_grpc, kbid, n=10, start=8) + await create_dummy_resources(nucliadb_writer, nucliadb_ingest_grpc, kbid, n=10, start=8) yield kbid - resp = await nucliadb_manager.delete(f"/kb/{kbid}") + resp = await nucliadb_writer_manager.delete(f"/kb/{kbid}") assert resp.status_code == 200 @pytest.mark.flaky(reruns=5) +@pytest.mark.deploy_modes("standalone") async def test_search_two_logic_shards( nucliadb_reader: AsyncClient, - nucliadb_manager: AsyncClient, + nucliadb_reader_manager: AsyncClient, kb_with_one_logic_shard, kb_with_two_logic_shards, ): @@ -971,11 +992,11 @@ async def test_search_two_logic_shards( kbid2 = kb_with_two_logic_shards # Check that they have one and two logic shards, respectively - resp = await nucliadb_manager.get(f"kb/{kbid1}/shards") + resp = await nucliadb_reader_manager.get(f"kb/{kbid1}/shards") assert 
resp.status_code == 200 assert len(resp.json()["shards"]) == 1 - resp = await nucliadb_manager.get(f"kb/{kbid2}/shards") + resp = await nucliadb_reader_manager.get(f"kb/{kbid2}/shards") assert resp.status_code == 200 assert len(resp.json()["shards"]) == 2 @@ -1000,18 +1021,19 @@ async def test_search_two_logic_shards( assert len(content1["sentences"]["results"]) == len(content2["sentences"]["results"]) +@pytest.mark.deploy_modes("standalone") async def test_search_min_score( nucliadb_reader: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): # When not specifying the min score on the request, it should default to 0.7 - resp = await nucliadb_reader.post(f"/kb/{knowledgebox}/search", json={"query": "dummy"}) + resp = await nucliadb_reader.post(f"/kb/{standalone_knowledgebox}/search", json={"query": "dummy"}) assert resp.status_code == 200 assert resp.json()["sentences"]["min_score"] == 0.7 # If we specify a min score, it should be used resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/search", + f"/kb/{standalone_knowledgebox}/search", json={"query": "dummy", "min_score": {"bm25": 10, "semantic": 0.5}}, ) assert resp.status_code == 200 @@ -1044,14 +1066,15 @@ async def test_search_min_score( (["/a/b", "/a/be"], True, ""), ], ) +@pytest.mark.deploy_modes("standalone") async def test_facets_validation( nucliadb_reader: AsyncClient, - knowledgebox, + standalone_knowledgebox, facets, valid, error_message, ): - kbid = knowledgebox + kbid = standalone_knowledgebox for endpoint in ("search",): for method in ("post", "get"): func = getattr(nucliadb_reader, method) @@ -1066,13 +1089,14 @@ async def test_facets_validation( assert error_message in resp.json()["detail"][0]["msg"] +@pytest.mark.deploy_modes("standalone") async def test_search_marks_fuzzy_results( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + 
f"/kb/{standalone_knowledgebox}/resources", json={ "slug": "myresource", "title": "My Title", @@ -1082,7 +1106,7 @@ async def test_search_marks_fuzzy_results( # Should get only one non-fuzzy result resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/search", + f"/kb/{standalone_knowledgebox}/search", json={ "query": "Title", }, @@ -1093,7 +1117,7 @@ async def test_search_marks_fuzzy_results( # Should get only one fuzzy result resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/search", + f"/kb/{standalone_knowledgebox}/search", json={ "query": "totle", }, @@ -1104,7 +1128,7 @@ async def test_search_marks_fuzzy_results( # Should not get any result if exact match term queried resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/search", + f"/kb/{standalone_knowledgebox}/search", json={ "query": '"totle"', }, @@ -1122,17 +1146,18 @@ def check_fuzzy_paragraphs(search_response, *, fuzzy_result: bool, n_expected: i assert found == n_expected +@pytest.mark.deploy_modes("standalone") async def test_search_by_path_filter( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox, ): paths = ["/foo", "foo/bar", "foo/bar/1", "foo/bar/2", "foo/bar/3", "foo/bar/4"] for path in paths: resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": f"My resource: {path}", "summary": "Some summary", @@ -1144,7 +1169,7 @@ async def test_search_by_path_filter( assert resp.status_code == 201 resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/catalog", + f"/kb/{standalone_knowledgebox}/catalog", params={ "query": "", }, @@ -1153,21 +1178,26 @@ async def test_search_by_path_filter( assert len(resp.json()["resources"]) == len(paths) # Get the list of all - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?filters=/origin.path/foo") + resp = await 
nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/search?filters=/origin.path/foo") assert resp.status_code == 200 assert len(resp.json()["resources"]) == len(paths) # Get the list of under foo/bar - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?filters=/origin.path/foo/bar") + resp = await nucliadb_reader.get( + f"/kb/{standalone_knowledgebox}/search?filters=/origin.path/foo/bar" + ) assert resp.status_code == 200 assert len(resp.json()["resources"]) == len(paths) - 1 # Get the list of under foo/bar/4 - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?filters=/origin.path/foo/bar/4") + resp = await nucliadb_reader.get( + f"/kb/{standalone_knowledgebox}/search?filters=/origin.path/foo/bar/4" + ) assert resp.status_code == 200 assert len(resp.json()["resources"]) == 1 +@pytest.mark.deploy_modes("standalone") async def test_search_kb_not_found(nucliadb_reader: AsyncClient): resp = await nucliadb_reader.get( "/kb/00000000000000/search?query=own+text", @@ -1175,8 +1205,11 @@ async def test_search_kb_not_found(nucliadb_reader: AsyncClient): assert resp.status_code == 404 -async def test_resource_search_query_param_is_optional(nucliadb_reader, knowledgebox): - kb = knowledgebox +@pytest.mark.deploy_modes("standalone") +async def test_resource_search_query_param_is_optional( + nucliadb_reader: AsyncClient, standalone_knowledgebox +): + kb = standalone_knowledgebox # If query is not present, should not fail resp = await nucliadb_reader.get(f"/kb/{kb}/search") assert resp.status_code == 200 @@ -1187,8 +1220,9 @@ async def test_resource_search_query_param_is_optional(nucliadb_reader, knowledg assert resp.status_code == 200 -async def test_search_with_duplicates(nucliadb_reader, knowledgebox): - kb = knowledgebox +@pytest.mark.deploy_modes("standalone") +async def test_search_with_duplicates(nucliadb_reader: AsyncClient, standalone_knowledgebox): + kb = standalone_knowledgebox resp = await 
nucliadb_reader.get(f"/kb/{kb}/search?with_duplicates=True") assert resp.status_code == 200 @@ -1205,10 +1239,11 @@ def search_with_limits_exceeded_error(): yield +@pytest.mark.deploy_modes("standalone") async def test_search_handles_limits_exceeded_error( - nucliadb_reader, knowledgebox, search_with_limits_exceeded_error + nucliadb_reader: AsyncClient, standalone_knowledgebox, search_with_limits_exceeded_error ): - kb = knowledgebox + kb = standalone_knowledgebox resp = await nucliadb_reader.get(f"/kb/{kb}/search") assert resp.status_code == 402 assert resp.json() == {"detail": "over the quota"} @@ -1218,12 +1253,13 @@ async def test_search_handles_limits_exceeded_error( assert resp.json() == {"detail": "over the quota"} +@pytest.mark.deploy_modes("standalone") async def test_catalog_post( nucliadb_reader: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/catalog", + f"/kb/{standalone_knowledgebox}/catalog", json={ "query": "", "filters": [ @@ -1248,6 +1284,7 @@ def not_debug(): running_settings.debug = prev +@pytest.mark.deploy_modes("standalone") async def test_api_does_not_show_tracebacks_on_api_errors(not_debug, nucliadb_reader: AsyncClient): with mock.patch( "nucliadb.search.api.v1.search.search", @@ -1258,15 +1295,16 @@ async def test_api_does_not_show_tracebacks_on_api_errors(not_debug, nucliadb_re assert resp.json() == {"detail": "Something went wrong, please contact your administrator"} +@pytest.mark.deploy_modes("standalone") async def test_catalog_pagination( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): n_resources = 35 for i in range(n_resources): resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": f"Resource {i}", "texts": { @@ -1287,7 +1325,7 @@ async def test_catalog_pagination( page_number = 0 while True: resp = await nucliadb_reader.get( - 
f"/kb/{knowledgebox}/catalog", + f"/kb/{standalone_knowledgebox}/catalog", params={ "page_number": page_number, "page_size": page_size, @@ -1309,14 +1347,15 @@ async def test_catalog_pagination( assert len(resource_uuids) == n_resources +@pytest.mark.deploy_modes("standalone") async def test_catalog_date_range_filtering( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): now = datetime.now() resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": f"Resource", "texts": { @@ -1330,7 +1369,7 @@ async def test_catalog_date_range_filtering( one_hour_ago = now - timedelta(hours=1) resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/catalog", + f"/kb/{standalone_knowledgebox}/catalog", params={ "range_creation_start": one_hour_ago.isoformat(), }, @@ -1340,7 +1379,7 @@ async def test_catalog_date_range_filtering( assert len(body["resources"]) == 1 resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/catalog", + f"/kb/{standalone_knowledgebox}/catalog", json={ "range_creation_end": one_hour_ago.isoformat(), }, @@ -1350,23 +1389,24 @@ async def test_catalog_date_range_filtering( assert len(body["resources"]) == 0 +@pytest.mark.deploy_modes("standalone") async def test_catalog_faceted( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox, ): valid_status = ["PROCESSED", "PENDING", "ERROR"] for status_name, status_value in rpb.Metadata.Status.items(): if status_name not in valid_status: continue - bm = broker_resource(knowledgebox) + bm = broker_resource(standalone_knowledgebox) bm.basic.metadata.status = status_value - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/catalog?faceted=/metadata.status", + 
f"/kb/{standalone_knowledgebox}/catalog?faceted=/metadata.status", ) assert resp.status_code == 200 body = resp.json() @@ -1378,11 +1418,12 @@ async def test_catalog_faceted( assert count == 1 +@pytest.mark.deploy_modes("standalone") async def test_catalog_faceted_labels( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox, ): # 4 resources: # 1 with /l/labelset0/label0 @@ -1390,22 +1431,22 @@ async def test_catalog_faceted_labels( # 1 with /l/labelset1/label0 for label in range(2): for count in range(label + 1): - bm = broker_resource(knowledgebox) + bm = broker_resource(standalone_knowledgebox) c = rpb.Classification() c.labelset = f"labelset0" c.label = f"label{label}" bm.basic.usermetadata.classifications.append(c) - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) - bm = broker_resource(knowledgebox) + bm = broker_resource(standalone_knowledgebox) c = rpb.Classification() c.labelset = "labelset1" c.label = "label0" bm.basic.usermetadata.classifications.append(c) - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/catalog?faceted=/classification.labels/labelset0", + f"/kb/{standalone_knowledgebox}/catalog?faceted=/classification.labels/labelset0", ) assert resp.status_code == 200 body = resp.json() @@ -1418,7 +1459,7 @@ async def test_catalog_faceted_labels( # This is used by the check missing labels button in dashboard resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/catalog?faceted=/classification.labels", + f"/kb/{standalone_knowledgebox}/catalog?faceted=/classification.labels", ) assert resp.status_code == 200 body = resp.json() @@ -1430,24 +1471,25 @@ async def test_catalog_faceted_labels( } +@pytest.mark.deploy_modes("standalone") async def test_catalog_filters( nucliadb_reader: AsyncClient, 
nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox, ): valid_status = ["PROCESSED", "PENDING", "ERROR"] for status_name, status_value in rpb.Metadata.Status.items(): if status_name not in valid_status: continue - bm = broker_resource(knowledgebox) + bm = broker_resource(standalone_knowledgebox) bm.basic.metadata.status = status_value - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) # No filters resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/catalog", + f"/kb/{standalone_knowledgebox}/catalog", ) assert resp.status_code == 200 body = resp.json() @@ -1455,7 +1497,7 @@ async def test_catalog_filters( # Simple filter resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/catalog?filters=/metadata.status/PENDING", + f"/kb/{standalone_knowledgebox}/catalog?filters=/metadata.status/PENDING", ) assert resp.status_code == 200 body = resp.json() @@ -1464,7 +1506,7 @@ async def test_catalog_filters( # AND filter resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/catalog", + f"/kb/{standalone_knowledgebox}/catalog", json={"filters": [{"all": ["/metadata.status/PENDING", "/metadata.status/ERROR"]}]}, ) assert resp.status_code == 200 @@ -1473,7 +1515,7 @@ async def test_catalog_filters( # OR filter resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/catalog", + f"/kb/{standalone_knowledgebox}/catalog", json={"filters": [{"any": ["/metadata.status/PENDING", "/metadata.status/ERROR"]}]}, ) assert resp.status_code == 200 diff --git a/nucliadb/tests/nucliadb/integration/search/test_search_date_ranges_filter.py b/nucliadb/tests/nucliadb/integration/search/test_search_date_ranges_filter.py index e96d1f0f6d..f1fdf62f52 100644 --- a/nucliadb/tests/nucliadb/integration/search/test_search_date_ranges_filter.py +++ b/nucliadb/tests/nucliadb/integration/search/test_search_date_ranges_filter.py @@ -24,6 +24,7 @@ from nucliadb.tests.vectors 
import V1 from nucliadb_models.search import SearchOptions +from nucliadb_protos.writer_pb2_grpc import WriterStub from tests.nucliadb.integration.search.test_search import get_resource_with_a_sentence from tests.utils import inject_message @@ -41,13 +42,13 @@ def a_week_before(date): @pytest.fixture(scope="function") -async def resource(nucliadb_grpc, knowledgebox): - bm = get_resource_with_a_sentence(knowledgebox) +async def resource(nucliadb_ingest_grpc: WriterStub, standalone_knowledgebox): + bm = get_resource_with_a_sentence(standalone_knowledgebox) bm.basic.created.FromDatetime(NOW) bm.basic.modified.FromDatetime(NOW) bm.origin.ClearField("created") bm.origin.ClearField("modified") - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) return bm.uuid @@ -81,9 +82,10 @@ async def resource(nucliadb_grpc, knowledgebox): SearchOptions.SEMANTIC, ], ) +@pytest.mark.deploy_modes("standalone") async def test_search_with_date_range_filters_nucliadb_dates( nucliadb_reader: AsyncClient, - knowledgebox, + standalone_knowledgebox: str, feature, resource, creation_start, @@ -98,7 +100,7 @@ async def test_search_with_date_range_filters_nucliadb_dates( """ await _test_find_date_ranges( nucliadb_reader, - knowledgebox, + standalone_knowledgebox, [feature], creation_start, creation_end, @@ -138,10 +140,11 @@ async def test_search_with_date_range_filters_nucliadb_dates( SearchOptions.SEMANTIC, ], ) +@pytest.mark.deploy_modes("standalone") async def test_search_with_date_range_filters_origin_dates( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox: str, feature, resource, creation_start, @@ -156,7 +159,7 @@ async def test_search_with_date_range_filters_origin_dates( """ # Set origin dates of the resource resp = await nucliadb_writer.patch( - f"/kb/{knowledgebox}/resource/{resource}", + f"/kb/{standalone_knowledgebox}/resource/{resource}", json={ "origin": { "created": 
ORIGIN_CREATION.isoformat(), @@ -168,7 +171,7 @@ async def test_search_with_date_range_filters_origin_dates( await _test_find_date_ranges( nucliadb_reader, - knowledgebox, + standalone_knowledgebox, [feature], creation_start, creation_end, @@ -178,8 +181,9 @@ async def test_search_with_date_range_filters_origin_dates( ) +@pytest.mark.deploy_modes("standalone") async def _test_find_date_ranges( - nucliadb_reader, + nucliadb_reader: AsyncClient, kbid, features, creation_start, diff --git a/nucliadb/tests/nucliadb/integration/search/test_search_sorting.py b/nucliadb/tests/nucliadb/integration/search/test_search_sorting.py index 7534569114..eebdb9ba64 100644 --- a/nucliadb/tests/nucliadb/integration/search/test_search_sorting.py +++ b/nucliadb/tests/nucliadb/integration/search/test_search_sorting.py @@ -25,6 +25,7 @@ from nucliadb_models.search import SearchOptions +@pytest.mark.deploy_modes("standalone") async def test_search_sort_by_score( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, @@ -59,6 +60,7 @@ async def test_search_sort_by_score( ("modified", "desc", lambda x: list(reversed(sorted(x)))), ], ) +@pytest.mark.deploy_modes("standalone") async def test_search_sorted_by_creation_and_modification_dates( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, @@ -100,6 +102,7 @@ async def test_search_sorted_by_creation_and_modification_dates( ("title", "desc", lambda x: list(reversed(sorted(x)))), ], ) +@pytest.mark.deploy_modes("standalone") async def test_limited_sorted_search_of_most_relevant_results( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, @@ -143,6 +146,7 @@ async def test_limited_sorted_search_of_most_relevant_results( assert sort_fields == sort_function(sort_fields) +@pytest.mark.deploy_modes("standalone") async def test_empty_query_search_for_ordered_resources_by_creation_date_desc( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, @@ -167,6 +171,7 @@ async def 
test_empty_query_search_for_ordered_resources_by_creation_date_desc( assert creation_dates == sorted(creation_dates, reverse=True) +@pytest.mark.deploy_modes("standalone") async def test_list_all_resources_by_creation_and_modification_dates_with_empty_queries( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, diff --git a/nucliadb/tests/nucliadb/integration/test_api.py b/nucliadb/tests/nucliadb/integration/test_api.py index 07e465f1f8..4c80dcb4f1 100644 --- a/nucliadb/tests/nucliadb/integration/test_api.py +++ b/nucliadb/tests/nucliadb/integration/test_api.py @@ -62,9 +62,10 @@ ) +@pytest.mark.deploy_modes("standalone") async def test_kb_creation_allows_setting_learning_configuration( - nucliadb_manager, - nucliadb_reader, + nucliadb_writer_manager: AsyncClient, + nucliadb_reader: AsyncClient, onprem_nucliadb, ): with patch("nucliadb.writer.api.v1.knowledgebox.learning_proxy", new=AsyncMock()) as learning_proxy: @@ -85,7 +86,7 @@ async def test_kb_creation_allows_setting_learning_configuration( ) # Check that we can define it to a different semantic model - resp = await nucliadb_manager.post( + resp = await nucliadb_writer_manager.post( f"/kbs", json={ "title": "My KB with english semantic model", @@ -101,25 +102,26 @@ async def test_kb_creation_allows_setting_learning_configuration( ) +@pytest.mark.deploy_modes("standalone") async def test_creation( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - nucliadb_train: TrainStub, - knowledgebox, + nucliadb_ingest_grpc: WriterStub, + nucliadb_train_grpc: TrainStub, + standalone_knowledgebox, ): # PUBLIC API - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}") assert resp.status_code == 200 resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/labelset/ls1", + f"/kb/{standalone_knowledgebox}/labelset/ls1", json={"title": "Labelset 1", "labels": [{"text": "text", "title": "title"}]}, ) assert 
resp.status_code == 200 resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "My title", "slug": "myresource", @@ -149,12 +151,12 @@ async def test_creation( bm.extracted_text.append(et) bm.field_metadata.append(fm) bm.uuid = rid - bm.kbid = knowledgebox + bm.kbid = standalone_knowledgebox - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/resource/{rid}?show=extracted&show=values&extracted=text&extracted=metadata", + f"/kb/{standalone_knowledgebox}/resource/{rid}?show=extracted&show=values&extracted=text&extracted=metadata", ) assert resp.status_code == 200 assert ( @@ -167,7 +169,7 @@ async def test_creation( # ADD A LABEL resp = await nucliadb_writer.patch( - f"/kb/{knowledgebox}/resource/{rid}", + f"/kb/{standalone_knowledgebox}/resource/{rid}", json={ "fieldmetadata": [ { @@ -188,17 +190,17 @@ async def test_creation( assert resp.status_code == 200 resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/resource/{rid}?show=errors&show=values&show=basic", + f"/kb/{standalone_knowledgebox}/resource/{rid}?show=errors&show=values&show=basic", ) assert resp.status_code == 200 # TRAINING GRPC API request = GetSentencesRequest() - request.kb.uuid = knowledgebox + request.kb.uuid = standalone_knowledgebox request.metadata.labels = True request.metadata.text = True paragraph: TrainParagraph - async for paragraph in nucliadb_train.GetParagraphs(request): # type: ignore + async for paragraph in nucliadb_train_grpc.GetParagraphs(request): # type: ignore if paragraph.field.field == "title": assert paragraph.metadata.text == "My title" else: @@ -210,33 +212,37 @@ async def test_creation( trainset.batch_size = 20 trainset.type = TaskType.PARAGRAPH_CLASSIFICATION trainset.filter.labels.append("ls1") - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/trainset") + resp = await 
nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/trainset") assert resp.status_code == 200 data = resp.json() assert len(data["partitions"]) == 1 partition_id = data["partitions"][0] resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/trainset/{partition_id}", + f"/kb/{standalone_knowledgebox}/trainset/{partition_id}", content=trainset.SerializeToString(), ) assert len(resp.content) > 0 -async def test_can_create_knowledgebox_with_colon_in_slug(nucliadb_manager: AsyncClient): - resp = await nucliadb_manager.post("/kbs", json={"slug": "something:else"}) +@pytest.mark.deploy_modes("standalone") +async def test_can_create_standalone_knowledgebox_with_colon_in_slug( + nucliadb_writer_manager: AsyncClient, nucliadb_reader_manager: AsyncClient +): + resp = await nucliadb_writer_manager.post("/kbs", json={"slug": "something:else"}) assert resp.status_code == 201 - resp = await nucliadb_manager.get(f"/kbs") + resp = await nucliadb_reader_manager.get(f"/kbs") assert resp.status_code == 200 assert resp.json()["kbs"][0]["slug"] == "something:else" +@pytest.mark.deploy_modes("standalone") async def test_serialize_errors( nucliadb_writer: AsyncClient, nucliadb_reader: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox: str, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox: str, ): """ Test description: @@ -246,7 +252,7 @@ async def test_serialize_errors( """ resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "My resource", "texts": {"text": TEST_TEXT_PAYLOAD}, @@ -258,7 +264,7 @@ async def test_serialize_errors( assert resp.status_code == 201 rid = resp.json()["uuid"] - br = broker_resource(knowledgebox, rid=rid) + br = broker_resource(standalone_knowledgebox, rid=rid) # Add an error for every field type fields_to_test = [ @@ -281,10 +287,10 @@ async def test_serialize_errors( ) br.errors.append(error) - await inject_message(nucliadb_grpc, br) + await 
inject_message(nucliadb_ingest_grpc, br) resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/resource/{br.uuid}", + f"/kb/{standalone_knowledgebox}/resource/{br.uuid}", params=dict(show=["extracted", "errors", "basic"], extracted=["metadata"]), ) assert resp.status_code == 200 @@ -295,11 +301,12 @@ async def test_serialize_errors( assert resp_json["data"][ftypestring][fid]["error"]["code"] == 1 +@pytest.mark.deploy_modes("standalone") async def test_entitygroups( nucliadb_writer: AsyncClient, nucliadb_reader: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox: str, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox: str, ): await wait_for_sync() entitygroup = { @@ -316,11 +323,11 @@ async def test_entitygroups( "spoon": {"value": "Spoon"}, }, } - resp = await nucliadb_writer.post(f"/kb/{knowledgebox}/entitiesgroups", json=entitygroup) + resp = await nucliadb_writer.post(f"/kb/{standalone_knowledgebox}/entitiesgroups", json=entitygroup) assert resp.status_code == 200 # Entities are not returned by default - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/entitiesgroups") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/entitiesgroups") groups = resp.json()["groups"] assert "entities" in groups["group1"] assert len(groups["group1"]["entities"]) == 0 @@ -329,15 +336,16 @@ async def test_entitygroups( assert groups["group1"]["custom"] is True # show_entities=true returns a http 400 - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/entitiesgroups?show_entities=true") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/entitiesgroups?show_entities=true") assert resp.status_code == 400 +@pytest.mark.deploy_modes("standalone") async def test_extracted_shortened_metadata( nucliadb_writer: AsyncClient, nucliadb_reader: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox: str, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox: str, ): """ Test description: @@ -345,7 +353,7 @@ async def 
test_extracted_shortened_metadata( - Create a resource with a field containing FieldMetadata with ner, positions and relations. - Check that new extracted data option filters them out """ - br = broker_resource(knowledgebox) + br = broker_resource(standalone_knowledgebox) field = rpb.FieldID(field_type=rpb.FieldType.TEXT, field="text") fcmw = FieldComputedMetadataWrapper() @@ -378,14 +386,14 @@ async def test_extracted_shortened_metadata( br.field_metadata.append(fcmw) - await inject_message(nucliadb_grpc, br) + await inject_message(nucliadb_ingest_grpc, br) # TODO: Remove ner and positions once fields are removed cropped_fields = ["ner", "positions", "relations", "classifications"] # Check that when 'shortened_metadata' in extracted param fields are cropped resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/resource/{br.uuid}/text/text", + f"/kb/{standalone_knowledgebox}/resource/{br.uuid}/text/text", params=dict(show=["extracted"], extracted=["shortened_metadata"]), ) assert resp.status_code == 200 @@ -399,7 +407,7 @@ async def test_extracted_shortened_metadata( # Check that when 'metadata' in extracted param fields are returned for extracted_param in (["metadata"], ["metadata", "shortened_metadata"]): resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/resource/{br.uuid}/text/text", + f"/kb/{standalone_knowledgebox}/resource/{br.uuid}/text/text", params=dict(show=["extracted"], extracted=extracted_param), ) assert resp.status_code == 200 @@ -420,9 +428,10 @@ async def test_extracted_shortened_metadata( ("Invalid&Character", True), ], ) +@pytest.mark.deploy_modes("standalone") async def test_field_ids_are_validated( nucliadb_writer: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, field_id, error, ): @@ -435,7 +444,7 @@ async def test_field_ids_are_validated( } }, } - resp = await nucliadb_writer.post(f"/kb/{knowledgebox}/resources", json=payload) + resp = await nucliadb_writer.post(f"/kb/{standalone_knowledgebox}/resources", 
json=payload) if error: assert resp.status_code == 422 body = resp.json() @@ -444,10 +453,11 @@ async def test_field_ids_are_validated( assert resp.status_code == 201 +@pytest.mark.deploy_modes("standalone") async def test_extra( nucliadb_writer: AsyncClient, nucliadb_reader: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, ): """ Test description: @@ -456,7 +466,7 @@ async def test_extra( - Check that it is returned only if requested on search results - Check modification """ - kbid = knowledgebox + kbid = standalone_knowledgebox invalid_extra = {"metadata": {i: f"foo{i}" for i in range(100000)}} resp = await nucliadb_writer.post( f"/kb/{kbid}/resources", @@ -520,12 +530,13 @@ async def test_extra( assert resp.json()["extra"] == extra +@pytest.mark.deploy_modes("standalone") async def test_icon_doesnt_change_after_labeling_resource_sc_5625( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): - kbid = knowledgebox + kbid = standalone_knowledgebox resp = await nucliadb_writer.post( f"/kb/{kbid}/resources", json={"title": "Foo", "icon": "application/pdf"}, @@ -559,11 +570,14 @@ async def test_icon_doesnt_change_after_labeling_resource_sc_5625( ("foo/bar", False), # with slash ], ) -async def test_resource_slug_validation(nucliadb_writer, nucliadb_reader, knowledgebox, slug, valid): - resp = await nucliadb_writer.post(f"/kb/{knowledgebox}/resources", json={"slug": slug}) +@pytest.mark.deploy_modes("standalone") +async def test_resource_slug_validation( + nucliadb_writer, nucliadb_reader: AsyncClient, standalone_knowledgebox, slug, valid +): + resp = await nucliadb_writer.post(f"/kb/{standalone_knowledgebox}/resources", json={"slug": slug}) if valid: assert resp.status_code == 201 - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/slug/{slug}") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/slug/{slug}") assert resp.status_code == 200 else: assert resp.status_code == 422 @@ 
-572,12 +586,13 @@ async def test_resource_slug_validation(nucliadb_writer, nucliadb_reader, knowle assert f"Invalid slug: '{slug}'" in detail["msg"] +@pytest.mark.deploy_modes("standalone") async def test_icon_doesnt_change_after_adding_file_field_sc_2388( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): - kbid = knowledgebox + kbid = standalone_knowledgebox resp = await nucliadb_writer.post( f"/kb/{kbid}/resources", json={ @@ -603,13 +618,14 @@ async def test_icon_doesnt_change_after_adding_file_field_sc_2388( assert resp.json()["icon"] == "text/plain" +@pytest.mark.deploy_modes("standalone") async def test_language_metadata( - nucliadb_writer, - nucliadb_reader, - nucliadb_grpc, - knowledgebox, + nucliadb_writer: AsyncClient, + nucliadb_reader: AsyncClient, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox, ): - kbid = knowledgebox + kbid = standalone_knowledgebox resp = await nucliadb_writer.post( f"/kb/{kbid}/resources", json={"title": "My resource"}, @@ -639,7 +655,7 @@ async def test_language_metadata( fcmw.metadata.split_metadata["foo"].language = "it" bm.field_metadata.append(fcmw) - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) resp = await nucliadb_reader.get(f"/kb/{kbid}/resource/{uuid}", params={"show": ["basic"]}) assert resp.status_code == 200 @@ -673,13 +689,14 @@ async def test_language_metadata( assert res["metadata"]["languages"] == [] +@pytest.mark.deploy_modes("standalone") async def test_story_7081( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "My title", "slug": "myresource", @@ -691,28 +708,29 @@ async def test_story_7081( rid = resp.json()["uuid"] resp = await nucliadb_writer.patch( - f"/kb/{knowledgebox}/resource/{rid}", + 
f"/kb/{standalone_knowledgebox}/resource/{rid}", json={"origin": {"metadata": {"some": "data"}}}, ) assert resp.status_code == 200 resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/resource/{rid}?show=origin", + f"/kb/{standalone_knowledgebox}/resource/{rid}?show=origin", ) assert resp.status_code == 200 data = resp.json() assert data["origin"]["metadata"]["some"] == "data" +@pytest.mark.deploy_modes("standalone") async def test_question_answer( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox, ): # create a new resource resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "My title", "slug": "myresource", @@ -747,12 +765,12 @@ async def test_question_answer( message.question_answers.append(qaw) message.uuid = rid - message.kbid = knowledgebox + message.kbid = standalone_knowledgebox - await inject_message(nucliadb_grpc, message) + await inject_message(nucliadb_ingest_grpc, message) resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/resource/{rid}?show=extracted&extracted=question_answers", + f"/kb/{standalone_knowledgebox}/resource/{rid}?show=extracted&extracted=question_answers", ) assert resp.status_code == 200 data = resp.json() @@ -780,11 +798,12 @@ async def test_question_answer( } +@pytest.mark.deploy_modes("standalone") async def test_question_answer_annotations( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox, ): qa_annotation = metadata.QuestionAnswerAnnotation( question_answer=common.QuestionAnswer( @@ -805,7 +824,7 @@ async def test_question_answer_annotations( ) resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "My title", "slug": "myresource", @@ 
-826,7 +845,7 @@ async def test_question_answer_annotations( rid = resp.json()["uuid"] resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/resource/{rid}?show=basic", + f"/kb/{standalone_knowledgebox}/resource/{rid}?show=basic", ) assert resp.status_code == 200 data = resp.json() @@ -834,13 +853,14 @@ async def test_question_answer_annotations( assert resource.fieldmetadata[0].question_answers[0] == qa_annotation # type: ignore +@pytest.mark.deploy_modes("standalone") async def test_link_fields_store_css_selector( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "My title", "slug": "myresource", @@ -857,7 +877,7 @@ async def test_link_fields_store_css_selector( rid = resp.json()["uuid"] resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/resource/{rid}?show=values", + f"/kb/{standalone_knowledgebox}/resource/{rid}?show=values", ) assert resp.status_code == 200 data = resp.json() @@ -873,13 +893,14 @@ async def test_link_fields_store_css_selector( assert css_selector == "main" +@pytest.mark.deploy_modes("standalone") async def test_link_fields_store_xpath( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "My title", "slug": "myresource", @@ -896,7 +917,7 @@ async def test_link_fields_store_xpath( rid = resp.json()["uuid"] resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/resource/{rid}?show=values", + f"/kb/{standalone_knowledgebox}/resource/{rid}?show=values", ) assert resp.status_code == 200 data = resp.json() @@ -912,12 +933,13 @@ async def test_link_fields_store_xpath( assert xpath == "my_xpath" +@pytest.mark.deploy_modes("standalone") async def 
test_dates_are_properly_validated( nucliadb_writer: AsyncClient, nucliadb_reader: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): - kbid = knowledgebox + kbid = standalone_knowledgebox resp = await nucliadb_writer.post( f"/kb/{kbid}/resources", json={ @@ -949,14 +971,15 @@ async def test_dates_are_properly_validated( assert resp.json()["origin"]["created"] == "0001-01-01T00:00:00Z" +@pytest.mark.deploy_modes("standalone") async def test_file_computed_titles_are_set_on_resource_title( - nucliadb_writer, - nucliadb_grpc, - nucliadb_reader, - knowledgebox, + nucliadb_writer: AsyncClient, + nucliadb_ingest_grpc: WriterStub, + nucliadb_reader: AsyncClient, + standalone_knowledgebox, ): # Create a resource with an email field - kbid = knowledgebox + kbid = standalone_knowledgebox resp = await nucliadb_writer.post( f"/kb/{kbid}/resources", json={ @@ -986,7 +1009,7 @@ async def test_file_computed_titles_are_set_on_resource_title( fed.field = "email" fed.title = extracted_title bm.file_extracted_data.append(fed) - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) # Check that the resource title changed resp = await nucliadb_reader.get(f"/kb/{kbid}/resource/{rid}") @@ -994,7 +1017,7 @@ async def test_file_computed_titles_are_set_on_resource_title( assert resp.json()["title"] == extracted_title # Now test that if the title is changed on creation, it is not overwritten - kbid = knowledgebox + kbid = standalone_knowledgebox resp = await nucliadb_writer.post( f"/kb/{kbid}/resources", json={ @@ -1024,7 +1047,7 @@ async def test_file_computed_titles_are_set_on_resource_title( fed.field = "email" fed.title = extracted_title bm.file_extracted_data.append(fed) - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) # Check that the resource title changed resp = await nucliadb_reader.get(f"/kb/{kbid}/resource/{rid2}") @@ -1034,13 +1057,14 @@ async def 
test_file_computed_titles_are_set_on_resource_title( assert title == "Something else" +@pytest.mark.deploy_modes("standalone") async def test_jsonl_text_field( nucliadb_writer: AsyncClient, nucliadb_reader: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): resp = await nucliadb_writer.post( - f"kb/{knowledgebox}/resources", + f"kb/{standalone_knowledgebox}/resources", json={ "title": "My title", "texts": { @@ -1060,7 +1084,7 @@ async def test_jsonl_text_field( rid = resp.json()["uuid"] resp = await nucliadb_reader.get( - f"kb/{knowledgebox}/resource/{rid}?show=values&show=basic", + f"kb/{standalone_knowledgebox}/resource/{rid}?show=values&show=basic", ) assert resp.status_code == 200, resp.text data = resp.json() @@ -1075,10 +1099,11 @@ async def test_jsonl_text_field( assert data["icon"] == "application/x-ndjson" +@pytest.mark.deploy_modes("standalone") async def test_extract_strategy_on_fields( nucliadb_writer: AsyncClient, nucliadb_reader: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): processing = get_processing() assert isinstance(processing, DummyProcessingEngine) @@ -1087,7 +1112,7 @@ async def test_extract_strategy_on_fields( # Create a resource with a field of each type resp = await nucliadb_writer.post( - f"kb/{knowledgebox}/resources", + f"kb/{standalone_knowledgebox}/resources", json={ "title": "My title", "texts": { @@ -1120,7 +1145,7 @@ async def test_extract_strategy_on_fields( # Check that the extract strategies are stored resp = await nucliadb_reader.get( - f"kb/{knowledgebox}/resource/{rid}?show=values", + f"kb/{standalone_knowledgebox}/resource/{rid}?show=values", ) assert resp.status_code == 200, resp.text data = resp.json() @@ -1145,7 +1170,7 @@ def validate_processing_call(processing: DummyProcessingEngine): # Reprocess resource should also send the extract strategies resp = await nucliadb_writer.post( - f"kb/{knowledgebox}/resource/{rid}/reprocess", + f"kb/{standalone_knowledgebox}/resource/{rid}/reprocess", ) assert 
resp.status_code == 202, resp.text @@ -1153,7 +1178,7 @@ def validate_processing_call(processing: DummyProcessingEngine): # Update them to make sure they are stored correctly resp = await nucliadb_writer.patch( - f"kb/{knowledgebox}/resource/{rid}", + f"kb/{standalone_knowledgebox}/resource/{rid}", json={ "texts": { "text": { @@ -1184,7 +1209,7 @@ def validate_processing_call(processing: DummyProcessingEngine): # Check that the extract strategies are stored resp = await nucliadb_reader.get( - f"kb/{knowledgebox}/resource/{rid}?show=values", + f"kb/{standalone_knowledgebox}/resource/{rid}?show=values", ) assert resp.status_code == 200, resp.text @@ -1198,7 +1223,7 @@ def validate_processing_call(processing: DummyProcessingEngine): # Upload a file with the upload endpoint, and set the extract strategy via a header resp = await nucliadb_writer.post( - f"kb/{knowledgebox}/resource/{rid}/file/file2/upload", + f"kb/{standalone_knowledgebox}/resource/{rid}/file/file2/upload", headers={"x-extract-strategy": "barbafoo"}, content=b"file content", ) @@ -1207,7 +1232,7 @@ def validate_processing_call(processing: DummyProcessingEngine): # Check that the extract strategy is stored resp = await nucliadb_reader.get( - f"kb/{knowledgebox}/resource/{rid}?show=values", + f"kb/{standalone_knowledgebox}/resource/{rid}?show=values", ) assert resp.status_code == 200, resp.text data = resp.json() @@ -1236,7 +1261,7 @@ def header_encode(some_string): ) file_content = b"file content" resp = await nucliadb_writer.post( - f"kb/{knowledgebox}/tusupload", + f"kb/{standalone_knowledgebox}/tusupload", headers={ "x-extract-strategy": "barbafoo-tus", "tus-resumable": "1.0.0", @@ -1262,7 +1287,7 @@ def header_encode(some_string): # Check that the extract strategy is stored resp = await nucliadb_reader.get( - f"kb/{knowledgebox}/resource/{rid}?show=values", + f"kb/{standalone_knowledgebox}/resource/{rid}?show=values", ) assert resp.status_code == 200, resp.text data = resp.json() diff --git 
a/nucliadb/tests/nucliadb/integration/test_ask.py b/nucliadb/tests/nucliadb/integration/test_ask.py index a1c95979f6..ab1cdf443d 100644 --- a/nucliadb/tests/nucliadb/integration/test_ask.py +++ b/nucliadb/tests/nucliadb/integration/test_ask.py @@ -31,7 +31,7 @@ StatusGenerativeResponse, ) -from nucliadb.search.predict import AnswerStatusCode +from nucliadb.search.predict import AnswerStatusCode, DummyPredictEngine from nucliadb.search.utilities import get_predict from nucliadb_models.search import ( AskRequest, @@ -61,16 +61,17 @@ def audit(): yield audit_mock +@pytest.mark.deploy_modes("standalone") async def test_ask( nucliadb_reader: AsyncClient, - knowledgebox, + standalone_knowledgebox: str, ): - resp = await nucliadb_reader.post(f"/kb/{knowledgebox}/ask", json={"query": "query"}) + resp = await nucliadb_reader.post(f"/kb/{standalone_knowledgebox}/ask", json={"query": "query"}) assert resp.status_code == 200 context = [{"author": "USER", "text": "query"}] resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={ "query": "query", "context": context, @@ -88,19 +89,20 @@ def find_incomplete_results(): yield +@pytest.mark.deploy_modes("standalone") async def test_ask_handles_incomplete_find_results( nucliadb_reader: AsyncClient, - knowledgebox, + standalone_knowledgebox: str, find_incomplete_results, ): - resp = await nucliadb_reader.post(f"/kb/{knowledgebox}/ask", json={"query": "query"}) + resp = await nucliadb_reader.post(f"/kb/{standalone_knowledgebox}/ask", json={"query": "query"}) assert resp.status_code == 529 assert resp.json() == {"detail": "Temporary error on information retrieval. 
Please try again."} @pytest.fixture -async def resource(nucliadb_writer, knowledgebox): - kbid = knowledgebox +async def resource(nucliadb_writer: AsyncClient, standalone_knowledgebox: str): + kbid = standalone_knowledgebox resp = await nucliadb_writer.post( f"/kb/{kbid}/resources", json={ @@ -116,9 +118,9 @@ async def resource(nucliadb_writer, knowledgebox): @pytest.fixture -async def graph_resource(nucliadb_writer, nucliadb_grpc, knowledgebox): +async def graph_resource(nucliadb_writer: AsyncClient, nucliadb_ingest_grpc, standalone_knowledgebox): resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "Knowledge graph", "slug": "knowledgegraph", @@ -194,16 +196,17 @@ async def graph_resource(nucliadb_writer, nucliadb_grpc, knowledgebox): ] bm = BrokerMessage() bm.uuid = rid - bm.kbid = knowledgebox + bm.kbid = standalone_knowledgebox bm.relations.extend(edges) - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) await wait_for_sync() return rid -async def test_ask_synchronous(nucliadb_reader: AsyncClient, knowledgebox, resource): +@pytest.mark.deploy_modes("standalone") +async def test_ask_synchronous(nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resource): resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={"query": "title"}, headers={"X-Synchronous": "True"}, ) @@ -214,9 +217,12 @@ async def test_ask_synchronous(nucliadb_reader: AsyncClient, knowledgebox, resou assert resp_data.status == AnswerStatusCode.SUCCESS.prettify() -async def test_ask_status_code_no_retrieval_data(nucliadb_reader: AsyncClient, knowledgebox): +@pytest.mark.deploy_modes("standalone") +async def test_ask_status_code_no_retrieval_data( + nucliadb_reader: AsyncClient, standalone_knowledgebox: str +): resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", 
json={"query": "title"}, headers={"X-Synchronous": "True"}, ) @@ -227,7 +233,8 @@ async def test_ask_status_code_no_retrieval_data(nucliadb_reader: AsyncClient, k assert resp_data.status == AnswerStatusCode.NO_RETRIEVAL_DATA.prettify() -async def test_ask_with_citations(nucliadb_reader: AsyncClient, knowledgebox, resource): +@pytest.mark.deploy_modes("standalone") +async def test_ask_with_citations(nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resource): citations = {"foo": [], "bar": []} # type: ignore citations_gen = CitationsGenerativeResponse(citations=citations) citations_chunk = GenerativeChunk(chunk=citations_gen) @@ -236,7 +243,7 @@ async def test_ask_with_citations(nucliadb_reader: AsyncClient, knowledgebox, re predict.ndjson_answer.append(citations_chunk.model_dump_json() + "\n") # type: ignore resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={"query": "title", "citations": True, "citation_threshold": 0.5}, headers={"X-Synchronous": "true"}, ) @@ -248,10 +255,13 @@ async def test_ask_with_citations(nucliadb_reader: AsyncClient, knowledgebox, re @pytest.mark.parametrize("debug", (True, False)) -async def test_sync_ask_returns_debug_mode(nucliadb_reader: AsyncClient, knowledgebox, resource, debug): +@pytest.mark.deploy_modes("standalone") +async def test_sync_ask_returns_debug_mode( + nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resource, debug +): # Make sure prompt context is returned if debug is True resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={"query": "title", "debug": debug}, headers={"X-Synchronous": "True"}, ) @@ -267,8 +277,8 @@ async def test_sync_ask_returns_debug_mode(nucliadb_reader: AsyncClient, knowled @pytest.fixture -async def resources(nucliadb_writer, knowledgebox): - kbid = knowledgebox +async def resources(nucliadb_writer: AsyncClient, standalone_knowledgebox: str): + kbid = 
standalone_knowledgebox rids = [] for i in range(2): resp = await nucliadb_writer.post( @@ -300,14 +310,17 @@ def parse_ask_response(resp): return results -async def test_ask_rag_options_full_resource(nucliadb_reader: AsyncClient, knowledgebox, resources): +@pytest.mark.deploy_modes("standalone") +async def test_ask_rag_options_full_resource( + nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resources +): resource1, resource2 = resources predict = get_predict() predict.calls.clear() # type: ignore resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={ "query": "title", "features": ["keyword", "semantic", "relations"], @@ -331,8 +344,9 @@ async def test_ask_rag_options_full_resource(nucliadb_reader: AsyncClient, knowl assert prompt_context[f"{resource2}/t/text_field"] == "The body of the text field" +@pytest.mark.deploy_modes("standalone") async def test_ask_full_resource_rag_strategy_with_exclude( - nucliadb_reader: AsyncClient, knowledgebox, resources + nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resources ): resource1, resource2 = resources @@ -340,7 +354,7 @@ async def test_ask_full_resource_rag_strategy_with_exclude( predict.calls.clear() # type: ignore resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={ "query": "title", "features": ["keyword", "semantic", "relations"], @@ -385,14 +399,17 @@ async def test_ask_full_resource_rag_strategy_with_exclude( assert prompt_context[f"{resource2}/t/text_field"] == "The body of the text field" -async def test_ask_rag_options_extend_with_fields(nucliadb_reader: AsyncClient, knowledgebox, resources): +@pytest.mark.deploy_modes("standalone") +async def test_ask_rag_options_extend_with_fields( + nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resources +): resource1, resource2 = resources predict = get_predict() predict.calls.clear() # type: ignore resp = await 
nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={ "query": "title", "features": ["keyword", "semantic", "relations"], @@ -491,7 +508,10 @@ async def test_ask_rag_options_extend_with_fields(nucliadb_reader: AsyncClient, ), ], ) -async def test_ask_rag_strategies_validation(nucliadb_reader, invalid_payload, expected_error_msg): +@pytest.mark.deploy_modes("standalone") +async def test_ask_rag_strategies_validation( + nucliadb_reader: AsyncClient, invalid_payload, expected_error_msg +): # Invalid strategy as a string resp = await nucliadb_reader.post( f"/kb/kbid/ask", @@ -503,10 +523,11 @@ async def test_ask_rag_strategies_validation(nucliadb_reader, invalid_payload, e assert expected_error_msg in error_msg -async def test_ask_capped_context(nucliadb_reader: AsyncClient, knowledgebox, resources): +@pytest.mark.deploy_modes("standalone") +async def test_ask_capped_context(nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resources): # By default, max size is big enough to fit all the prompt context resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={ "query": "title", "rag_strategies": [{"name": "full_resource"}], @@ -524,7 +545,7 @@ async def test_ask_capped_context(nucliadb_reader: AsyncClient, knowledgebox, re assert total_size > max_size * 3 resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={ "query": "title", "rag_strategies": [{"name": "full_resource"}], @@ -540,15 +561,17 @@ async def test_ask_capped_context(nucliadb_reader: AsyncClient, knowledgebox, re assert total_size <= max_size * 3 -async def test_ask_on_a_kb_not_found(nucliadb_reader): +@pytest.mark.deploy_modes("standalone") +async def test_ask_on_a_kb_not_found(nucliadb_reader: AsyncClient): resp = await nucliadb_reader.post("/kb/unknown_kb_id/ask", json={"query": "title"}) assert resp.status_code == 404 -async def 
test_ask_max_tokens(nucliadb_reader, knowledgebox, resources): +@pytest.mark.deploy_modes("standalone") +async def test_ask_max_tokens(nucliadb_reader: AsyncClient, standalone_knowledgebox, resources): # As an integer resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={ "query": "title", "max_tokens": 100, @@ -558,7 +581,7 @@ async def test_ask_max_tokens(nucliadb_reader, knowledgebox, resources): # Same but with the max tokens in a dict resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={ "query": "title", "max_tokens": {"context": 100, "answer": 50}, @@ -568,8 +591,10 @@ async def test_ask_max_tokens(nucliadb_reader, knowledgebox, resources): # If the context requested is bigger than the max tokens, it should fail predict = get_predict() + assert isinstance(predict, DummyPredictEngine), "dummy is expected in this test" + resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={ "query": "title", "max_tokens": {"context": predict.max_context + 1}, @@ -578,9 +603,10 @@ async def test_ask_max_tokens(nucliadb_reader, knowledgebox, resources): assert resp.status_code == 412 -async def test_ask_on_resource(nucliadb_reader: AsyncClient, knowledgebox, resource): +@pytest.mark.deploy_modes("standalone") +async def test_ask_on_resource(nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resource): resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/resource/{resource}/ask", + f"/kb/{standalone_knowledgebox}/resource/{resource}/ask", json={"query": "title"}, headers={"X-Synchronous": "True"}, ) @@ -588,8 +614,12 @@ async def test_ask_on_resource(nucliadb_reader: AsyncClient, knowledgebox, resou SyncAskResponse.model_validate_json(resp.content) -async def test_ask_handles_stream_errors_on_predict(nucliadb_reader, knowledgebox, resource): +@pytest.mark.deploy_modes("standalone") +async def 
test_ask_handles_stream_errors_on_predict( + nucliadb_reader: AsyncClient, standalone_knowledgebox, resource +): predict = get_predict() + assert isinstance(predict, DummyPredictEngine), "dummy is expected in this test" prev = predict.ndjson_answer.copy() predict.ndjson_answer.pop(-1) @@ -599,7 +629,7 @@ async def test_ask_handles_stream_errors_on_predict(nucliadb_reader, knowledgebo # Sync ask resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={"query": "title"}, headers={"X-Synchronous": "True"}, ) @@ -610,7 +640,7 @@ async def test_ask_handles_stream_errors_on_predict(nucliadb_reader, knowledgebo # Stream ask resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={"query": "title"}, ) assert resp.status_code == 200 @@ -623,28 +653,34 @@ async def test_ask_handles_stream_errors_on_predict(nucliadb_reader, knowledgebo predict.ndjson_answer = prev -async def test_ask_handles_stream_unexpected_errors_sync(nucliadb_reader, knowledgebox, resource): +@pytest.mark.deploy_modes("standalone") +async def test_ask_handles_stream_unexpected_errors_sync( + nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resource +): with mock.patch( "nucliadb.search.search.chat.ask.AskResult._stream", side_effect=ValueError("foobar"), ): # Sync ask -- should return a 500 resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={"query": "title"}, headers={"X-Synchronous": "True"}, ) assert resp.status_code == 500 -async def test_ask_handles_stream_unexpected_errors_stream(nucliadb_reader, knowledgebox, resource): +@pytest.mark.deploy_modes("standalone") +async def test_ask_handles_stream_unexpected_errors_stream( + nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resource +): with mock.patch( "nucliadb.search.search.chat.ask.AskResult._stream", side_effect=ValueError("foobar"), ): # Stream ask -- should 
handle by yielding the error item resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={"query": "title"}, ) assert resp.status_code == 200 @@ -656,12 +692,13 @@ async def test_ask_handles_stream_unexpected_errors_stream(nucliadb_reader, know ) +@pytest.mark.deploy_modes("standalone") async def test_ask_with_json_schema_output( nucliadb_reader: AsyncClient, - knowledgebox, + standalone_knowledgebox: str, resource, ): - resp = await nucliadb_reader.post(f"/kb/{knowledgebox}/ask", json={"query": "query"}) + resp = await nucliadb_reader.post(f"/kb/{standalone_knowledgebox}/ask", json={"query": "query"}) assert resp.status_code == 200 predict = get_predict() @@ -670,7 +707,7 @@ async def test_ask_with_json_schema_output( predict.ndjson_answer = [GenerativeChunk(chunk=predict_answer).model_dump_json() + "\n"] # type: ignore resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={ "query": "title", "features": ["keyword", "semantic", "relations"], @@ -689,10 +726,13 @@ async def test_ask_with_json_schema_output( assert answer_json["confidence"] == 0.5 +@pytest.mark.deploy_modes("standalone") async def test_ask_assert_audit_retrieval_contexts( - nucliadb_reader: AsyncClient, knowledgebox, resources, audit + nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resources, audit ): - resp = await nucliadb_reader.post(f"/kb/{knowledgebox}/ask", json={"query": "title", "debug": True}) + resp = await nucliadb_reader.post( + f"/kb/{standalone_knowledgebox}/ask", json={"query": "title", "debug": True} + ) assert resp.status_code == 200 retrieved_context = audit.chat.call_args_list[0].kwargs["retrieved_context"] @@ -701,11 +741,12 @@ async def test_ask_assert_audit_retrieval_contexts( } +@pytest.mark.deploy_modes("standalone") async def test_ask_rag_strategy_neighbouring_paragraphs( - nucliadb_reader: AsyncClient, knowledgebox, resources + nucliadb_reader: 
AsyncClient, standalone_knowledgebox: str, resources ): resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={ "query": "title", "rag_strategies": [{"name": "neighbouring_paragraphs", "before": 2, "after": 2}], @@ -718,11 +759,12 @@ async def test_ask_rag_strategy_neighbouring_paragraphs( assert ask_response.prompt_context is not None +@pytest.mark.deploy_modes("standalone") async def test_ask_rag_strategy_metadata_extension( - nucliadb_reader: AsyncClient, knowledgebox, resources + nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resources ): resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={ "query": "title", "rag_strategies": [ @@ -757,7 +799,7 @@ async def test_ask_rag_strategy_metadata_extension( {"name": "field_extension", "fields": ["a/title", "a/summary"]}, ]: resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={ "query": "title", "rag_strategies": [ @@ -782,9 +824,10 @@ async def test_ask_rag_strategy_metadata_extension( assert origin_found, ask_response.prompt_context -async def test_ask_top_k(nucliadb_reader: AsyncClient, knowledgebox, resources): +@pytest.mark.deploy_modes("standalone") +async def test_ask_top_k(nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resources): resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={ "query": "title", }, @@ -797,7 +840,7 @@ async def test_ask_top_k(nucliadb_reader: AsyncClient, knowledgebox, resources): # Check that the top_k is respected resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={ "query": "title", "top_k": 1, @@ -810,18 +853,18 @@ async def test_ask_top_k(nucliadb_reader: AsyncClient, knowledgebox, resources): assert ask_response.retrieval_results.best_matches[0] == prev_best_matches[0] 
-@pytest.mark.asyncio @pytest.mark.parametrize("relation_ranking", ["generative", "reranker"]) @patch("nucliadb.search.search.graph_strategy.get_predict") @patch("nucliadb.search.search.graph_strategy.rank_relations_reranker") @patch("nucliadb.search.search.graph_strategy.rank_relations_generative") +@pytest.mark.deploy_modes("standalone") async def test_ask_graph_strategy( mocker_generative, mocker_reranker, mocker_predict, relation_ranking: str, nucliadb_reader: AsyncClient, - knowledgebox, + standalone_knowledgebox: str, graph_resource, ): # Mock the rank_relations functions to return the same relations with a score of 5 (no ranking) @@ -849,7 +892,7 @@ def mock_rank(relations, *args, **kwargs): } headers = {"X-Synchronous": "True"} - url = f"/kb/{knowledgebox}/ask" + url = f"/kb/{standalone_knowledgebox}/ask" async def assert_ask(d, expected_paragraphs_text, expected_paragraphs_relations): resp = await nucliadb_reader.post( @@ -955,9 +998,12 @@ async def assert_ask(d, expected_paragraphs_text, expected_paragraphs_relations) await assert_ask(data, expected_paragraphs_text, expected_paragraphs_relations) -async def test_ask_rag_strategy_prequeries(nucliadb_reader: AsyncClient, knowledgebox, resources): +@pytest.mark.deploy_modes("standalone") +async def test_ask_rag_strategy_prequeries( + nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resources +): resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={ "query": "", "rag_strategies": [ @@ -989,12 +1035,13 @@ async def test_ask_rag_strategy_prequeries(nucliadb_reader: AsyncClient, knowled assert len(ask_response.prequeries["title_query"].best_matches) > 1 +@pytest.mark.deploy_modes("standalone") async def test_ask_rag_strategy_prequeries_with_full_resource( nucliadb_reader: AsyncClient, - knowledgebox, + standalone_knowledgebox: str, ): resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={ 
"query": "", "rag_strategies": [ @@ -1023,13 +1070,14 @@ async def test_ask_rag_strategy_prequeries_with_full_resource( assert resp.status_code == 200, resp.text +@pytest.mark.deploy_modes("standalone") async def test_ask_rag_strategy_prequeries_with_prefilter( nucliadb_reader: AsyncClient, - knowledgebox, + standalone_knowledgebox: str, resources, ): resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", headers={"X-Synchronous": "True"}, json={ "query": "", @@ -1070,12 +1118,13 @@ async def test_ask_rag_strategy_prequeries_with_prefilter( assert ask_response.prequeries["prequery"].resources[expected_rid].title == "The title 0" +@pytest.mark.deploy_modes("standalone") async def test_ask_on_resource_with_json_schema_automatic_prequeries( nucliadb_reader: AsyncClient, - knowledgebox, + standalone_knowledgebox: str, resource, ): - kbid = knowledgebox + kbid = standalone_knowledgebox rid = resource answer_json_schema = { "name": "book_ordering", @@ -1106,9 +1155,10 @@ async def test_ask_on_resource_with_json_schema_automatic_prequeries( assert len(ask_response.prequeries) == 4 +@pytest.mark.deploy_modes("standalone") async def test_all_rag_strategies_combinations( nucliadb_reader: AsyncClient, - knowledgebox, + standalone_knowledgebox: str, resources, ): rag_strategies = [ @@ -1137,22 +1187,23 @@ def valid_combination(combination: list[RagStrategies]) -> bool: for combination in valid_combinations: # type: ignore print(f"Combination: {sorted([strategy.name for strategy in combination])}") resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", headers={"X-Synchronous": "True"}, json={ "query": "title", - "rag_strategies": [strategy.dict() for strategy in combination], + "rag_strategies": [strategy.model_dump() for strategy in combination], }, ) assert resp.status_code == 200, resp.text +@pytest.mark.deploy_modes("standalone") async def 
test_ask_fails_with_answer_json_schema_too_big( nucliadb_reader: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, resources: list[str], ): - kbid = knowledgebox + kbid = standalone_knowledgebox rid = resources[0] resp = await nucliadb_reader.post( @@ -1184,13 +1235,14 @@ async def test_ask_fails_with_answer_json_schema_too_big( ) +@pytest.mark.deploy_modes("standalone") async def test_rag_image_rag_strategies( nucliadb_reader: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, resources: list[str], ): resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", headers={"X-Synchronous": "True"}, json={ "query": "title", @@ -1199,7 +1251,7 @@ async def test_rag_image_rag_strategies( assert resp.status_code == 200, resp.text resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", headers={"X-Synchronous": "True"}, json={ "query": "title", @@ -1220,10 +1272,13 @@ async def test_rag_image_rag_strategies( assert resp.status_code == 200, resp.text -async def test_ask_skip_answer_generation(nucliadb_reader: AsyncClient, knowledgebox, resource): +@pytest.mark.deploy_modes("standalone") +async def test_ask_skip_answer_generation( + nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resource +): # Synchronous resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={"query": "title", "generate_answer": False, "debug": True}, headers={"X-Synchronous": "True"}, ) @@ -1237,7 +1292,7 @@ async def test_ask_skip_answer_generation(nucliadb_reader: AsyncClient, knowledg # Streaming resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/ask", + f"/kb/{standalone_knowledgebox}/ask", json={"query": "title", "generate_answer": False, "debug": True}, ) assert resp.status_code == 200 diff --git a/nucliadb/tests/nucliadb/integration/test_conversation.py 
b/nucliadb/tests/nucliadb/integration/test_conversation.py index 1dd203c200..198bd78561 100644 --- a/nucliadb/tests/nucliadb/integration/test_conversation.py +++ b/nucliadb/tests/nucliadb/integration/test_conversation.py @@ -45,7 +45,9 @@ @pytest.fixture(scope="function") -async def resource_with_conversation(nucliadb_grpc, nucliadb_writer, knowledgebox): +async def resource_with_conversation( + nucliadb_ingest_grpc, nucliadb_writer: AsyncClient, standalone_knowledgebox +): messages = [] for i in range(1, 301): messages.append( @@ -55,13 +57,13 @@ async def resource_with_conversation(nucliadb_grpc, nucliadb_writer, knowledgebo timestamp=datetime.now(), content=InputMessageContent(text="What is the meaning of life?"), ident=str(i), - type=MessageType.QUESTION.value, + type=MessageType.QUESTION, ) ) resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", headers={"Content-Type": "application/json"}, - data=CreateResourcePayload( + content=CreateResourcePayload( slug="myresource", conversations={ "faq": InputConversationField(messages=messages), @@ -74,13 +76,13 @@ async def resource_with_conversation(nucliadb_grpc, nucliadb_writer, knowledgebo # add another message using the api to add single message resp = await nucliadb_writer.put( - f"/kb/{knowledgebox}/resource/{rid}/conversation/faq/messages", - data="[" + f"/kb/{standalone_knowledgebox}/resource/{rid}/conversation/faq/messages", + content="[" + InputMessage( to=[f"computer"], content=InputMessageContent(text="42"), ident="computer", - type=MessageType.ANSWER.value, + type=MessageType.ANSWER, ).model_dump_json(by_alias=True) + "]", ) @@ -92,7 +94,7 @@ async def resource_with_conversation(nucliadb_grpc, nucliadb_writer, knowledgebo bm = BrokerMessage() bm.uuid = rid - bm.kbid = knowledgebox + bm.kbid = standalone_knowledgebox field = FieldID(field="faq", field_type=FieldType.CONVERSATION) etw = ExtractedTextWrapper() @@ -108,20 +110,21 @@ async def 
resource_with_conversation(nucliadb_grpc, nucliadb_writer, knowledgebo fmw.metadata.split_metadata[split].paragraphs.append(paragraph) bm.field_metadata.append(fmw) - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) yield rid +@pytest.mark.deploy_modes("standalone") async def test_conversations( nucliadb_reader: AsyncClient, - knowledgebox, + standalone_knowledgebox, resource_with_conversation, ): rid = resource_with_conversation # get field summary - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}?show=values") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}?show=values") assert resp.status_code == 200 res_resp = ResponseResponse.model_validate(resp.json()) @@ -133,7 +136,9 @@ async def test_conversations( ) # get first page - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}/conversation/faq?page=1") + resp = await nucliadb_reader.get( + f"/kb/{standalone_knowledgebox}/resource/{rid}/conversation/faq?page=1" + ) assert resp.status_code == 200 field_resp = ResourceField.model_validate(resp.json()) msgs = field_resp.value["messages"] # type: ignore @@ -142,7 +147,9 @@ async def test_conversations( assert msgs[0]["type"] == MessageType.QUESTION.value # get second page - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}/conversation/faq?page=2") + resp = await nucliadb_reader.get( + f"/kb/{standalone_knowledgebox}/resource/{rid}/conversation/faq?page=2" + ) assert resp.status_code == 200 field_resp = ResourceField.model_validate(resp.json()) msgs = field_resp.value["messages"] # type: ignore @@ -151,15 +158,16 @@ async def test_conversations( assert msgs[-1]["type"] == MessageType.ANSWER.value +@pytest.mark.deploy_modes("standalone") async def test_extracted_text_is_serialized_properly( nucliadb_reader: AsyncClient, - knowledgebox, + standalone_knowledgebox, resource_with_conversation, ): rid = resource_with_conversation resp = await 
nucliadb_reader.get( - f"/kb/{knowledgebox}/resource/{rid}?show=values&show=extracted&extracted=text", + f"/kb/{standalone_knowledgebox}/resource/{rid}?show=values&show=extracted&extracted=text", ) assert resp.status_code == 200 resource = Resource.model_validate(resp.json()) @@ -169,15 +177,16 @@ async def test_extracted_text_is_serialized_properly( assert extracted.text.split_text["2"] == "Split text 2" # type: ignore +@pytest.mark.deploy_modes("standalone") async def test_find_conversations( nucliadb_reader: AsyncClient, - knowledgebox, + standalone_knowledgebox: str, resource_with_conversation, ): rid = resource_with_conversation resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/find?query=", + f"/kb/{standalone_knowledgebox}/find?query=", ) assert resp.status_code == 200 results = KnowledgeboxFindResults.model_validate(resp.json()) @@ -185,7 +194,7 @@ async def test_find_conversations( assert matching_rid == rid resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/resource/{rid}?show=values&show=extracted&extracted=text" + f"/kb/{standalone_knowledgebox}/resource/{rid}?show=values&show=extracted&extracted=text" ) assert resp.status_code == 200 resource = Resource.model_validate(resp.json()) @@ -205,7 +214,10 @@ async def test_find_conversations( assert paragraphs[f"{rid}/c/faq/2/0-12"].text == "Split text 2" -async def test_cannot_create_message_ident_0(nucliadb_grpc, nucliadb_writer, knowledgebox): +@pytest.mark.deploy_modes("standalone") +async def test_cannot_create_message_ident_0( + nucliadb_ingest_grpc, nucliadb_writer: AsyncClient, standalone_knowledgebox: str +): messages = [ # model_construct skips validation, to test the API error InputMessage.model_construct( @@ -214,13 +226,13 @@ async def test_cannot_create_message_ident_0(nucliadb_grpc, nucliadb_writer, kno timestamp=datetime.now(), content=InputMessageContent(text="What is the meaning of life?"), ident="0", - type=MessageType.QUESTION.value, + type=MessageType.QUESTION, ) ] resp = 
await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", headers={"Content-Type": "application/json"}, - data=CreateResourcePayload( + content=CreateResourcePayload( slug="myresource", conversations={ "faq": InputConversationField(messages=messages), diff --git a/nucliadb/tests/nucliadb/integration/test_counters.py b/nucliadb/tests/nucliadb/integration/test_counters.py index d54b772ff7..13628f68aa 100644 --- a/nucliadb/tests/nucliadb/integration/test_counters.py +++ b/nucliadb/tests/nucliadb/integration/test_counters.py @@ -17,20 +17,22 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . # +import pytest from httpx import AsyncClient +@pytest.mark.deploy_modes("standalone") async def test_counters( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): # PUBLIC API - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}") assert resp.status_code == 200 resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "My title", "slug": "myresource", @@ -40,18 +42,18 @@ async def test_counters( assert resp.status_code == 201 resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={"slug": "myresource2", "title": "mytitle1"}, ) assert resp.status_code == 201 resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={"slug": "myresource3", "title": "mytitle1"}, ) assert resp.status_code == 201 - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/counters") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/counters") assert resp.status_code == 200 assert resp.json()["resources"] == 3 diff --git 
a/nucliadb/tests/nucliadb/integration/test_data_augmentation_field_generation.py b/nucliadb/tests/nucliadb/integration/test_data_augmentation_field_generation.py index 884725b784..05ebd47ba9 100644 --- a/nucliadb/tests/nucliadb/integration/test_data_augmentation_field_generation.py +++ b/nucliadb/tests/nucliadb/integration/test_data_augmentation_field_generation.py @@ -177,13 +177,14 @@ async def test_send_to_process_generated_fields( assert "/g/da/author" in index_message.texts[f"t/{da_field}"].labels +@pytest.mark.deploy_modes("standalone") async def test_data_augmentation_field_generation_and_search( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox: str, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox: str, ): - kbid = knowledgebox + kbid = standalone_knowledgebox slug = "my-resource" field_id = "my-text" @@ -226,7 +227,7 @@ async def test_data_augmentation_field_generation_and_search( field_metadata.field.CopyFrom(field_id_pb) field_metadata.metadata.metadata.paragraphs.append(Paragraph(start=0, end=25)) bm.field_metadata.append(field_metadata) - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) # Data augmentation broker message bm = BrokerMessage() @@ -237,7 +238,7 @@ async def test_data_augmentation_field_generation_and_search( bm.texts[da_field_id].body = "Text author" bm.texts[da_field_id].md5 = hashlib.md5("Text author".encode()).hexdigest() bm.texts[da_field_id].generated_by.data_augmentation.SetInParent() - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) # Processed DA resource (from processing) bm = BrokerMessage() @@ -261,7 +262,7 @@ async def test_data_augmentation_field_generation_and_search( field_metadata.field.CopyFrom(da_field_id_pb) field_metadata.metadata.metadata.paragraphs.append(Paragraph(start=0, end=28)) bm.field_metadata.append(field_metadata) - await inject_message(nucliadb_grpc, bm) + 
await inject_message(nucliadb_ingest_grpc, bm) # Now validate we can search and filter out data augmentation fields resp = await nucliadb_reader.post( diff --git a/nucliadb/tests/nucliadb/integration/test_deletion.py b/nucliadb/tests/nucliadb/integration/test_deletion.py index 2633b4d957..dcbdf6e2a5 100644 --- a/nucliadb/tests/nucliadb/integration/test_deletion.py +++ b/nucliadb/tests/nucliadb/integration/test_deletion.py @@ -19,6 +19,7 @@ # import dataclasses +import pytest from httpx import AsyncClient from nucliadb.common import datamanagers @@ -46,16 +47,19 @@ class FieldData: vector: tuple[str, list[float]] +@pytest.mark.deploy_modes("standalone") async def test_paragraph_index_deletions( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox, ): # Prepare data for a resource with title, summary and a text field async with datamanagers.with_ro_transaction() as txn: - vectorsets = [vs async for _, vs in datamanagers.vectorsets.iter(txn, kbid=knowledgebox)] + vectorsets = [ + vs async for _, vs in datamanagers.vectorsets.iter(txn, kbid=standalone_knowledgebox) + ] assert len(vectorsets) == 1 vectorset_id = vectorsets[0].vectorset_id vector_dimension = vectorsets[0].vectorset_index_config.vector_dimension @@ -86,7 +90,7 @@ async def test_paragraph_index_deletions( # Create a resource with a simple text field resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": title_field.text, "summary": summary_field.text, @@ -104,7 +108,7 @@ async def test_paragraph_index_deletions( # Check that searching for original texts returns title and summary (text is # not indexed) resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/find", + f"/kb/{standalone_knowledgebox}/find", json={ "query": "Original", "features": [SearchOptions.KEYWORD], @@ -120,14 +124,16 @@ async def 
test_paragraph_index_deletions( assert list(sorted(fields.keys())) == ["/a/summary", "/a/title"] # Inject corresponding broker message as if it was coming from the processor - bmb = BrokerMessageBuilder(kbid=knowledgebox, rid=rid, source=BrokerMessage.MessageSource.PROCESSOR) + bmb = BrokerMessageBuilder( + kbid=standalone_knowledgebox, rid=rid, source=BrokerMessage.MessageSource.PROCESSOR + ) bm = prepare_broker_message(bmb, title_field, summary_field, text_field) - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) await wait_for_sync() # wait until changes are searchable # Check that searching for original texts does not return any results resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/find", + f"/kb/{standalone_knowledgebox}/find", json={ "query": "Original", "features": [SearchOptions.KEYWORD], @@ -141,7 +147,7 @@ async def test_paragraph_index_deletions( # Check that searching for extracted texts returns all fields resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/find", + f"/kb/{standalone_knowledgebox}/find", json={ "query": "Extracted", "features": [SearchOptions.KEYWORD], @@ -164,7 +170,7 @@ async def test_paragraph_index_deletions( ) resp = await nucliadb_writer.patch( - f"/kb/{knowledgebox}/resource/{rid}", + f"/kb/{standalone_knowledgebox}/resource/{rid}", json={ "texts": { text_field.field_id: { @@ -177,15 +183,17 @@ async def test_paragraph_index_deletions( assert resp.status_code == 200 # Inject broker message with the modified text - bmb = BrokerMessageBuilder(kbid=knowledgebox, rid=rid, source=BrokerMessage.MessageSource.PROCESSOR) + bmb = BrokerMessageBuilder( + kbid=standalone_knowledgebox, rid=rid, source=BrokerMessage.MessageSource.PROCESSOR + ) bm = prepare_broker_message(bmb, title_field, summary_field, text_field) - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) await wait_for_sync() # wait until changes are searchable # Check that searching 
for the first extracted text now doesn't return the # text field (as it has been modified) resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/find", + f"/kb/{standalone_knowledgebox}/find", json={ "query": "Extracted", "features": [SearchOptions.KEYWORD], @@ -202,7 +210,7 @@ async def test_paragraph_index_deletions( # Check that searching for the modified text only returns the text field resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/find", + f"/kb/{standalone_knowledgebox}/find", json={ "query": "Modified", "features": [SearchOptions.KEYWORD], diff --git a/nucliadb/tests/nucliadb/integration/test_entities.py b/nucliadb/tests/nucliadb/integration/test_entities.py index 9ab8e00b0c..2f914ecdb3 100644 --- a/nucliadb/tests/nucliadb/integration/test_entities.py +++ b/nucliadb/tests/nucliadb/integration/test_entities.py @@ -65,9 +65,9 @@ @pytest.fixture async def text_field( nucliadb_writer: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, ): - kbid = knowledgebox + kbid = standalone_knowledgebox field_id = "text-field" resp = await nucliadb_writer.post( @@ -93,12 +93,12 @@ async def text_field( @pytest.fixture -async def processing_entities(nucliadb_grpc: WriterStub, knowledgebox: str): +async def processing_entities(nucliadb_ingest_grpc: WriterStub, standalone_knowledgebox: str): entities = { "cat": {"value": "cat"}, "dolphin": {"value": "dolphin"}, } - bm = broker_resource(knowledgebox, slug="automatic-entities") + bm = broker_resource(standalone_knowledgebox, slug="automatic-entities") ufm = UserFieldMetadata( field=FieldID(field_type=FieldType.GENERIC, field="title"), token=[TokenSplit(token="cat", start=0, end=3, klass="ANIMALS")], @@ -121,13 +121,13 @@ async def processing_entities(nucliadb_grpc: WriterStub, knowledgebox: str): ) ) bm.relations.extend(relations) - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) await wait_for_sync() @pytest.fixture async def annotated_entities( - 
nucliadb_writer: AsyncClient, text_field: tuple[str, str, str], nucliadb_grpc + nucliadb_writer: AsyncClient, text_field: tuple[str, str, str], nucliadb_ingest_grpc ): kbid, rid, field_id = text_field @@ -167,7 +167,7 @@ async def annotated_entities( bm_indexed = 0 retries = 0 while not bm_indexed: - response: GetEntitiesGroupResponse = await nucliadb_grpc.GetEntitiesGroup( + response: GetEntitiesGroupResponse = await nucliadb_ingest_grpc.GetEntitiesGroup( GetEntitiesGroupRequest(kb=KnowledgeBoxID(uuid=kbid), group="ANIMALS") ) bm_indexed = "bird" in response.group.entities @@ -180,7 +180,7 @@ async def annotated_entities( @pytest.fixture -async def user_entities(nucliadb_writer: AsyncClient, knowledgebox: str): +async def user_entities(nucliadb_writer: AsyncClient, standalone_knowledgebox: str): await wait_for_sync() payload = CreateEntitiesGroupPayload( group="ANIMALS", @@ -192,31 +192,32 @@ async def user_entities(nucliadb_writer: AsyncClient, knowledgebox: str): title="Animals", color="black", ) - resp = await create_entities_group(nucliadb_writer, knowledgebox, payload) + resp = await create_entities_group(nucliadb_writer, standalone_knowledgebox, payload) assert resp.status_code == 200 @pytest.fixture async def entities( - nucliadb_grpc: WriterStub, - knowledgebox: str, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox: str, user_entities, processing_entities, annotated_entities, ): """Single fixture to get entities injected in different ways.""" # Ensure entities are properly stored/indexed - await wait_until_entity(nucliadb_grpc, knowledgebox, "ANIMALS", "cat") - await wait_until_entity(nucliadb_grpc, knowledgebox, "ANIMALS", "dolphin") - await wait_until_entity(nucliadb_grpc, knowledgebox, "ANIMALS", "bird") + await wait_until_entity(nucliadb_ingest_grpc, standalone_knowledgebox, "ANIMALS", "cat") + await wait_until_entity(nucliadb_ingest_grpc, standalone_knowledgebox, "ANIMALS", "dolphin") + await wait_until_entity(nucliadb_ingest_grpc, 
standalone_knowledgebox, "ANIMALS", "bird") +@pytest.mark.deploy_modes("standalone") async def test_get_entities_groups( nucliadb_reader: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, entities, ): - kbid = knowledgebox + kbid = standalone_knowledgebox resp = await nucliadb_reader.get(f"/kb/{kbid}/entitiesgroup/ANIMALS") assert resp.status_code == 200 @@ -240,12 +241,13 @@ async def test_get_entities_groups( assert body["detail"] == "Entities group 'I-DO-NOT-EXIST' does not exist" +@pytest.mark.deploy_modes("standalone") async def test_list_entities_groups( nucliadb_reader: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, entities, ): - kbid = knowledgebox + kbid = standalone_knowledgebox resp = await nucliadb_reader.get(f"/kb/{kbid}/entitiesgroups?show_entities=false") assert resp.status_code == 200 @@ -256,13 +258,14 @@ async def test_list_entities_groups( assert len(body["groups"]["ANIMALS"]["entities"]) == 0 +@pytest.mark.deploy_modes("standalone") async def test_create_entities_group_twice( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, entities, ): - kbid = knowledgebox + kbid = standalone_knowledgebox payload = CreateEntitiesGroupPayload( group="ANIMALS", @@ -274,13 +277,14 @@ async def test_create_entities_group_twice( assert resp.status_code == 409 +@pytest.mark.deploy_modes("standalone") async def test_update_entities_group( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, entities, ): - kbid = knowledgebox + kbid = standalone_knowledgebox update = UpdateEntitiesGroupPayload( add={"seal": Entity(value="seal")}, @@ -303,13 +307,14 @@ async def test_update_entities_group( assert body["entities"]["dog"]["value"] == "updated-dog" +@pytest.mark.deploy_modes("standalone") async def test_update_indexed_entities_group( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox: str, + 
standalone_knowledgebox: str, processing_entities, ): - kbid = knowledgebox + kbid = standalone_knowledgebox update = UpdateEntitiesGroupPayload( add={"seal": Entity(value="seal")}, @@ -330,13 +335,14 @@ async def test_update_indexed_entities_group( assert body["entities"]["dolphin"]["value"] == "updated-dolphin" +@pytest.mark.deploy_modes("standalone") async def test_update_entities_group_metadata( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, entities, ): - kbid = knowledgebox + kbid = standalone_knowledgebox update = UpdateEntitiesGroupPayload( title="Updated Animals", @@ -353,13 +359,14 @@ async def test_update_entities_group_metadata( assert body["color"] == "red" +@pytest.mark.deploy_modes("standalone") async def test_delete_entities_group( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, entities, ): - kbid = knowledgebox + kbid = standalone_knowledgebox resp = await delete_entities_group(nucliadb_writer, kbid, "ANIMALS") assert resp.status_code == 200 @@ -368,13 +375,14 @@ async def test_delete_entities_group( assert resp.status_code == 404 +@pytest.mark.deploy_modes("standalone") async def test_delete_and_recreate_entities_group( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, user_entities, ): - kbid = knowledgebox + kbid = standalone_knowledgebox resp = await delete_entities_group(nucliadb_writer, kbid, "ANIMALS") assert resp.status_code == 200 @@ -385,7 +393,7 @@ async def test_delete_and_recreate_entities_group( title="Animals", color="white", ) - resp = await create_entities_group(nucliadb_writer, knowledgebox, payload) + resp = await create_entities_group(nucliadb_writer, standalone_knowledgebox, payload) assert resp.status_code == 200 resp = await nucliadb_reader.get(f"/kb/{kbid}/entitiesgroup/ANIMALS") @@ -395,15 +403,16 @@ async def 
test_delete_and_recreate_entities_group( assert body["color"] == "white" +@pytest.mark.deploy_modes("standalone") async def test_entities_indexing( nucliadb_reader: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, entities, predict_mock, ): # TODO: improve test cases here - kbid = knowledgebox + kbid = standalone_knowledgebox resp = await nucliadb_reader.get( f"/kb/{kbid}/suggest", diff --git a/nucliadb/tests/nucliadb/integration/test_export_import.py b/nucliadb/tests/nucliadb/integration/test_export_import.py index 74f18b8f11..a1fdb84b75 100644 --- a/nucliadb/tests/nucliadb/integration/test_export_import.py +++ b/nucliadb/tests/nucliadb/integration/test_export_import.py @@ -38,10 +38,12 @@ @pytest.fixture(scope="function") -async def src_kb(nucliadb_writer: AsyncClient, nucliadb_manager: AsyncClient) -> AsyncIterator[str]: +async def src_kb( + nucliadb_writer: AsyncClient, nucliadb_writer_manager: AsyncClient +) -> AsyncIterator[str]: slug = uuid.uuid4().hex - resp = await nucliadb_manager.post("/kbs", json={"slug": slug}) + resp = await nucliadb_writer_manager.post("/kbs", json={"slug": slug}) assert resp.status_code == 201 kbid = resp.json().get("uuid") @@ -117,7 +119,7 @@ async def src_kb(nucliadb_writer: AsyncClient, nucliadb_manager: AsyncClient) -> assert resp.status_code == 200 yield kbid - resp = await nucliadb_manager.delete(f"/kb/{kbid}") + resp = await nucliadb_writer_manager.delete(f"/kb/{kbid}") try: assert resp.status_code == 200 except AssertionError: @@ -125,12 +127,12 @@ async def src_kb(nucliadb_writer: AsyncClient, nucliadb_manager: AsyncClient) -> @pytest.fixture(scope="function") -async def dst_kb(nucliadb_manager: AsyncClient) -> AsyncIterator[str]: - resp = await nucliadb_manager.post("/kbs", json={"slug": "dst_kb"}) +async def dst_kb(nucliadb_writer_manager: AsyncClient) -> AsyncIterator[str]: + resp = await nucliadb_writer_manager.post("/kbs", json={"slug": "dst_kb"}) assert resp.status_code == 201 uuid = 
resp.json().get("uuid") yield uuid - resp = await nucliadb_manager.delete(f"/kb/{uuid}") + resp = await nucliadb_writer_manager.delete(f"/kb/{uuid}") try: assert resp.status_code == 200 except AssertionError: @@ -149,6 +151,7 @@ def standalone_nucliadb(): yield +@pytest.mark.deploy_modes("standalone") async def test_on_standalone_nucliadb( standalone_nucliadb, natsd, @@ -208,6 +211,7 @@ async def imports_consumer(context: ApplicationContext) -> AsyncIterator[NatsTas # await consumer.finalize() +@pytest.mark.deploy_modes("standalone") async def test_on_hosted_nucliadb( hosted_nucliadb, nucliadb_writer: AsyncClient, @@ -256,6 +260,7 @@ async def _test_export_import_kb_api( await _test_learning_config_mismatch(nucliadb_writer, export, dst_kb) +@pytest.mark.deploy_modes("standalone") async def test_export_and_create_kb_from_import_api( standalone_nucliadb, nucliadb_writer: AsyncClient, @@ -292,6 +297,7 @@ async def test_export_and_create_kb_from_import_api( await _test_learning_config_mismatch(nucliadb_writer, export, dst_kb) +@pytest.mark.deploy_modes("standalone") async def _test_learning_config_mismatch( nucliadb_writer: AsyncClient, export: BytesIO, diff --git a/nucliadb/tests/nucliadb/integration/test_field_external_file.py b/nucliadb/tests/nucliadb/integration/test_field_external_file.py index 1a82d6a01d..70eb63b76b 100644 --- a/nucliadb/tests/nucliadb/integration/test_field_external_file.py +++ b/nucliadb/tests/nucliadb/integration/test_field_external_file.py @@ -18,6 +18,7 @@ # along with this program. If not, see . 
# import pytest +from httpx import AsyncClient from nucliadb.writer.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, RESOURCES_PREFIX from nucliadb_utils.settings import nuclia_settings @@ -40,14 +41,15 @@ def nuclia_jwt_key(): yield +@pytest.mark.deploy_modes("standalone") async def test_external_file_field( nuclia_jwt_key, - nucliadb_reader, - nucliadb_writer, - knowledgebox, + nucliadb_reader: AsyncClient, + nucliadb_writer: AsyncClient, + standalone_knowledgebox, ): # Create a resource - kb_path = f"/{KB_PREFIX}/{knowledgebox}" + kb_path = f"/{KB_PREFIX}/{standalone_knowledgebox}" resp = await nucliadb_writer.post( f"{kb_path}/{RESOURCES_PREFIX}", json={ diff --git a/nucliadb/tests/nucliadb/integration/test_find.py b/nucliadb/tests/nucliadb/integration/test_find.py index 261507064c..d4446ce1b7 100644 --- a/nucliadb/tests/nucliadb/integration/test_find.py +++ b/nucliadb/tests/nucliadb/integration/test_find.py @@ -39,14 +39,15 @@ from tests.utils import inject_message +@pytest.mark.deploy_modes("standalone") async def test_find_with_label_changes( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox, ): resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "slug": "myresource", "title": "My Title", @@ -61,7 +62,7 @@ async def test_find_with_label_changes( # should get 1 result resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/find", + f"/kb/{standalone_knowledgebox}/find", json={ "query": "title", }, @@ -72,7 +73,7 @@ async def test_find_with_label_changes( # assert we get no results with label filter resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/find", + f"/kb/{standalone_knowledgebox}/find", json={"query": "title", "filters": ["/classification.labels/labels/label1"]}, ) assert resp.status_code == 200 @@ -81,7 +82,7 @@ async def test_find_with_label_changes( # 
add new label resp = await nucliadb_writer.patch( - f"/kb/{knowledgebox}/resource/{rid}", + f"/kb/{standalone_knowledgebox}/resource/{rid}", json={ # "title": "My new title", "usermetadata": { @@ -101,7 +102,7 @@ async def test_find_with_label_changes( # we should get 1 result now with updated label resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/find", + f"/kb/{standalone_knowledgebox}/find", json={"query": "title", "filters": ["/classification.labels/labels/label1"]}, ) assert resp.status_code == 200 @@ -109,32 +110,34 @@ async def test_find_with_label_changes( assert len(body["resources"]) == 1 +@pytest.mark.deploy_modes("standalone") async def test_find_does_not_support_fulltext_search( nucliadb_reader: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/find?query=title&features=fulltext&features=keyword", + f"/kb/{standalone_knowledgebox}/find?query=title&features=fulltext&features=keyword", ) assert resp.status_code == 422 assert "fulltext search not supported" in resp.json()["detail"][0]["msg"] resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/find", + f"/kb/{standalone_knowledgebox}/find", json={"query": "title", "features": [SearchOptions.FULLTEXT, SearchOptions.KEYWORD]}, ) assert resp.status_code == 422 assert "fulltext search not supported" in resp.json()["detail"][0]["msg"] +@pytest.mark.deploy_modes("standalone") async def test_find_resource_filters( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox, ): resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "My Title", "summary": "My summary", @@ -145,7 +148,7 @@ async def test_find_resource_filters( rid1 = resp.json()["uuid"] resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + 
f"/kb/{standalone_knowledgebox}/resources", json={ "title": "My Title", "summary": "My summary", @@ -157,7 +160,7 @@ async def test_find_resource_filters( # Should get 2 result resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/find", + f"/kb/{standalone_knowledgebox}/find", json={ "query": "title", }, @@ -169,7 +172,7 @@ async def test_find_resource_filters( # Check that resource filtering works for rid in [rid1, rid2]: resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/find", + f"/kb/{standalone_knowledgebox}/find", json={ "query": "title", "resource_filters": [rid], @@ -181,19 +184,20 @@ async def test_find_resource_filters( assert rid in body["resources"] +@pytest.mark.deploy_modes("standalone") async def test_find_min_score( nucliadb_reader: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): # When not specifying the min score on the request # it should default to 0 for bm25 and 0.7 for semantic - resp = await nucliadb_reader.post(f"/kb/{knowledgebox}/find", json={"query": "dummy"}) + resp = await nucliadb_reader.post(f"/kb/{standalone_knowledgebox}/find", json={"query": "dummy"}) assert resp.status_code == 200 assert resp.json()["min_score"] == {"bm25": 0, "semantic": 0.7} # When specifying the min score on the request resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/find", + f"/kb/{standalone_knowledgebox}/find", json={"query": "dummy", "min_score": {"bm25": 10, "semantic": 0.5}}, ) assert resp.status_code == 200 @@ -201,19 +205,20 @@ async def test_find_min_score( # Check that old api still works resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/find", json={"query": "dummy", "min_score": 0.5} + f"/kb/{standalone_knowledgebox}/find", json={"query": "dummy", "min_score": 0.5} ) assert resp.status_code == 200 assert resp.json()["min_score"] == {"bm25": 0, "semantic": 0.5} +@pytest.mark.deploy_modes("standalone") async def test_story_7286( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + 
standalone_knowledgebox, ): resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "slug": "myresource", "title": "My Title", @@ -225,7 +230,7 @@ async def test_story_7286( rid = resp.json()["uuid"] resp = await nucliadb_writer.patch( - f"/kb/{knowledgebox}/resource/{rid}", + f"/kb/{standalone_knowledgebox}/resource/{rid}", json={ "fieldmetadata": [ { @@ -248,7 +253,7 @@ async def test_story_7286( with patch("nucliadb.search.search.hydrator.managed_serialize", return_value=None): # should get no result (because serialize returns None, as the resource is not found in the DB) resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/find", + f"/kb/{standalone_knowledgebox}/find", json={ "query": "title", "features": [SearchOptions.KEYWORD, SearchOptions.SEMANTIC, SearchOptions.RELATIONS], @@ -265,13 +270,14 @@ async def test_story_7286( assert len(body["resources"]) == 0 +@pytest.mark.deploy_modes("standalone") async def test_find_marks_fuzzy_results( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "slug": "myresource", "title": "My Title", @@ -281,7 +287,7 @@ async def test_find_marks_fuzzy_results( # Should get only one non-fuzzy result resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/find", + f"/kb/{standalone_knowledgebox}/find", json={ "query": "Title", }, @@ -292,7 +298,7 @@ async def test_find_marks_fuzzy_results( # Should get only one fuzzy result resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/find", + f"/kb/{standalone_knowledgebox}/find", json={ "query": "totle", }, @@ -303,7 +309,7 @@ async def test_find_marks_fuzzy_results( # Should not get any result if exact match term queried resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/find", + f"/kb/{standalone_knowledgebox}/find", json={ 
"query": '"totle"', }, @@ -323,6 +329,7 @@ def check_fuzzy_paragraphs(find_response, *, fuzzy_result: bool, n_expected: int assert found == n_expected +@pytest.mark.deploy_modes("standalone") async def test_find_returns_best_matches( nucliadb_reader: AsyncClient, philosophy_books_kb, @@ -360,10 +367,11 @@ def find_with_limits_exceeded_error(): yield +@pytest.mark.deploy_modes("standalone") async def test_find_handles_limits_exceeded_error( - nucliadb_reader, knowledgebox, find_with_limits_exceeded_error + nucliadb_reader: AsyncClient, standalone_knowledgebox, find_with_limits_exceeded_error ): - kb = knowledgebox + kb = standalone_knowledgebox resp = await nucliadb_reader.get(f"/kb/{kb}/find") assert resp.status_code == 402 assert resp.json() == {"detail": "over the quota"} @@ -373,12 +381,13 @@ async def test_find_handles_limits_exceeded_error( assert resp.json() == {"detail": "over the quota"} +@pytest.mark.deploy_modes("standalone") async def test_find_keyword_filters( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): - kbid = knowledgebox + kbid = standalone_knowledgebox # Create a couple of resources with different keywords in the title resp = await nucliadb_writer.post( f"/kb/{kbid}/resources", @@ -469,6 +478,7 @@ async def test_find_keyword_filters( ), f"Keyword filters: {keyword_filters}, expected rids: {expected_rids}" +@pytest.mark.deploy_modes("standalone") async def test_find_highlight( nucliadb_reader: AsyncClient, philosophy_books_kb: str, @@ -496,15 +506,16 @@ async def test_find_highlight( assert "Marcus Aurelius" in match["text"] +@pytest.mark.deploy_modes("standalone") async def test_find_fields_parameter( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox: str, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox: str, ): text = "This is a text" resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + 
f"/kb/{standalone_knowledgebox}/resources", json={ "slug": "myresource", "title": "My Title", @@ -521,7 +532,7 @@ async def test_find_fields_parameter( rid = resp.json()["uuid"] bm = BrokerMessage() - bm.kbid = knowledgebox + bm.kbid = standalone_knowledgebox bm.uuid = rid field = FieldID(field_type=FieldType.TEXT, field="text1") @@ -537,7 +548,7 @@ async def test_find_fields_parameter( evw.vectors.vectors.vectors.append(vector) bm.field_vectors.append(evw) - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) # Semantic search only on text fields should work for fields_param, expected_n_resources in [ @@ -546,7 +557,7 @@ async def test_find_fields_parameter( (["u"], 0), ]: resp = await nucliadb_reader.post( - f"/kb/{knowledgebox}/find", + f"/kb/{standalone_knowledgebox}/find", json={ "query": text, "features": ["semantic"], diff --git a/nucliadb/tests/nucliadb/integration/test_labels.py b/nucliadb/tests/nucliadb/integration/test_labels.py index f984875a36..f77b464fbe 100644 --- a/nucliadb/tests/nucliadb/integration/test_labels.py +++ b/nucliadb/tests/nucliadb/integration/test_labels.py @@ -20,6 +20,7 @@ import uuid from datetime import datetime +import pytest from httpx import AsyncClient from nucliadb.ingest.orm.resource import ( @@ -42,15 +43,16 @@ from nucliadb_models.writer import CreateResourcePayload from nucliadb_protos import resources_pb2 as rpb from nucliadb_protos.writer_pb2 import BrokerMessage +from nucliadb_protos.writer_pb2_grpc import WriterStub from tests.utils import inject_message -def broker_resource(knowledgebox: str) -> BrokerMessage: +def broker_resource(standalone_knowledgebox: str) -> BrokerMessage: rid = str(uuid.uuid4()) slug = f"{rid}slug1" bm: BrokerMessage = BrokerMessage( - kbid=knowledgebox, + kbid=standalone_knowledgebox, uuid=rid, slug=slug, type=BrokerMessage.AUTOCOMMIT, @@ -157,24 +159,25 @@ def broker_resource(knowledgebox: str) -> BrokerMessage: return bm -async def 
inject_resource_with_paragraph_labels(knowledgebox, writer): - bm = broker_resource(knowledgebox) +async def inject_resource_with_paragraph_labels(standalone_knowledgebox, writer): + bm = broker_resource(standalone_knowledgebox) await inject_message(writer, bm) return bm.uuid +@pytest.mark.deploy_modes("standalone") async def test_labels_global( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc, - knowledgebox, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox, ): # PUBLIC API - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}") assert resp.status_code == 200 resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/labelset/label1", + f"/kb/{standalone_knowledgebox}/labelset/label1", json={ "title": "mylabel", "multiple": False, @@ -183,21 +186,22 @@ async def test_labels_global( ) assert resp.status_code == 200 - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/labelsets") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/labelsets") assert resp.status_code == 200 assert len(resp.json()["labelsets"]) == 1 assert resp.json()["labelsets"]["label1"]["multiple"] is False - rid = await inject_resource_with_paragraph_labels(knowledgebox, nucliadb_grpc) + rid = await inject_resource_with_paragraph_labels(standalone_knowledgebox, nucliadb_ingest_grpc) - resp = await nucliadb_writer.post(f"/kb/{knowledgebox}/resource/{rid}/reindex") + resp = await nucliadb_writer.post(f"/kb/{standalone_knowledgebox}/resource/{rid}/reindex") assert resp.status_code == 200 +@pytest.mark.deploy_modes("standalone") async def test_classification_labels_cancelled_by_the_user( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): expected_label = { "label": "label", @@ -205,7 +209,7 @@ async def test_classification_labels_cancelled_by_the_user( "cancelled_by_user": True, } resp = await nucliadb_writer.post( - 
f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "My Resource", "summary": "My summary", @@ -217,7 +221,7 @@ async def test_classification_labels_cancelled_by_the_user( # Check cancelled labels come in resource get resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/resource/{rid}", + f"/kb/{standalone_knowledgebox}/resource/{rid}", ) assert resp.status_code == 200 content = resp.json() @@ -225,26 +229,27 @@ async def test_classification_labels_cancelled_by_the_user( # Check cancelled labels come in resource list resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", ) assert resp.status_code == 200 content = resp.json() assert content["resources"][0]["usermetadata"]["classifications"][0] == expected_label # Check cancelled labels come in search results - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?query=summary") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/search?query=summary") assert resp.status_code == 200 content = resp.json() assert content["resources"][rid]["usermetadata"]["classifications"][0] == expected_label +@pytest.mark.deploy_modes("standalone") async def test_classification_labels_are_shown_in_resource_basic( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc, - knowledgebox, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox, ): - rid = await inject_resource_with_paragraph_labels(knowledgebox, nucliadb_grpc) + rid = await inject_resource_with_paragraph_labels(standalone_knowledgebox, nucliadb_ingest_grpc) classifications = [Classification(labelset="labelset1", label="label1")] @@ -258,19 +263,19 @@ async def test_classification_labels_are_shown_in_resource_basic( ) # Check resource get - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}?show=basic") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}?show=basic") 
assert resp.status_code == 200, f"Response {resp}: {resp.text}" resource = Resource.model_validate_json(resp.content) assert resource.computedmetadata == expected_computedmetadata # Check resources list - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resources?show=basic") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resources?show=basic") assert resp.status_code == 200 resources = ResourceList.model_validate_json(resp.content) assert resources.resources[0].computedmetadata == expected_computedmetadata # Check search results list - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?show=basic") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/search?show=basic") assert resp.status_code == 200 results = KnowledgeboxSearchResults.model_validate_json(resp.content) assert results.resources[rid].computedmetadata == expected_computedmetadata @@ -309,10 +314,11 @@ def test_add_field_classifications(): ) +@pytest.mark.deploy_modes("standalone") async def test_fieldmetadata_classification_labels( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): fieldmetadata = UserFieldMetadata( field=FieldID(field="text", field_type=FieldID.FieldType.TEXT), @@ -331,14 +337,14 @@ async def test_fieldmetadata_classification_labels( fieldmetadata=[fieldmetadata], ) resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", - data=payload.model_dump_json(), # type: ignore + f"/kb/{standalone_knowledgebox}/resources", + content=payload.model_dump_json(), ) assert resp.status_code == 201 rid = resp.json()["uuid"] # Check resource get - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}?show=basic") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}?show=basic") assert resp.status_code == 200 resource = Resource.model_validate_json(resp.content) assert resource.fieldmetadata[0] == fieldmetadata # type: ignore diff --git 
a/nucliadb/tests/nucliadb/integration/test_labelsets.py b/nucliadb/tests/nucliadb/integration/test_labelsets.py index 6f7f68ff8d..45ffd61448 100644 --- a/nucliadb/tests/nucliadb/integration/test_labelsets.py +++ b/nucliadb/tests/nucliadb/integration/test_labelsets.py @@ -18,15 +18,17 @@ # along with this program. If not, see . # +import pytest from httpx import AsyncClient +@pytest.mark.deploy_modes("standalone") async def test_selection_labelsets( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, ): - kbid = knowledgebox + kbid = standalone_knowledgebox resp = await nucliadb_writer.post( f"/kb/{kbid}/labelset/myselections", @@ -58,12 +60,13 @@ async def test_selection_labelsets( assert body["labels"] == [] +@pytest.mark.deploy_modes("standalone") async def test_duplicated_labelsets_not_allowed( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, ): - kbid = knowledgebox + kbid = standalone_knowledgebox # Create labelset resp = await nucliadb_writer.post( @@ -105,12 +108,13 @@ async def test_duplicated_labelsets_not_allowed( assert resp.status_code == 422 +@pytest.mark.deploy_modes("standalone") async def test_duplicated_labels_not_allowed( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, ): - kbid = knowledgebox + kbid = standalone_knowledgebox resp = await nucliadb_writer.post( f"/kb/{kbid}/labelset/myselections", diff --git a/nucliadb/tests/nucliadb/integration/test_matryoshka_embeddings.py b/nucliadb/tests/nucliadb/integration/test_matryoshka_embeddings.py index 3c7c8e8954..2b551db91b 100644 --- a/nucliadb/tests/nucliadb/integration/test_matryoshka_embeddings.py +++ b/nucliadb/tests/nucliadb/integration/test_matryoshka_embeddings.py @@ -20,6 +20,7 @@ import uuid from unittest.mock import patch +import pytest from faker import Faker from httpx import AsyncClient @@ -34,9 +35,10 
@@ fake = Faker() +@pytest.mark.deploy_modes("standalone") async def test_matryoshka_embeddings( maindb_driver: Driver, - nucliadb_grpc: WriterStub, + nucliadb_ingest_grpc: WriterStub, nucliadb_writer: AsyncClient, nucliadb_reader: AsyncClient, learning_config, @@ -56,7 +58,7 @@ async def test_matryoshka_embeddings( semantic_matryoshka_dims=matryoshka_dimensions, ) - new_kb_response = await nucliadb_grpc.NewKnowledgeBoxV2( # type: ignore + new_kb_response = await nucliadb_ingest_grpc.NewKnowledgeBoxV2( # type: ignore writer_pb2.NewKnowledgeBoxV2Request( kbid=kbid, slug=slug, @@ -117,7 +119,7 @@ async def test_matryoshka_embeddings( bmb.add_field_builder(text_field) bm = bmb.build() - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) # Search diff --git a/nucliadb/tests/nucliadb/integration/test_pinecone_kb.py b/nucliadb/tests/nucliadb/integration/test_pinecone_kb.py index 1da015de12..0d259576d3 100644 --- a/nucliadb/tests/nucliadb/integration/test_pinecone_kb.py +++ b/nucliadb/tests/nucliadb/integration/test_pinecone_kb.py @@ -94,20 +94,43 @@ def mock_pinecone_client(data_plane, control_plane): yield session_mock +@pytest.fixture(scope="function") +async def pinecone_knowledgebox(nucliadb_writer_manager: AsyncClient, mock_pinecone_client): + resp = await nucliadb_writer_manager.post( + "/kbs", + json={ + "slug": "pinecone_knowledgebox", + "external_index_provider": { + "type": "pinecone", + "api_key": "my-pinecone-api-key", + "serverless_cloud": "aws_us_east_1", + }, + }, + ) + assert resp.status_code == 201 + uuid = resp.json().get("uuid") + + yield uuid + + resp = await nucliadb_writer_manager.delete(f"/kb/{uuid}") + assert resp.status_code == 200 + + @pytest.fixture(autouse=True) def hosted_nucliadb(): with unittest.mock.patch("nucliadb.ingest.service.writer.is_onprem_nucliadb", return_value=False): yield +@pytest.mark.deploy_modes("standalone") async def test_kb_creation( - nucliadb_grpc: WriterStub, + 
nucliadb_ingest_grpc: WriterStub, nucliadb_reader: AsyncClient, control_plane, ): """ This tests the new method for creating kbs on a hosted nucliadb that - uses the nucliadb_grpc.NewKnowledgeBoxV2 method. + uses the nucliadb_ingest_grpc.NewKnowledgeBoxV2 method. """ expected_index_names = ["nuclia-someuuid1", "nuclia-someuuid2"] with mock.patch( @@ -146,7 +169,7 @@ ) # Creating a knowledge with 2 vectorsets box should create two Pinecone indexes - response: NewKnowledgeBoxV2Response = await nucliadb_grpc.NewKnowledgeBoxV2( + response: NewKnowledgeBoxV2Response = await nucliadb_ingest_grpc.NewKnowledgeBoxV2( request, timeout=None ) # type: ignore assert response.status == KnowledgeBoxResponseStatus.OK @@ -192,7 +215,7 @@ ) assert pinecone_config.indexes[english].vector_dimension == 3 # Deleting a knowledge box should delete the Pinecone index - response = await nucliadb_grpc.DeleteKnowledgeBox( + response = await nucliadb_ingest_grpc.DeleteKnowledgeBox( KnowledgeBoxID(slug=slug, uuid=kbid), timeout=None ) # type: ignore assert response.status == KnowledgeBoxResponseStatus.OK @@ -201,6 +224,7 @@ assert control_plane.delete_index.call_count == 2 +@pytest.mark.deploy_modes("standalone") async def test_get_kb( nucliadb_reader: AsyncClient, pinecone_knowledgebox: str, @@ -216,6 +240,7 @@ assert config["configured_external_index_provider"]["type"] == "pinecone" +@pytest.mark.deploy_modes("standalone") async def test_kb_counters( nucliadb_writer: AsyncClient, nucliadb_reader: AsyncClient, @@ -247,10 +272,11 @@ } +@pytest.mark.deploy_modes("standalone") async def test_find_on_pinecone_kb( nucliadb_reader: AsyncClient, pinecone_knowledgebox: str, - pinecone_data_plane, + data_plane, ): kbid = pinecone_knowledgebox @@ -261,7 +287,7 @@ assert resp.status_code == 200, resp.text -async def 
_inject_broker_message(nucliadb_grpc: WriterStub, kbid: str, rid: str, slug: str): +async def _inject_broker_message(nucliadb_ingest_grpc: WriterStub, kbid: str, rid: str, slug: str): bm = BrokerMessage(kbid=kbid, uuid=rid, slug=slug, type=BrokerMessage.AUTOCOMMIT) bm.basic.icon = "text/plain" bm.basic.title = "Title Resource" @@ -358,11 +384,12 @@ async def _inject_broker_message(nucliadb_grpc: WriterStub, kbid: str, rid: str, bm.field_vectors.append(ev) bm.source = BrokerMessage.MessageSource.PROCESSOR - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) +@pytest.mark.deploy_modes("standalone") async def test_ingestion_on_pinecone_kb( - nucliadb_grpc: WriterStub, + nucliadb_ingest_grpc: WriterStub, nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, pinecone_knowledgebox: str, @@ -383,7 +410,7 @@ assert resp.status_code == 201 rid = resp.json()["uuid"] - await _inject_broker_message(nucliadb_grpc, kbid, rid, slug) + await _inject_broker_message(nucliadb_ingest_grpc, kbid, rid, slug) assert data_plane.delete_by_id_prefix.await_count == 1 assert data_plane.upsert_in_batches.await_count == 1 @@ -401,9 +428,10 @@ async def app_context(natsd, storage, nucliadb): await ctx.finalize() +@pytest.mark.deploy_modes("standalone") async def test_pinecone_kb_rollover_index( app_context, - nucliadb_grpc: WriterStub, + nucliadb_ingest_grpc: WriterStub, nucliadb_writer: AsyncClient, pinecone_knowledgebox: str, data_plane, @@ -425,7 +453,7 @@ rid = resp.json()["uuid"] # Inject a broker message as if it was the result of a Nuclia processing request - await _inject_broker_message(nucliadb_grpc, kbid, rid, slug) + await _inject_broker_message(nucliadb_ingest_grpc, kbid, rid, slug) # Check that vectors were upserted to pinecone assert data_plane.upsert_in_batches.await_count == 1 diff --git 
a/nucliadb/tests/nucliadb/integration/test_predict_proxy.py b/nucliadb/tests/nucliadb/integration/test_predict_proxy.py index 43f0d519c5..f6555f244b 100644 --- a/nucliadb/tests/nucliadb/integration/test_predict_proxy.py +++ b/nucliadb/tests/nucliadb/integration/test_predict_proxy.py @@ -20,6 +20,7 @@ import pytest +from httpx import AsyncClient @pytest.mark.parametrize( @@ -44,8 +45,16 @@ ), ], ) -async def test_predict_proxy(nucliadb_reader, knowledgebox, method, endpoint, params, payload): - kbid = knowledgebox +@pytest.mark.deploy_modes("standalone") +async def test_predict_proxy( + nucliadb_reader: AsyncClient, + standalone_knowledgebox: str, + method: str, + endpoint: str, + params, + payload, +): + kbid = standalone_knowledgebox http_func = getattr(nucliadb_reader, method.lower()) http_func_kwargs = {"params": params} if method == "POST": @@ -58,11 +67,12 @@ async def test_predict_proxy(nucliadb_reader, knowledgebox, method, endpoint, pa assert resp.status_code == 200, resp.text +@pytest.mark.deploy_modes("standalone") async def test_predict_proxy_not_proxied_returns_422( - nucliadb_reader, - knowledgebox, + nucliadb_reader: AsyncClient, + standalone_knowledgebox: str, ): - kbid = knowledgebox + kbid = standalone_knowledgebox resp = await nucliadb_reader.post( f"/kb/{kbid}/predict/summarize", json={"resources": {"foo": "bar"}}, @@ -70,8 +80,9 @@ async def test_predict_proxy_not_proxied_returns_422( assert resp.status_code == 422 +@pytest.mark.deploy_modes("standalone") async def test_predict_proxy_returns_404_on_non_existing_kb( - nucliadb_reader, + nucliadb_reader: AsyncClient, ): resp = await nucliadb_reader.post( f"/kb/idonotexist-kb/predict/chat", diff --git a/nucliadb/tests/nucliadb/integration/test_processing_status.py b/nucliadb/tests/nucliadb/integration/test_processing_status.py index 5053646f28..27958cf791 100644 --- a/nucliadb/tests/nucliadb/integration/test_processing_status.py +++ b/nucliadb/tests/nucliadb/integration/test_processing_status.py @@ 
-38,14 +38,15 @@ ), ], ) +@pytest.mark.deploy_modes("standalone") async def test_endpoint_set_resource_status_to_pending( endpoint, expected_status, payload, nucliadb_writer: AsyncClient, nucliadb_reader: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox: str, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox: str, ): """ - Create a resource with a status PROCESSED @@ -53,11 +54,11 @@ async def test_endpoint_set_resource_status_to_pending( - Check that the status is set to PENDING """ # Create a resource, processing - br = broker_resource(knowledgebox) + br = broker_resource(standalone_knowledgebox) br.texts["text"].CopyFrom( rpb.FieldText(body="This is my text field", format=rpb.FieldText.Format.PLAIN) ) - await inject_message(nucliadb_grpc, br) + await inject_message(nucliadb_ingest_grpc, br) # Receive message from processor br.source = BrokerMessage.MessageSource.PROCESSOR @@ -66,37 +67,40 @@ async def test_endpoint_set_resource_status_to_pending( etw.field.field = "text" etw.field.field_type = rpb.FieldType.TEXT br.extracted_text.append(etw) - await inject_message(nucliadb_grpc, br) + await inject_message(nucliadb_ingest_grpc, br) - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{br.uuid}") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{br.uuid}") assert resp.status_code == 200 resp_json = resp.json() assert resp_json["metadata"]["status"] == "PROCESSED" kwargs = payload or {} resp = await nucliadb_writer.post( - endpoint.format(kbid=knowledgebox, rid=br.uuid), + endpoint.format(kbid=standalone_knowledgebox, rid=br.uuid), **kwargs, ) assert resp.status_code == expected_status - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{br.uuid}") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{br.uuid}") assert resp.status_code == 200 resp_json = resp.json() assert resp_json["metadata"]["status"] == "PENDING" +@pytest.mark.deploy_modes("standalone") async def 
test_field_status_errors_processor( nucliadb_writer: AsyncClient, nucliadb_reader: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox: str, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox: str, ): # Create a resource, processing - br = broker_resource(knowledgebox) - await inject_message(nucliadb_grpc, br) + br = broker_resource(standalone_knowledgebox) + await inject_message(nucliadb_ingest_grpc, br) - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{br.uuid}?show=basic&show=errors") + resp = await nucliadb_reader.get( + f"/kb/{standalone_knowledgebox}/resource/{br.uuid}?show=basic&show=errors" + ) assert resp.status_code == 200 resp_json = resp.json() assert resp_json["metadata"]["status"] == "PENDING" @@ -116,9 +120,11 @@ async def test_field_status_errors_processor( code=Error.ErrorCode.EXTRACT, ) ) - await inject_message(nucliadb_grpc, br) + await inject_message(nucliadb_ingest_grpc, br) - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{br.uuid}?show=basic&show=errors") + resp = await nucliadb_reader.get( + f"/kb/{standalone_knowledgebox}/resource/{br.uuid}?show=basic&show=errors" + ) assert resp.status_code == 200 resp_json = resp.json() assert resp_json["metadata"]["status"] == "ERROR" @@ -128,9 +134,11 @@ async def test_field_status_errors_processor( # Receive message from processor without errors, previous errors are cleared br.errors.pop() - await inject_message(nucliadb_grpc, br) + await inject_message(nucliadb_ingest_grpc, br) - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{br.uuid}?show=basic&show=errors") + resp = await nucliadb_reader.get( + f"/kb/{standalone_knowledgebox}/resource/{br.uuid}?show=basic&show=errors" + ) assert resp.status_code == 200 resp_json = resp.json() assert resp_json["metadata"]["status"] == "PROCESSED" @@ -139,20 +147,23 @@ async def test_field_status_errors_processor( assert "errors" not in resp_json["data"]["generics"]["summary"] 
+@pytest.mark.deploy_modes("standalone") async def test_field_status_errors_data_augmentation( nucliadb_writer: AsyncClient, nucliadb_reader: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox: str, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox: str, ): # Create a resource, processing - br = broker_resource(knowledgebox) + br = broker_resource(standalone_knowledgebox) br.texts["text"].CopyFrom( rpb.FieldText(body="This is my text field", format=rpb.FieldText.Format.PLAIN) ) - await inject_message(nucliadb_grpc, br) + await inject_message(nucliadb_ingest_grpc, br) - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{br.uuid}?show=basic&show=errors") + resp = await nucliadb_reader.get( + f"/kb/{standalone_knowledgebox}/resource/{br.uuid}?show=basic&show=errors" + ) assert resp.status_code == 200 resp_json = resp.json() assert resp_json["metadata"]["status"] == "PENDING" @@ -169,9 +180,11 @@ async def test_field_status_errors_data_augmentation( etw.field.field = "text" etw.field.field_type = rpb.FieldType.TEXT br.extracted_text.append(etw) - await inject_message(nucliadb_grpc, br) + await inject_message(nucliadb_ingest_grpc, br) - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{br.uuid}?show=basic&show=errors") + resp = await nucliadb_reader.get( + f"/kb/{standalone_knowledgebox}/resource/{br.uuid}?show=basic&show=errors" + ) assert resp.status_code == 200 resp_json = resp.json() assert resp_json["metadata"]["status"] == "PROCESSED" @@ -191,9 +204,11 @@ async def test_field_status_errors_data_augmentation( g.data_augmentation.SetInParent() br.generated_by.pop() br.generated_by.append(g) - await inject_message(nucliadb_grpc, br) + await inject_message(nucliadb_ingest_grpc, br) - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{br.uuid}?show=basic&show=errors") + resp = await nucliadb_reader.get( + f"/kb/{standalone_knowledgebox}/resource/{br.uuid}?show=basic&show=errors" + ) assert resp.status_code == 
200 resp_json = resp.json() assert resp_json["metadata"]["status"] == "PROCESSED" @@ -203,9 +218,11 @@ async def test_field_status_errors_data_augmentation( # Receive message from data augmentation without errors br.errors.pop() - await inject_message(nucliadb_grpc, br) + await inject_message(nucliadb_ingest_grpc, br) - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{br.uuid}?show=basic&show=errors") + resp = await nucliadb_reader.get( + f"/kb/{standalone_knowledgebox}/resource/{br.uuid}?show=basic&show=errors" + ) assert resp.status_code == 200 resp_json = resp.json() assert resp_json["metadata"]["status"] == "PROCESSED" diff --git a/nucliadb/tests/nucliadb/integration/test_purge.py b/nucliadb/tests/nucliadb/integration/test_purge.py index a30b25ebc4..a67e25c420 100644 --- a/nucliadb/tests/nucliadb/integration/test_purge.py +++ b/nucliadb/tests/nucliadb/integration/test_purge.py @@ -24,6 +24,7 @@ from typing import cast from unittest.mock import AsyncMock +import pytest from httpx import AsyncClient import nucliadb.common.nidx @@ -48,10 +49,12 @@ from tests.utils.dirty_index import wait_for_sync +@pytest.mark.deploy_modes("standalone") async def test_purge_deletes_everything_from_maindb( maindb_driver: Driver, storage: Storage, - nucliadb_manager: AsyncClient, + nucliadb_writer_manager: AsyncClient, + nucliadb_reader_manager: AsyncClient, nucliadb_writer: AsyncClient, ): """Create a KB and some resource and then purge it. 
Validate that purge @@ -59,11 +62,11 @@ async def test_purge_deletes_everything_from_maindb( """ kb_slug = str(uuid.uuid4()) - resp = await nucliadb_manager.post("/kbs", json={"slug": kb_slug}) + resp = await nucliadb_writer_manager.post("/kbs", json={"slug": kb_slug}) assert resp.status_code == 201 kbid = resp.json().get("uuid") - resp = await nucliadb_manager.get("/kbs") + resp = await nucliadb_reader_manager.get("/kbs") body = resp.json() assert len(body["kbs"]) == 1 assert body["kbs"][0]["uuid"] == kbid @@ -84,10 +87,10 @@ async def test_purge_deletes_everything_from_maindb( assert await kb_catalog_entries_count(maindb_driver, kbid) > 0 - resp = await nucliadb_manager.delete(f"/kb/{kbid}") + resp = await nucliadb_writer_manager.delete(f"/kb/{kbid}") assert resp.status_code == 200 - resp = await nucliadb_manager.get("/kbs") + resp = await nucliadb_reader_manager.get("/kbs") body = resp.json() assert len(body["kbs"]) == 0 @@ -111,10 +114,11 @@ async def test_purge_deletes_everything_from_maindb( assert len(keys_after_purge_storage) == 0 +@pytest.mark.deploy_modes("standalone") async def test_purge_orphan_shards( maindb_driver: Driver, storage: Storage, - nucliadb_manager: AsyncClient, + nucliadb_writer_manager: AsyncClient, nucliadb_writer: AsyncClient, ): """Create a KB with some resource (hence a shard) and delete it. 
Simulate an @@ -122,7 +126,7 @@ async def test_purge_orphan_shards( """ kb_slug = str(uuid.uuid4()) - resp = await nucliadb_manager.post("/kbs", json={"slug": kb_slug}) + resp = await nucliadb_writer_manager.post("/kbs", json={"slug": kb_slug}) assert resp.status_code == 201 kbid = resp.json().get("uuid") @@ -143,7 +147,7 @@ async def test_purge_orphan_shards( with unittest.mock.patch.object(nucliadb.common.nidx.get_nidx(), "api_client"): nucliadb.common.nidx.get_nidx().api_client.DeleteShard = AsyncMock() - resp = await nucliadb_manager.delete(f"/kb/{kbid}") + resp = await nucliadb_writer_manager.delete(f"/kb/{kbid}") assert resp.status_code == 200, resp.text await purge_kb(maindb_driver) @@ -170,10 +174,11 @@ async def test_purge_orphan_shards( assert len(shards) == 0 +@pytest.mark.deploy_modes("standalone") async def test_purge_orphan_shard_detection( maindb_driver: Driver, storage: Storage, - nucliadb_manager: AsyncClient, + nucliadb_writer_manager: AsyncClient, nucliadb_writer: AsyncClient, ): """Prepare a situation where there are: @@ -185,7 +190,7 @@ async def test_purge_orphan_shard_detection( """ # Regular KB kb_slug = str(uuid.uuid4()) - resp = await nucliadb_manager.post("/kbs", json={"slug": kb_slug}) + resp = await nucliadb_writer_manager.post("/kbs", json={"slug": kb_slug}) assert resp.status_code == 201 kbid = resp.json().get("uuid") @@ -238,15 +243,16 @@ async def kb_catalog_entries_count(driver: Driver, kbid: str) -> int: return count[0] +@pytest.mark.deploy_modes("standalone") async def test_purge_resources_deleted_storage( maindb_driver: Driver, storage: Storage, - nucliadb_manager: AsyncClient, + nucliadb_writer_manager: AsyncClient, nucliadb_writer: AsyncClient, ): # Create a KB kb_slug = str(uuid.uuid4()) - resp = await nucliadb_manager.post("/kbs", json={"slug": kb_slug}) + resp = await nucliadb_writer_manager.post("/kbs", json={"slug": kb_slug}) assert resp.status_code == 201 kbid = resp.json().get("uuid") diff --git 
a/nucliadb/tests/nucliadb/integration/test_purge_vectorsets.py b/nucliadb/tests/nucliadb/integration/test_purge_vectorsets.py index 7ff4ac427a..25c6770fca 100644 --- a/nucliadb/tests/nucliadb/integration/test_purge_vectorsets.py +++ b/nucliadb/tests/nucliadb/integration/test_purge_vectorsets.py @@ -22,6 +22,8 @@ import uuid from unittest.mock import AsyncMock, patch +import pytest + from nucliadb.common import datamanagers from nucliadb.common.maindb.driver import Driver from nucliadb.ingest.orm.knowledgebox import ( @@ -37,8 +39,12 @@ from tests.utils import inject_message +@pytest.mark.deploy_modes("standalone") async def test_purge_vectorsets__kb_with_vectorsets( - maindb_driver: Driver, storage: Storage, nucliadb_grpc: WriterStub, knowledgebox_with_vectorsets: str + maindb_driver: Driver, + storage: Storage, + nucliadb_ingest_grpc: WriterStub, + knowledgebox_with_vectorsets: str, ): kbid = knowledgebox_with_vectorsets vectorset_id = "my-semantic-model-A" @@ -46,7 +52,7 @@ async def test_purge_vectorsets__kb_with_vectorsets( resource_count = 5 for i in range(resource_count): bm = await create_broker_message_with_vectorset(kbid, maindb_driver) - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) with patch.object( storage, "delete_upload", new=AsyncMock(side_effect=storage.delete_upload) diff --git a/nucliadb/tests/nucliadb/integration/test_reindex.py b/nucliadb/tests/nucliadb/integration/test_reindex.py index ae910b10b3..4a2ce3c9d5 100644 --- a/nucliadb/tests/nucliadb/integration/test_reindex.py +++ b/nucliadb/tests/nucliadb/integration/test_reindex.py @@ -21,6 +21,7 @@ import base64 import hashlib +import pytest from httpx import AsyncClient from nucliadb.common import datamanagers @@ -32,31 +33,35 @@ from tests.utils import dirty_index, inject_message +@pytest.mark.deploy_modes("standalone") async def test_reindex( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox: str, 
+ nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox: str, ): - await _test_reindex(nucliadb_reader, nucliadb_writer, nucliadb_grpc, knowledgebox) + await _test_reindex(nucliadb_reader, nucliadb_writer, nucliadb_ingest_grpc, standalone_knowledgebox) +@pytest.mark.deploy_modes("standalone") async def test_reindex_kb_with_vectorsets( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, + nucliadb_ingest_grpc: WriterStub, knowledgebox_with_vectorsets: str, ): - await _test_reindex(nucliadb_reader, nucliadb_writer, nucliadb_grpc, knowledgebox_with_vectorsets) + await _test_reindex( + nucliadb_reader, nucliadb_writer, nucliadb_ingest_grpc, knowledgebox_with_vectorsets + ) async def _test_reindex( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, + nucliadb_ingest_grpc: WriterStub, kbid, ): - rid = await create_resource(kbid, nucliadb_writer, nucliadb_grpc) + rid = await create_resource(kbid, nucliadb_writer, nucliadb_ingest_grpc) # Doing a search should return results resp = await nucliadb_reader.get(f"/kb/{kbid}/search?query=text") @@ -102,7 +107,7 @@ async def _test_reindex( assert len(content["paragraphs"]["results"]) > 0 -async def create_resource(kbid: str, nucliadb_writer: AsyncClient, nucliadb_grpc: WriterStub): +async def create_resource(kbid: str, nucliadb_writer: AsyncClient, nucliadb_ingest_grpc: WriterStub): # create resource file_content = b"This is a file" field_id = "myfile" @@ -130,7 +135,7 @@ async def create_resource(kbid: str, nucliadb_writer: AsyncClient, nucliadb_grpc # update it with extracted data bm = await broker_resource(kbid, rid) bm.source = BrokerMessage.MessageSource.PROCESSOR - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) return bm.uuid diff --git a/nucliadb/tests/nucliadb/integration/test_relations.py b/nucliadb/tests/nucliadb/integration/test_relations.py index 9a6097ec63..c3d7faf4e6 100644 --- 
a/nucliadb/tests/nucliadb/integration/test_relations.py +++ b/nucliadb/tests/nucliadb/integration/test_relations.py @@ -33,12 +33,12 @@ @pytest.fixture async def resource_with_bm_relations( - nucliadb_grpc: WriterStub, + nucliadb_ingest_grpc: WriterStub, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "slug": "myresource", "texts": {"text1": {"body": "Mickey loves Minnie"}}, @@ -48,23 +48,24 @@ async def resource_with_bm_relations( rid = resp.json()["uuid"] bm = await create_broker_message_with_relations() - bm.kbid = knowledgebox + bm.kbid = standalone_knowledgebox bm.uuid = rid - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) yield rid, "text1" +@pytest.mark.deploy_modes("standalone") async def test_api_aliases( nucliadb_reader: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, resource_with_bm_relations: tuple[str, str], ): rid, field_id = resource_with_bm_relations resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/resource/{rid}", + f"/kb/{standalone_knowledgebox}/resource/{rid}", params=dict( show=["relations", "extracted"], extracted=["metadata"], @@ -78,7 +79,7 @@ async def test_api_aliases( assert "from_" not in extracted_metadata["metadata"]["relations"][0] resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/resource/{rid}/text/{field_id}", + f"/kb/{standalone_knowledgebox}/resource/{rid}/text/{field_id}", params=dict( show=["extracted"], extracted=["metadata"], @@ -91,9 +92,10 @@ async def test_api_aliases( assert "from_" not in body["extracted"]["metadata"]["metadata"]["relations"][0] +@pytest.mark.deploy_modes("standalone") async def test_broker_message_relations( nucliadb_reader: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, resource_with_bm_relations: tuple[str, str], ): """ @@ -106,7 +108,7 @@ async def 
test_broker_message_relations( rid, field_id = resource_with_bm_relations resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/resource/{rid}", + f"/kb/{standalone_knowledgebox}/resource/{rid}", params=dict( show=["relations", "extracted"], extracted=["metadata"], @@ -127,7 +129,7 @@ async def test_broker_message_relations( ) resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/resource/{rid}/text/{field_id}", + f"/kb/{standalone_knowledgebox}/resource/{rid}/text/{field_id}", params=dict( show=["extracted"], extracted=["metadata"], @@ -138,11 +140,12 @@ async def test_broker_message_relations( assert len(body["extracted"]["metadata"]["metadata"]["relations"]) == 1 +@pytest.mark.deploy_modes("standalone") async def test_extracted_relations( - nucliadb_grpc: WriterStub, + nucliadb_ingest_grpc: WriterStub, nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): """ Test description: @@ -151,7 +154,7 @@ async def test_extracted_relations( extracted and test it. 
""" resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "My resource", "slug": "myresource", @@ -199,7 +202,7 @@ async def test_extracted_relations( assert resp.status_code == 201 rid = resp.json()["uuid"] - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}?show=basic") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}?show=basic") assert resp.status_code == 200 assert len(resp.json()["usermetadata"]["relations"]) == 5 diff --git a/nucliadb/tests/nucliadb/integration/test_resources.py b/nucliadb/tests/nucliadb/integration/test_resources.py index 325dc179d0..9bb972a3a8 100644 --- a/nucliadb/tests/nucliadb/integration/test_resources.py +++ b/nucliadb/tests/nucliadb/integration/test_resources.py @@ -29,13 +29,14 @@ from nucliadb.writer.api.v1.router import KB_PREFIX, RESOURCES_PREFIX +@pytest.mark.deploy_modes("standalone") async def test_resource_crud( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): resp = await nucliadb_writer.post( - f"/{KB_PREFIX}/{knowledgebox}/{RESOURCES_PREFIX}", + f"/{KB_PREFIX}/{standalone_knowledgebox}/{RESOURCES_PREFIX}", json={ "slug": "mykb", "title": "My KB", @@ -44,35 +45,36 @@ async def test_resource_crud( assert resp.status_code == 201 rid = resp.json()["uuid"] - resp = await nucliadb_reader.get(f"/{KB_PREFIX}/{knowledgebox}/resource/{rid}") + resp = await nucliadb_reader.get(f"/{KB_PREFIX}/{standalone_knowledgebox}/resource/{rid}") assert resp.status_code == 200 assert resp.json()["title"] == "My KB" resp = await nucliadb_writer.patch( - f"/{KB_PREFIX}/{knowledgebox}/resource/{rid}", + f"/{KB_PREFIX}/{standalone_knowledgebox}/resource/{rid}", json={ "title": "My updated KB", }, ) assert resp.status_code == 200 - resp = await nucliadb_reader.get(f"/{KB_PREFIX}/{knowledgebox}/resource/{rid}") + resp = await 
nucliadb_reader.get(f"/{KB_PREFIX}/{standalone_knowledgebox}/resource/{rid}") assert resp.status_code == 200 assert resp.json()["title"] == "My updated KB" resp = await nucliadb_writer.delete( - f"/{KB_PREFIX}/{knowledgebox}/resource/{rid}", + f"/{KB_PREFIX}/{standalone_knowledgebox}/resource/{rid}", ) assert resp.status_code == 204 - resp = await nucliadb_reader.get(f"/{KB_PREFIX}/{knowledgebox}/resource/{rid}") + resp = await nucliadb_reader.get(f"/{KB_PREFIX}/{standalone_knowledgebox}/resource/{rid}") assert resp.status_code == 404 +@pytest.mark.deploy_modes("standalone") async def test_list_resources( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): """ - Create 20 resources @@ -82,7 +84,7 @@ async def test_list_resources( rids = set() for _ in range(20): resp = await nucliadb_writer.post( - f"/{KB_PREFIX}/{knowledgebox}/{RESOURCES_PREFIX}", + f"/{KB_PREFIX}/{standalone_knowledgebox}/{RESOURCES_PREFIX}", json={ "title": "My resource", }, @@ -91,12 +93,12 @@ async def test_list_resources( rids.add(resp.json()["uuid"]) got_rids = set() - resp = await nucliadb_reader.get(f"/{KB_PREFIX}/{knowledgebox}/resources?size=10&page=0") + resp = await nucliadb_reader.get(f"/{KB_PREFIX}/{standalone_knowledgebox}/resources?size=10&page=0") assert resp.status_code == 200 for r in resp.json()["resources"]: got_rids.add(r["id"]) - resp = await nucliadb_reader.get(f"/{KB_PREFIX}/{knowledgebox}/resources?size=10&page=1") + resp = await nucliadb_reader.get(f"/{KB_PREFIX}/{standalone_knowledgebox}/resources?size=10&page=1") assert resp.status_code == 200 for r in resp.json()["resources"]: got_rids.add(r["id"]) @@ -104,16 +106,17 @@ async def test_list_resources( assert got_rids == rids +@pytest.mark.deploy_modes("standalone") async def test_get_resource_field( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): slug = "my-resource" field = "text-field" resp = await 
nucliadb_writer.post( - f"/{KB_PREFIX}/{knowledgebox}/{RESOURCES_PREFIX}", + f"/{KB_PREFIX}/{standalone_knowledgebox}/{RESOURCES_PREFIX}", json={ "slug": slug, "title": "My Resource", @@ -123,29 +126,30 @@ async def test_get_resource_field( assert resp.status_code == 201 rid = resp.json()["uuid"] - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}/text/{field}") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}/text/{field}") assert resp.status_code == 200 body_by_slug = resp.json() - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/slug/{slug}/text/{field}") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/slug/{slug}/text/{field}") assert resp.status_code == 200 body_by_rid = resp.json() assert body_by_slug == body_by_rid +@pytest.mark.deploy_modes("standalone") async def test_resource_creation_slug_conflicts( nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, philosophy_books_kb, ): """ Test that creating two resources with the same slug raises a conflict error """ slug = "myresource" - resources_path = f"/{KB_PREFIX}/{{knowledgebox}}/{RESOURCES_PREFIX}" + resources_path = f"/{KB_PREFIX}/{{standalone_knowledgebox}}/{RESOURCES_PREFIX}" resp = await nucliadb_writer.post( - resources_path.format(knowledgebox=knowledgebox), + resources_path.format(standalone_knowledgebox=standalone_knowledgebox), json={ "slug": slug, }, @@ -153,7 +157,7 @@ async def test_resource_creation_slug_conflicts( assert resp.status_code == 201 resp = await nucliadb_writer.post( - resources_path.format(knowledgebox=knowledgebox), + resources_path.format(standalone_knowledgebox=standalone_knowledgebox), json={ "slug": slug, }, @@ -162,7 +166,7 @@ async def test_resource_creation_slug_conflicts( # Creating it in another KB should not raise conflict error resp = await nucliadb_writer.post( - resources_path.format(knowledgebox=philosophy_books_kb), + 
resources_path.format(standalone_knowledgebox=philosophy_books_kb), json={ "slug": slug, }, @@ -170,13 +174,14 @@ async def test_resource_creation_slug_conflicts( assert resp.status_code == 201 +@pytest.mark.deploy_modes("standalone") async def test_title_is_set_automatically_if_not_provided( - nucliadb_reader, - nucliadb_writer, - knowledgebox, + nucliadb_reader: AsyncClient, + nucliadb_writer: AsyncClient, + standalone_knowledgebox, ): resp = await nucliadb_writer.post( - f"/{KB_PREFIX}/{knowledgebox}/{RESOURCES_PREFIX}", + f"/{KB_PREFIX}/{standalone_knowledgebox}/{RESOURCES_PREFIX}", json={ "texts": {"text-field": {"body": "test1", "format": "PLAIN"}}, }, @@ -184,22 +189,23 @@ async def test_title_is_set_automatically_if_not_provided( assert resp.status_code == 201 rid = resp.json()["uuid"] - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}") assert resp.status_code == 200 body = resp.json() assert body["title"] == rid @pytest.mark.parametrize("update_by", ["slug", "uuid"]) +@pytest.mark.deploy_modes("standalone") async def test_resource_slug_modification( - nucliadb_reader, - nucliadb_writer, - knowledgebox, + nucliadb_reader: AsyncClient, + nucliadb_writer: AsyncClient, + standalone_knowledgebox, update_by, ): old_slug = "my-resource" resp = await nucliadb_writer.post( - f"/{KB_PREFIX}/{knowledgebox}/{RESOURCES_PREFIX}", + f"/{KB_PREFIX}/{standalone_knowledgebox}/{RESOURCES_PREFIX}", json={ "title": "My Resource", "slug": old_slug, @@ -208,14 +214,14 @@ async def test_resource_slug_modification( assert resp.status_code == 201 rid = resp.json()["uuid"] - await check_resource(nucliadb_reader, knowledgebox, rid, old_slug) + await check_resource(nucliadb_reader, standalone_knowledgebox, rid, old_slug) # Update the slug new_slug = "my-resource-2" if update_by == "slug": - path = f"/{KB_PREFIX}/{knowledgebox}/slug/{old_slug}" + path = 
f"/{KB_PREFIX}/{standalone_knowledgebox}/slug/{old_slug}" else: - path = f"/{KB_PREFIX}/{knowledgebox}/resource/{rid}" + path = f"/{KB_PREFIX}/{standalone_knowledgebox}/resource/{rid}" resp = await nucliadb_writer.patch( path, json={ @@ -225,10 +231,10 @@ async def test_resource_slug_modification( ) assert resp.status_code == 200 - await check_resource(nucliadb_reader, knowledgebox, rid, new_slug, title="New title") + await check_resource(nucliadb_reader, standalone_knowledgebox, rid, new_slug, title="New title") -async def check_resource(nucliadb_reader, kbid, rid, slug, **body_checks): +async def check_resource(nucliadb_reader: AsyncClient, kbid, rid, slug, **body_checks): resp = await nucliadb_reader.get(f"/kb/{kbid}/resource/{rid}") assert resp.status_code == 200 assert resp.json()["slug"] == slug @@ -241,14 +247,15 @@ async def check_resource(nucliadb_reader, kbid, rid, slug, **body_checks): assert body[key] == value +@pytest.mark.deploy_modes("standalone") async def test_resource_slug_modification_rollbacks( - nucliadb_reader, - nucliadb_writer, - knowledgebox, + nucliadb_reader: AsyncClient, + nucliadb_writer: AsyncClient, + standalone_knowledgebox, ): old_slug = "my-resource" resp = await nucliadb_writer.post( - f"/{KB_PREFIX}/{knowledgebox}/{RESOURCES_PREFIX}", + f"/{KB_PREFIX}/{standalone_knowledgebox}/{RESOURCES_PREFIX}", json={ "title": "Old title", "slug": old_slug, @@ -257,7 +264,7 @@ async def test_resource_slug_modification_rollbacks( assert resp.status_code == 201 rid = resp.json()["uuid"] - await check_resource(nucliadb_reader, knowledgebox, rid, old_slug) + await check_resource(nucliadb_reader, standalone_knowledgebox, rid, old_slug) # Mock an error in the sending to process with mock.patch( @@ -265,7 +272,7 @@ async def test_resource_slug_modification_rollbacks( side_effect=HTTPException(status_code=506), ): resp = await nucliadb_writer.patch( - f"/{KB_PREFIX}/{knowledgebox}/resource/{rid}", + 
f"/{KB_PREFIX}/{standalone_knowledgebox}/resource/{rid}", json={ "slug": "my-resource-2", "title": "New title", @@ -274,12 +281,13 @@ async def test_resource_slug_modification_rollbacks( assert resp.status_code == 506 # Check that slug and title were not updated - await check_resource(nucliadb_reader, knowledgebox, rid, old_slug, title="New title") + await check_resource(nucliadb_reader, standalone_knowledgebox, rid, old_slug, title="New title") +@pytest.mark.deploy_modes("standalone") async def test_resource_slug_modification_handles_conflicts( - nucliadb_writer, - knowledgebox, + nucliadb_writer: AsyncClient, + standalone_knowledgebox, ): rids = [] slugs = [] @@ -287,7 +295,7 @@ async def test_resource_slug_modification_handles_conflicts( slug = f"my-resource-{i}" slugs.append(slug) resp = await nucliadb_writer.post( - f"/{KB_PREFIX}/{knowledgebox}/{RESOURCES_PREFIX}", + f"/{KB_PREFIX}/{standalone_knowledgebox}/{RESOURCES_PREFIX}", json={ "title": "My Resource", "slug": slug, @@ -298,7 +306,7 @@ async def test_resource_slug_modification_handles_conflicts( rids.append(rid) # Check that conflicts on slug are detected - path = f"/{KB_PREFIX}/{knowledgebox}/resource/{rids[0]}" + path = f"/{KB_PREFIX}/{standalone_knowledgebox}/resource/{rids[0]}" resp = await nucliadb_writer.patch( path, json={ @@ -308,12 +316,13 @@ async def test_resource_slug_modification_handles_conflicts( assert resp.status_code == 409 +@pytest.mark.deploy_modes("standalone") async def test_resource_slug_modification_handles_unknown_resources( - nucliadb_writer, - knowledgebox, + nucliadb_writer: AsyncClient, + standalone_knowledgebox, ): resp = await nucliadb_writer.patch( - f"/{KB_PREFIX}/{knowledgebox}/resource/foobar", + f"/{KB_PREFIX}/{standalone_knowledgebox}/resource/foobar", json={ "slug": "foo", }, @@ -321,9 +330,10 @@ async def test_resource_slug_modification_handles_unknown_resources( assert resp.status_code == 404 +@pytest.mark.deploy_modes("standalone") async def 
test_parallel_dup_resource_creation_raises_conflicts( - nucliadb_writer, - knowledgebox, + nucliadb_writer: AsyncClient, + standalone_knowledgebox, ): driver = get_driver() if not isinstance(driver, PGDriver): @@ -344,7 +354,7 @@ async def create_resource(kbid: str): # Create 5 requests that attempt to create the same resource with the same slug simultaneously tasks = [] for _ in range(5): - tasks.append(asyncio.create_task(create_resource(knowledgebox))) + tasks.append(asyncio.create_task(create_resource(standalone_knowledgebox))) status_codes = await asyncio.gather(*tasks) # Check that only one succeeded diff --git a/nucliadb/tests/nucliadb/integration/test_security.py b/nucliadb/tests/nucliadb/integration/test_security.py index eb20565244..d33ee4f120 100644 --- a/nucliadb/tests/nucliadb/integration/test_security.py +++ b/nucliadb/tests/nucliadb/integration/test_security.py @@ -21,14 +21,15 @@ from typing import Optional import pytest +from httpx import AsyncClient PLATFORM_GROUP = "platform" DEVELOPERS_GROUP = "developers" @pytest.fixture(scope="function") -async def resource_with_security(nucliadb_writer, knowledgebox): - kbid = knowledgebox +async def resource_with_security(nucliadb_writer: AsyncClient, standalone_knowledgebox: str): + kbid = standalone_knowledgebox resp = await nucliadb_writer.post( f"/kb/{kbid}/resources", json={ @@ -45,10 +46,11 @@ async def resource_with_security(nucliadb_writer, knowledgebox): return resp.json()["uuid"] +@pytest.mark.deploy_modes("standalone") async def test_resource_security_is_returned_serialization( - nucliadb_reader, knowledgebox, resource_with_security + nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resource_with_security ): - kbid = knowledgebox + kbid = standalone_knowledgebox resource_id = resource_with_security resp = await nucliadb_reader.get(f"/kb/{kbid}/resource/{resource_id}", params={"show": ["security"]}) @@ -57,10 +59,11 @@ async def test_resource_security_is_returned_serialization( assert 
set(resource["security"]["access_groups"]) == set([PLATFORM_GROUP, DEVELOPERS_GROUP]) +@pytest.mark.deploy_modes("standalone") async def test_resource_security_is_updated( - nucliadb_reader, nucliadb_writer, knowledgebox, resource_with_security + nucliadb_reader: AsyncClient, nucliadb_writer, standalone_knowledgebox: str, resource_with_security ): - kbid = knowledgebox + kbid = standalone_knowledgebox resource_id = resource_with_security # Update the security of the resource: make it public for all groups @@ -85,14 +88,15 @@ async def test_resource_security_is_updated( @pytest.mark.parametrize("search_endpoint", ("find_get", "find_post", "search_get", "search_post")) +@pytest.mark.deploy_modes("standalone") async def test_resource_security_search( - nucliadb_reader, - nucliadb_writer, - knowledgebox, + nucliadb_reader: AsyncClient, + nucliadb_writer: AsyncClient, + standalone_knowledgebox: str, resource_with_security, search_endpoint, ): - kbid = knowledgebox + kbid = standalone_knowledgebox resource_id = resource_with_security support_group = "support" # Add another group to the resource @@ -173,7 +177,7 @@ async def test_resource_security_search( async def _test_search_request_with_security( search_endpoint: str, - nucliadb_reader, + nucliadb_reader: AsyncClient, kbid: str, query: str, security_groups: Optional[list[str]], diff --git a/nucliadb/tests/nucliadb/integration/test_suggest.py b/nucliadb/tests/nucliadb/integration/test_suggest.py index 8eb464a6cd..6769fb5486 100644 --- a/nucliadb/tests/nucliadb/integration/test_suggest.py +++ b/nucliadb/tests/nucliadb/integration/test_suggest.py @@ -27,20 +27,21 @@ from tests.utils import inject_message +@pytest.mark.deploy_modes("standalone") async def test_suggest_paragraphs( - nucliadb_grpc: WriterStub, + nucliadb_ingest_grpc: WriterStub, nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): """ Test description: - Create some resource on a knowledgebox and use the 
/suggest endpoint + Create some resource on a standalone_knowledgebox and use the /suggest endpoint to search them. """ resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "My resource", "slug": "myresource", @@ -50,7 +51,7 @@ async def test_suggest_paragraphs( assert resp.status_code == 201 resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "The little prince", "slug": "the-little-prince", @@ -67,7 +68,7 @@ async def test_suggest_paragraphs( assert resp.status_code == 201 rid2 = resp.json()["uuid"] resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "Thus Spoke Zarathustra", "slug": "thus-spoke-zarathustra", @@ -81,14 +82,14 @@ async def test_suggest_paragraphs( rid3 = resp.json()["uuid"] # exact match - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=Nietzche") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=Nietzche") assert resp.status_code == 200 body = resp.json() assert len(body["paragraphs"]["results"]) == 1 assert body["paragraphs"]["results"][0]["rid"] == rid3 # typo tolerant search - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=princes") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=princes") assert resp.status_code == 200 body = resp.json() assert len(body["paragraphs"]["results"]) == 2 @@ -97,7 +98,7 @@ async def test_suggest_paragraphs( assert {"summary", "title"} == {result["field"] for result in body["paragraphs"]["results"]} # fuzzy search with distance 1 will only match 'a' from resource 2 - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=z") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=z") assert resp.status_code == 200 body = resp.json() assert 
len(body["paragraphs"]["results"]) == 1 @@ -105,14 +106,14 @@ async def test_suggest_paragraphs( assert body["paragraphs"]["results"][0]["field"] == "summary" # nonexistent term - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=Hanna+Adrent") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=Hanna+Adrent") assert resp.status_code == 200 body = resp.json() assert len(body["paragraphs"]["results"]) == 0 # by field resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/suggest", + f"/kb/{standalone_knowledgebox}/suggest", params={ "query": "prince", "fields": "a/title", @@ -125,7 +126,7 @@ async def test_suggest_paragraphs( # filter by language resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/suggest", + f"/kb/{standalone_knowledgebox}/suggest", params={ "query": "prince", "filters": "/metadata.language/en", @@ -138,7 +139,7 @@ async def test_suggest_paragraphs( # No "prince" appear in any german resource resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/suggest", + f"/kb/{standalone_knowledgebox}/suggest", params={ "query": "prince", "filters": "/metadata.language/de", @@ -149,8 +150,9 @@ async def test_suggest_paragraphs( assert len(body["paragraphs"]["results"]) == 0 +@pytest.mark.deploy_modes("standalone") async def test_suggest_related_entities( - nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, knowledgebox, request + nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, standalone_knowledgebox, request ): """ Test description: @@ -184,7 +186,7 @@ async def test_suggest_related_entities( for entity, type in entities ] resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "People and places", "slug": "pap", @@ -207,64 +209,65 @@ def assert_expected_entities(body, expected): assert set((e["value"] for e in body["entities"]["entities"])) == expected # Test simple suggestions - resp = await 
nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=Ann") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=Ann") assert resp.status_code == 200 body = resp.json() assert_expected_entities(body, {"Anna", "Anthony"}) - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=joh") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=joh") assert resp.status_code == 200 body = resp.json() assert_expected_entities(body, {"John"}) - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=xxxxx") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=xxxxx") assert resp.status_code == 200 body = resp.json() assert not body["entities"]["entities"] # Test correct query tokenization - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=bar") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=bar") assert resp.status_code == 200 body = resp.json() assert_expected_entities(body, {"Barcelona", "Bárcenas"}) - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=Bar") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=Bar") assert resp.status_code == 200 body = resp.json() assert_expected_entities(body, {"Barcelona", "Bárcenas"}) - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=BAR") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=BAR") assert resp.status_code == 200 body = resp.json() assert_expected_entities(body, {"Barcelona", "Bárcenas"}) - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=BÄR") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=BÄR") assert resp.status_code == 200 body = resp.json() assert_expected_entities(body, {"Barcelona", "Bárcenas"}) - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=BáR") + resp = await 
nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=BáR") assert resp.status_code == 200 body = resp.json() assert_expected_entities(body, {"Barcelona", "Bárcenas"}) # Test multiple word suggest and ordering - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=Solomon+Is") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=Solomon+Is") assert resp.status_code == 200 body = resp.json() assert_expected_entities(body, {"Solomon Islands", "Israel"}) +@pytest.mark.deploy_modes("standalone") async def test_suggestion_on_link_computed_titles_sc6088( - nucliadb_writer, - nucliadb_grpc, - nucliadb_reader, - knowledgebox, + nucliadb_writer: AsyncClient, + nucliadb_ingest_grpc: WriterStub, + nucliadb_reader: AsyncClient, + standalone_knowledgebox, ): # Create a resource with a link field link = "http://www.mylink.com" - kbid = knowledgebox + kbid = standalone_knowledgebox resp = await nucliadb_writer.post( f"/kb/{kbid}/resources", json={ @@ -291,7 +294,7 @@ async def test_suggestion_on_link_computed_titles_sc6088( led.title = extracted_title bm.link_extracted_data.append(led) - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) # Check that the resource title changed resp = await nucliadb_reader.get(f"/kb/{kbid}/resource/{rid}") @@ -311,10 +314,11 @@ async def test_suggestion_on_link_computed_titles_sc6088( assert suggested["text"] == extracted_title +@pytest.mark.deploy_modes("standalone") async def test_suggest_features( - nucliadb_grpc: WriterStub, + nucliadb_ingest_grpc: WriterStub, nucliadb_reader: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, texts: dict[str, str], entities, ): @@ -341,7 +345,7 @@ def assert_expected_entities(response): assert set((e["value"] for e in response["entities"]["entities"])) == expected resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/suggest", + f"/kb/{standalone_knowledgebox}/suggest", params={"query": "ann", 
"features": ["paragraph", "entities"]}, ) assert resp.status_code == 200 @@ -350,7 +354,7 @@ def assert_expected_entities(response): assert_expected_paragraphs(body) resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/suggest", + f"/kb/{standalone_knowledgebox}/suggest", params={"query": "ann", "features": ["paragraph"]}, ) assert resp.status_code == 200 @@ -359,7 +363,7 @@ def assert_expected_entities(response): assert_expected_paragraphs(body) resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/suggest", params={"query": "ann", "features": ["entities"]} + f"/kb/{standalone_knowledgebox}/suggest", params={"query": "ann", "features": ["entities"]} ) assert resp.status_code == 200 body = resp.json() @@ -370,10 +374,10 @@ def assert_expected_entities(response): @pytest.fixture(scope="function") async def texts( nucliadb_writer: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, ) -> dict[str, str]: resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "My resource", "slug": "myresource", @@ -384,7 +388,7 @@ async def texts( rid1 = resp.json()["uuid"] resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "The little prince", "slug": "the-little-prince", @@ -401,7 +405,7 @@ async def texts( assert resp.status_code == 201 rid2 = resp.json()["uuid"] resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "Thus Spoke Zarathustra", "slug": "thus-spoke-zarathustra", @@ -422,7 +426,7 @@ async def texts( @pytest.fixture(scope="function") -async def entities(nucliadb_writer: AsyncClient, knowledgebox: str): +async def entities(nucliadb_writer: AsyncClient, standalone_knowledgebox: str): collaborators = ["Irene", "Anastasia"] entities = [ ("Anna", "person"), @@ -449,7 +453,7 @@ async def entities(nucliadb_writer: AsyncClient, 
knowledgebox: str): for entity, type in entities ] resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "People and places", "slug": "pap", @@ -469,7 +473,8 @@ async def entities(nucliadb_writer: AsyncClient, knowledgebox: str): assert resp.status_code == 201 -async def test_search_kb_not_found(nucliadb_reader) -> None: +@pytest.mark.deploy_modes("standalone") +async def test_search_kb_not_found(nucliadb_reader: AsyncClient) -> None: resp = await nucliadb_reader.get( f"/kb/00000000000000/suggest?query=own+text", ) diff --git a/nucliadb/tests/nucliadb/integration/test_summarize.py b/nucliadb/tests/nucliadb/integration/test_summarize.py index 0594df8b86..5bb732edcc 100644 --- a/nucliadb/tests/nucliadb/integration/test_summarize.py +++ b/nucliadb/tests/nucliadb/integration/test_summarize.py @@ -17,17 +17,19 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . # +import pytest from httpx import AsyncClient from nucliadb_models.search import SummarizedResponse +@pytest.mark.deploy_modes("standalone") async def test_summarize( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): - kbid = knowledgebox + kbid = standalone_knowledgebox resource_uuids = [] resource_slugs = [] @@ -61,6 +63,7 @@ async def test_summarize( assert set(response.resources.keys()) == set(resources) +@pytest.mark.deploy_modes("standalone") async def test_summarize_unexisting_kb( nucliadb_reader: AsyncClient, ): diff --git a/nucliadb/tests/nucliadb/integration/test_synonyms.py b/nucliadb/tests/nucliadb/integration/test_synonyms.py index 63dd90087d..a30c63522e 100644 --- a/nucliadb/tests/nucliadb/integration/test_synonyms.py +++ b/nucliadb/tests/nucliadb/integration/test_synonyms.py @@ -18,16 +18,18 @@ # along with this program. If not, see . 
# import pytest +from httpx import AsyncClient from nucliadb_models.search import SearchOptions +@pytest.mark.deploy_modes("standalone") async def test_custom_synonyms_api( - nucliadb_reader, - nucliadb_writer, - knowledgebox, + nucliadb_reader: AsyncClient, + nucliadb_writer: AsyncClient, + standalone_knowledgebox: str, ): - kbid = knowledgebox + kbid = standalone_knowledgebox synonyms_url = f"/kb/{kbid}/custom-synonyms" # Delete first @@ -74,8 +76,10 @@ async def test_custom_synonyms_api( @pytest.fixture(scope="function") -async def knowledgebox_with_synonyms(nucliadb_writer, knowledgebox): - kbid = knowledgebox +async def standalone_knowledgebox_with_synonyms( + nucliadb_writer: AsyncClient, standalone_knowledgebox: str +): + kbid = standalone_knowledgebox synonyms_url = f"/kb/{kbid}/custom-synonyms" kb_synonyms = { "synonyms": { @@ -87,12 +91,13 @@ async def knowledgebox_with_synonyms(nucliadb_writer, knowledgebox): yield kbid +@pytest.mark.deploy_modes("standalone") async def test_search_with_synonyms( - nucliadb_reader, - nucliadb_writer, - knowledgebox_with_synonyms, + nucliadb_reader: AsyncClient, + nucliadb_writer: AsyncClient, + standalone_knowledgebox_with_synonyms: str, ): - kbid = knowledgebox_with_synonyms + kbid = standalone_knowledgebox_with_synonyms # Create a resource with: # - the term on the summary @@ -188,11 +193,12 @@ def get_pararagraphs(body): return paragraphs +@pytest.mark.deploy_modes("standalone") async def test_search_errors_if_vectors_or_relations_requested( - nucliadb_reader, - knowledgebox, + nucliadb_reader: AsyncClient, + standalone_knowledgebox: str, ): - kbid = knowledgebox + kbid = standalone_knowledgebox resp = await nucliadb_reader.post( f"/kb/{kbid}/search", json=dict( diff --git a/nucliadb/tests/nucliadb/integration/test_text_field_json.py b/nucliadb/tests/nucliadb/integration/test_text_field_json.py index 1d07baa889..f74303da03 100644 --- a/nucliadb/tests/nucliadb/integration/test_text_field_json.py +++ 
b/nucliadb/tests/nucliadb/integration/test_text_field_json.py @@ -19,17 +19,19 @@ # import json +import pytest from httpx import AsyncClient from nucliadb_models.text import TextFormat +@pytest.mark.deploy_modes("standalone") async def test_text_field_in_json_format( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, ): - kbid = knowledgebox + kbid = standalone_knowledgebox field_id = "json-text" payload = {"hello": "world"} @@ -55,12 +57,13 @@ async def test_text_field_in_json_format( assert json.loads(body["data"]["texts"][field_id]["value"]["body"]) == payload +@pytest.mark.deploy_modes("standalone") async def test_text_field_with_invalid_json( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, ): - kbid = knowledgebox + kbid = standalone_knowledgebox field_id = "json-text" invalid_json = '{hello": "world"}' diff --git a/nucliadb/tests/nucliadb/integration/test_tokens.py b/nucliadb/tests/nucliadb/integration/test_tokens.py index eec7eaf6ad..dc3f0e190c 100644 --- a/nucliadb/tests/nucliadb/integration/test_tokens.py +++ b/nucliadb/tests/nucliadb/integration/test_tokens.py @@ -18,13 +18,15 @@ # along with this program. If not, see . 
# +import pytest from httpx import AsyncClient +@pytest.mark.deploy_modes("standalone") async def test_metadata_tokens_cancelled_by_the_user_sc_3775( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): token = { "token": "DRAG", @@ -34,7 +36,7 @@ async def test_metadata_tokens_cancelled_by_the_user_sc_3775( "cancelled_by_user": True, } resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "title": "My Resource", "summary": "My summary", @@ -55,7 +57,7 @@ async def test_metadata_tokens_cancelled_by_the_user_sc_3775( # Check cancelled tokens come in resource get resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/resource/{rid}", + f"/kb/{standalone_knowledgebox}/resource/{rid}", ) assert resp.status_code == 200 content = resp.json() @@ -63,14 +65,14 @@ async def test_metadata_tokens_cancelled_by_the_user_sc_3775( # Check cancelled labels come in resource list resp = await nucliadb_reader.get( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", ) assert resp.status_code == 200 content = resp.json() assert content["resources"][0]["fieldmetadata"][0]["token"][0] == token # Check cancelled labels come in search results - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?query=summary") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/search?query=summary") assert resp.status_code == 200 content = resp.json() assert content["resources"][rid]["fieldmetadata"][0]["token"][0] == token diff --git a/nucliadb/tests/nucliadb/integration/test_upload.py b/nucliadb/tests/nucliadb/integration/test_upload.py index d08bc40ad9..f100fb52a1 100644 --- a/nucliadb/tests/nucliadb/integration/test_upload.py +++ b/nucliadb/tests/nucliadb/integration/test_upload.py @@ -19,19 +19,21 @@ # import base64 +import pytest from httpx import AsyncClient from nucliadb.writer.tus import UPLOAD 
+@pytest.mark.deploy_modes("standalone") async def test_upload( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): content = b"Test for /upload endpoint" resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/{UPLOAD}", + f"/kb/{standalone_knowledgebox}/{UPLOAD}", headers={ "X-Filename": base64.b64encode(b"testfile").decode("utf-8"), "Content-Type": "text/plain", @@ -51,7 +53,7 @@ async def test_upload( assert rid assert field_id - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}/file/{field_id}") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}/file/{field_id}") assert resp.status_code == 200 body = resp.json() assert body["value"]["file"]["filename"] == "testfile" @@ -62,17 +64,18 @@ async def test_upload( assert resp.content == content +@pytest.mark.deploy_modes("standalone") async def test_upload_guesses_content_type( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): filename = "testfile.txt" content = b"Test for /upload endpoint" content_type = "text/plain" # Upload the file without specifying the content type resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/{UPLOAD}", + f"/kb/{standalone_knowledgebox}/{UPLOAD}", headers={ "X-Filename": base64.b64encode(filename.encode()).decode("utf-8"), }, @@ -84,7 +87,7 @@ async def test_upload_guesses_content_type( field_id = body["field_id"] # Test that the content type is correctly guessed from the filename - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}/file/{field_id}") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}/file/{field_id}") assert resp.status_code == 200 body = resp.json() assert body["value"]["file"]["filename"] == filename diff --git a/nucliadb/tests/nucliadb/integration/test_usermetadata.py b/nucliadb/tests/nucliadb/integration/test_usermetadata.py index ddc9bbb745..b4236efb4e 
100644 --- a/nucliadb/tests/nucliadb/integration/test_usermetadata.py +++ b/nucliadb/tests/nucliadb/integration/test_usermetadata.py @@ -17,20 +17,22 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . # +import pytest from httpx import AsyncClient +@pytest.mark.deploy_modes("standalone") async def test_labels_sc_2053( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): # PUBLIC API - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}") assert resp.status_code == 200 resp = await nucliadb_writer.post( - f"/kb/{knowledgebox}/resources", + f"/kb/{standalone_knowledgebox}/resources", json={ "slug": "myresource", "usermetadata": {"classifications": [{"labelset": "type", "label": "Book"}]}, @@ -39,14 +41,14 @@ async def test_labels_sc_2053( assert resp.status_code == 201 rid = resp.json()["uuid"] - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}") assert resp.status_code == 200 assert len(resp.json()["usermetadata"]["classifications"]) == 1 # ADD A LABEL resp = await nucliadb_writer.patch( - f"/kb/{knowledgebox}/resource/{rid}", + f"/kb/{standalone_knowledgebox}/resource/{rid}", json={ "usermetadata": { "classifications": [ @@ -58,16 +60,16 @@ async def test_labels_sc_2053( ) assert resp.status_code == 200 - resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}") assert resp.status_code == 200 assert len(resp.json()["usermetadata"]["classifications"]) == 2 resp = await nucliadb_writer.patch( - f"/kb/{knowledgebox}/resource/{rid}", + f"/kb/{standalone_knowledgebox}/resource/{rid}", json={"usermetadata": {"classifications": []}}, ) assert resp.status_code == 200 - resp = await 
nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}") + resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}") assert resp.status_code == 200 assert len(resp.json()["usermetadata"]["classifications"]) == 0 diff --git a/nucliadb/tests/nucliadb/integration/test_vectorsets.py b/nucliadb/tests/nucliadb/integration/test_vectorsets.py index 12930fd130..34d5131937 100644 --- a/nucliadb/tests/nucliadb/integration/test_vectorsets.py +++ b/nucliadb/tests/nucliadb/integration/test_vectorsets.py @@ -61,10 +61,11 @@ VECTORSET_DIMENSION = 12 +@pytest.mark.deploy_modes("standalone") async def test_vectorsets_work_on_a_kb_with_a_single_vectorset( nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, + nucliadb_ingest_grpc: WriterStub, kb_with_vectorset: KbSpecs, ): kbid = kb_with_vectorset.kbid @@ -113,13 +114,14 @@ async def test_vectorsets_work_on_a_kb_with_a_single_vectorset( "vectorset,expected", [(None, "multilingual"), ("", "multilingual"), ("myvectorset", "myvectorset")], ) +@pytest.mark.deploy_modes("standalone") async def test_vectorset_parameter_without_default_vectorset( nucliadb_reader: AsyncClient, - knowledgebox: str, - vectorset, - expected, + standalone_knowledgebox: str, + vectorset: Optional[str], + expected: str, ): - kbid = knowledgebox + kbid = standalone_knowledgebox calls: list[nodereader_pb2.SearchRequest] = [] @@ -172,13 +174,14 @@ def set_predict_default_vectorset(query_info: QueryInfo) -> QueryInfo: "vectorset,expected", [(None, "multilingual"), ("", "multilingual"), ("myvectorset", "myvectorset")], ) +@pytest.mark.deploy_modes("standalone") async def test_vectorset_parameter_with_default_vectorset( nucliadb_reader: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, vectorset, expected, ): - kbid = knowledgebox + kbid = standalone_knowledgebox calls: list[nodereader_pb2.SearchRequest] = [] @@ -221,6 +224,7 @@ async def mock_node_query(kbid: str, method, pb_query: 
nodereader_pb2.SearchRequ assert calls[-1].vectorset == expected +@pytest.mark.deploy_modes("standalone") async def test_querying_kb_with_vectorsets( mocker: MockerFixture, storage: Storage, @@ -228,7 +232,7 @@ async def test_querying_kb_with_vectorsets( shard_manager, learning_config, indexing_utility, - nucliadb_grpc: WriterStub, + nucliadb_ingest_grpc: WriterStub, nucliadb_reader: AsyncClient, dummy_predict: DummyPredictEngine, ): @@ -288,7 +292,7 @@ async def inner(*args, **kwargs): rid = uuid.uuid4().hex field_id = "my-field" bm = create_broker_message_with_vectorsets(kbid, rid, field_id, [("model", 768)]) - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) with ( patch.dict(utils.METHODS, {utils.Method.SEARCH: query_shard_wrapper}, clear=True), @@ -356,7 +360,7 @@ async def inner(*args, **kwargs): bm = create_broker_message_with_vectorsets( kbid, rid, field_id, [("model-A", 768), ("model-B", 1024)] ) - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) with ( patch.dict(utils.METHODS, {utils.Method.SEARCH: query_shard_wrapper}, clear=True), diff --git a/nucliadb/tests/nucliadb/integration/test_vectorsets_api.py b/nucliadb/tests/nucliadb/integration/test_vectorsets_api.py index ab70d0a038..4dcf3ad6c6 100644 --- a/nucliadb/tests/nucliadb/integration/test_vectorsets_api.py +++ b/nucliadb/tests/nucliadb/integration/test_vectorsets_api.py @@ -48,12 +48,13 @@ MODULE = "nucliadb.writer.api.v1.vectorsets" +@pytest.mark.deploy_modes("standalone") async def test_vectorsets_crud( - nucliadb_manager: AsyncClient, + nucliadb_writer: AsyncClient, nucliadb_reader: AsyncClient, - knowledgebox, + standalone_knowledgebox, ): - kbid = knowledgebox + kbid = standalone_knowledgebox vectorset_id = "en-2024-04-24" existing_lconfig = LearningConfiguration( semantic_model="multilingual", @@ -97,7 +98,7 @@ async def test_vectorsets_crud( ], ): # Add the vectorset - resp = await 
nucliadb_manager.post(f"/kb/{kbid}/vectorsets/{vectorset_id}") + resp = await nucliadb_writer.post(f"/kb/{kbid}/vectorsets/{vectorset_id}") assert resp.status_code == 201, resp.text # Check that the vectorset has been created with the correct configuration @@ -124,7 +125,7 @@ async def test_vectorsets_crud( ], ): # Delete the vectorset - resp = await nucliadb_manager.delete(f"/kb/{kbid}/vectorsets/{vectorset_id}") + resp = await nucliadb_writer.delete(f"/kb/{kbid}/vectorsets/{vectorset_id}") assert resp.status_code == 204, resp.text # Check that the vectorset has been deleted @@ -145,7 +146,7 @@ async def test_vectorsets_crud( ], ): # Deleting your last vectorset is not allowed - resp = await nucliadb_manager.delete(f"/kb/{kbid}/vectorsets/multilingual") + resp = await nucliadb_writer.delete(f"/kb/{kbid}/vectorsets/multilingual") assert resp.status_code == 409, resp.text assert "Deletion of your last vectorset is not allowed" in resp.json()["detail"] @@ -156,7 +157,7 @@ async def test_vectorsets_crud( ], ): # But deleting twice is okay - resp = await nucliadb_manager.delete(f"/kb/{kbid}/vectorsets/{vectorset_id}") + resp = await nucliadb_writer.delete(f"/kb/{kbid}/vectorsets/{vectorset_id}") # XXX: however, we get the same error as before due to our lazy # check strategy. 
This shuold be a 200 assert resp.status_code == 409, resp.text @@ -170,37 +171,39 @@ async def test_vectorsets_crud( ], ): # Add and delete the vectorset again - resp = await nucliadb_manager.post(f"/kb/{kbid}/vectorsets/{vectorset_id}") + resp = await nucliadb_writer.post(f"/kb/{kbid}/vectorsets/{vectorset_id}") assert resp.status_code == 201, resp.text - resp = await nucliadb_manager.delete(f"/kb/{kbid}/vectorsets/{vectorset_id}") + resp = await nucliadb_writer.delete(f"/kb/{kbid}/vectorsets/{vectorset_id}") assert resp.status_code == 204, resp.text +@pytest.mark.deploy_modes("standalone") async def test_learning_config_errors_are_proxied_correctly( - nucliadb_manager: AsyncClient, - knowledgebox, + nucliadb_writer: AsyncClient, + standalone_knowledgebox, ): - kbid = knowledgebox + kbid = standalone_knowledgebox with patch( f"{MODULE}.learning_proxy.get_configuration", side_effect=ProxiedLearningConfigError( status_code=500, content="Learning Internal Server Error" ), ): - resp = await nucliadb_manager.post(f"/kb/{kbid}/vectorsets/foo") + resp = await nucliadb_writer.post(f"/kb/{kbid}/vectorsets/foo") assert resp.status_code == 500 assert resp.json() == {"detail": "Learning Internal Server Error"} - resp = await nucliadb_manager.delete(f"/kb/{kbid}/vectorsets/foo") + resp = await nucliadb_writer.delete(f"/kb/{kbid}/vectorsets/foo") assert resp.status_code == 500 assert resp.json() == {"detail": "Learning Internal Server Error"} @pytest.mark.parametrize("bwc_with_default_vectorset", [True, False]) +@pytest.mark.deploy_modes("standalone") async def test_vectorset_migration( - nucliadb_manager: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, + nucliadb_writer_manager: AsyncClient, + nucliadb_ingest_grpc: WriterStub, nucliadb_reader: AsyncClient, bwc_with_default_vectorset: bool, ): @@ -210,7 +213,7 @@ async def test_vectorset_migration( """ # Create a KB - resp = await nucliadb_manager.post( + resp = await nucliadb_writer_manager.post( 
"/kbs", json={ "title": "migrationexamples", @@ -275,7 +278,7 @@ async def test_vectorset_migration( bmb.add_field_builder(link_field) bm = bmb.build() - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) # Make a search and check that the document is found await _check_search(nucliadb_reader, kbid) @@ -283,7 +286,7 @@ async def test_vectorset_migration( # Now add a new vectorset vectorset_id = "en-2024-05-06" resp = await add_vectorset( - nucliadb_manager, kbid, vectorset_id, similarity=SimilarityFunction.COSINE, vector_dimension=1024 + nucliadb_writer, kbid, vectorset_id, similarity=SimilarityFunction.COSINE, vector_dimension=1024 ) assert resp.status_code == 201 @@ -309,7 +312,7 @@ async def test_vectorset_migration( ev.vectors.vectors.vectors.append(vector) bm2.field_vectors.append(ev) - await inject_message(nucliadb_grpc, bm2) + await inject_message(nucliadb_ingest_grpc, bm2) # Make a search with the new vectorset and check that the document is found await _check_search(nucliadb_reader, kbid, vectorset="en-2024-05-06") diff --git a/nucliadb/tests/nucliadb/integration/test_visual_selections.py b/nucliadb/tests/nucliadb/integration/test_visual_selections.py index 03a8a14194..aeb37813ca 100644 --- a/nucliadb/tests/nucliadb/integration/test_visual_selections.py +++ b/nucliadb/tests/nucliadb/integration/test_visual_selections.py @@ -36,9 +36,9 @@ @pytest.fixture(scope="function") async def annotated_file_field( nucliadb_writer: AsyncClient, - knowledgebox: str, + standalone_knowledgebox: str, ): - kbid = knowledgebox + kbid = standalone_knowledgebox field_id = "invoice" with open(INVOICE_FILENAME, "rb") as f: @@ -95,8 +95,11 @@ async def annotated_file_field( yield (rid, field_id) -async def test_visual_selection(nucliadb_reader: AsyncClient, knowledgebox: str, annotated_file_field): - kbid = knowledgebox +@pytest.mark.deploy_modes("standalone") +async def test_visual_selection( + nucliadb_reader: AsyncClient, 
standalone_knowledgebox: str, annotated_file_field +): + kbid = standalone_knowledgebox rid, field_id = annotated_file_field resp = await nucliadb_reader.get( diff --git a/nucliadb/tests/nucliadb/knowledgeboxes/philosophy_books.py b/nucliadb/tests/nucliadb/knowledgeboxes/philosophy_books.py index 0589aef878..d1f5c88fbb 100644 --- a/nucliadb/tests/nucliadb/knowledgeboxes/philosophy_books.py +++ b/nucliadb/tests/nucliadb/knowledgeboxes/philosophy_books.py @@ -24,7 +24,7 @@ @pytest.fixture(scope="function") async def philosophy_books_kb( - nucliadb_manager: AsyncClient, + nucliadb_writer_manager: AsyncClient, nucliadb_writer: AsyncClient, ): payloads = [ @@ -181,7 +181,7 @@ async def philosophy_books_kb( }, ] - resp = await nucliadb_manager.post("/kbs", json={"slug": "philosophy-books"}) + resp = await nucliadb_writer_manager.post("/kbs", json={"slug": "philosophy-books"}) assert resp.status_code == 201 kbid = resp.json().get("uuid") @@ -194,5 +194,5 @@ async def philosophy_books_kb( yield kbid - resp = await nucliadb_manager.delete(f"/kb/{kbid}") + resp = await nucliadb_writer_manager.delete(f"/kb/{kbid}") assert resp.status_code == 200 diff --git a/nucliadb/tests/nucliadb/knowledgeboxes/ten_dummy_resources.py b/nucliadb/tests/nucliadb/knowledgeboxes/ten_dummy_resources.py index 265d0e313b..e2f647e149 100644 --- a/nucliadb/tests/nucliadb/knowledgeboxes/ten_dummy_resources.py +++ b/nucliadb/tests/nucliadb/knowledgeboxes/ten_dummy_resources.py @@ -25,7 +25,7 @@ @pytest.fixture(scope="function") async def ten_dummy_resources_kb( - nucliadb_manager: AsyncClient, + nucliadb_writer_manager: AsyncClient, nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, ): @@ -40,7 +40,7 @@ async def ten_dummy_resources_kb( for i in range(N_RESOURCES) ] - resp = await nucliadb_manager.post("/kbs", json={"slug": "ten-dummy-resources"}) + resp = await nucliadb_writer_manager.post("/kbs", json={"slug": "ten-dummy-resources"}) assert resp.status_code == 201 kbid = 
resp.json().get("uuid") @@ -62,5 +62,5 @@ async def ten_dummy_resources_kb( yield kbid - resp = await nucliadb_manager.delete(f"/kb/{kbid}") + resp = await nucliadb_writer_manager.delete(f"/kb/{kbid}") assert resp.status_code == 200 diff --git a/nucliadb/tests/nucliadb/knowledgeboxes/vectorsets.py b/nucliadb/tests/nucliadb/knowledgeboxes/vectorsets.py index 1577878f82..5eebcf2fdf 100644 --- a/nucliadb/tests/nucliadb/knowledgeboxes/vectorsets.py +++ b/nucliadb/tests/nucliadb/knowledgeboxes/vectorsets.py @@ -39,18 +39,17 @@ class KbSpecs: @pytest.fixture(scope="function") async def kb_with_vectorset( - nucliadb_manager: AsyncClient, nucliadb_writer: AsyncClient, - nucliadb_grpc: WriterStub, - knowledgebox: str, + nucliadb_ingest_grpc: WriterStub, + standalone_knowledgebox: str, ) -> AsyncIterable[KbSpecs]: - # Now knowledgeboxes in standalone are already created with a single vectorset. + # Note: knowledgeboxes in standalone are already created with a single vectorset. # By default it's the multilingual one (see mock predict implementation). 
- kbid = knowledgebox + kbid = standalone_knowledgebox vectorset_id = "multilingual" vectorset_dimension = 512 await inject_broker_message_with_vectorset_data( - nucliadb_grpc, + nucliadb_ingest_grpc, kbid, vectorset_id, vectorset_dimension=vectorset_dimension, @@ -64,7 +63,7 @@ async def kb_with_vectorset( async def inject_broker_message_with_vectorset_data( - nucliadb_grpc: WriterStub, + nucliadb_ingest_grpc: WriterStub, kbid: str, vectorset_id: str, *, @@ -83,4 +82,4 @@ async def inject_broker_message_with_vectorset_data( default_vectorset_dimension=default_vector_dimension, vectorset_dimension=vectorset_dimension, ) - await inject_message(nucliadb_grpc, bm) + await inject_message(nucliadb_ingest_grpc, bm) diff --git a/nucliadb/tests/utils/vectorsets.py b/nucliadb/tests/utils/vectorsets.py index eec8c492e8..693e2056dc 100644 --- a/nucliadb/tests/utils/vectorsets.py +++ b/nucliadb/tests/utils/vectorsets.py @@ -33,7 +33,7 @@ async def add_vectorset( - nucliadb_manager: AsyncClient, + nucliadb_writer: AsyncClient, kbid: str, vectorset_id: str, *, @@ -92,5 +92,5 @@ async def add_vectorset( "nucliadb.writer.api.v1.vectorsets.learning_proxy.update_configuration", ), ): - resp = await nucliadb_manager.post(f"/kb/{kbid}/vectorsets/{vectorset_id}") + resp = await nucliadb_writer.post(f"/kb/{kbid}/vectorsets/{vectorset_id}") return resp