diff --git a/nucliadb/src/nucliadb/writer/api/v1/export_import.py b/nucliadb/src/nucliadb/writer/api/v1/export_import.py
index c6d3be7dc0..97e89225e5 100644
--- a/nucliadb/src/nucliadb/writer/api/v1/export_import.py
+++ b/nucliadb/src/nucliadb/writer/api/v1/export_import.py
@@ -112,7 +112,7 @@ async def kb_create_and_import_endpoint(request: Request):
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
import_kb_config = KnowledgeBoxConfig(
title=f"Imported KB - {now}",
- learning_configuration=learning_config.dict(),
+ learning_configuration=learning_config.model_dump(),
)
kbid, slug = await create_kb(import_kb_config)
diff --git a/nucliadb/tests/conftest.py b/nucliadb/tests/conftest.py
index f994cf5068..057a73fa45 100644
--- a/nucliadb/tests/conftest.py
+++ b/nucliadb/tests/conftest.py
@@ -46,6 +46,9 @@
"tests.ndbfixtures.processing",
# useful resources for tests (KBs, resources, ...)
"tests.ndbfixtures.resources",
+ "tests.nucliadb.knowledgeboxes",
+ # legacy fixtures waiting for a better place
+ "tests.ndbfixtures.legacy",
]
)
diff --git a/nucliadb/tests/fixtures.py b/nucliadb/tests/fixtures.py
deleted file mode 100644
index cb3ca6ab04..0000000000
--- a/nucliadb/tests/fixtures.py
+++ /dev/null
@@ -1,417 +0,0 @@
-# Copyright (C) 2021 Bosutech XXI S.L.
-#
-# nucliadb is offered under the AGPL v3.0 and as commercial software.
-# For commercial licensing, contact us at info@nuclia.com.
-#
-# AGPL:
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see .
-#
-import logging
-from unittest.mock import AsyncMock, Mock
-
-import pytest
-from grpc import aio
-from httpx import AsyncClient
-
-from nucliadb.common.cluster import manager as cluster_manager
-from nucliadb.standalone.settings import Settings
-from nucliadb.writer import API_PREFIX
-from nucliadb_protos.train_pb2_grpc import TrainStub
-from nucliadb_protos.utils_pb2 import Relation, RelationNode
-from nucliadb_protos.writer_pb2 import BrokerMessage
-from nucliadb_protos.writer_pb2_grpc import WriterStub
-from nucliadb_utils.aiopynecone.models import QueryResponse
-from nucliadb_utils.utilities import (
- Utility,
- clean_pinecone,
- clean_utility,
- get_pinecone,
- get_utility,
- set_utility,
-)
-from tests.utils import inject_message
-from tests.utils.dirty_index import mark_dirty, wait_for_sync
-
-logger = logging.getLogger(__name__)
-
-
-@pytest.fixture(scope="function")
-async def nucliadb_reader(nucliadb: Settings):
- async with AsyncClient(
- headers={"X-NUCLIADB-ROLES": "READER"},
- base_url=f"http://localhost:{nucliadb.http_port}/{API_PREFIX}/v1",
- timeout=None,
- event_hooks={"request": [wait_for_sync]},
- ) as client:
- yield client
-
-
-@pytest.fixture(scope="function")
-async def nucliadb_writer(nucliadb: Settings):
- async with AsyncClient(
- headers={"X-NUCLIADB-ROLES": "WRITER"},
- base_url=f"http://localhost:{nucliadb.http_port}/{API_PREFIX}/v1",
- timeout=None,
- event_hooks={"request": [mark_dirty]},
- ) as client:
- yield client
-
-
-@pytest.fixture(scope="function")
-async def nucliadb_manager(nucliadb: Settings):
- async with AsyncClient(
- headers={"X-NUCLIADB-ROLES": "MANAGER"},
- base_url=f"http://localhost:{nucliadb.http_port}/{API_PREFIX}/v1",
- timeout=None,
- event_hooks={"request": [mark_dirty]},
- ) as client:
- yield client
-
-
-@pytest.fixture(scope="function")
-async def knowledgebox(nucliadb_manager: AsyncClient):
- resp = await nucliadb_manager.post("/kbs", json={"slug": "knowledgebox"})
- assert resp.status_code == 201
- uuid = resp.json().get("uuid")
-
- yield uuid
-
- resp = await nucliadb_manager.delete(f"/kb/{uuid}")
- assert resp.status_code == 200
-
-
-@pytest.fixture(scope="function")
-def pinecone_data_plane():
- dp = Mock()
- dp.upsert = AsyncMock(return_value=None)
- dp.query = AsyncMock(
- return_value=QueryResponse(
- matches=[],
- )
- )
- return dp
-
-
-@pytest.fixture(scope="function")
-def pinecone_control_plane():
- cp = Mock()
- cp.create_index = AsyncMock(return_value="pinecone-host")
- cp.delete_index = AsyncMock(return_value=None)
- return cp
-
-
-@pytest.fixture(scope="function")
-def pinecone_mock(pinecone_data_plane, pinecone_control_plane):
- pinecone_session = get_pinecone()
- pinecone_session.data_plane = Mock(return_value=pinecone_data_plane)
- pinecone_session.control_plane = Mock(return_value=pinecone_control_plane)
- yield
- clean_pinecone()
-
-
-@pytest.fixture(scope="function")
-async def pinecone_knowledgebox(nucliadb_manager: AsyncClient, pinecone_mock):
- resp = await nucliadb_manager.post(
- "/kbs",
- json={
- "slug": "pinecone_knowledgebox",
- "external_index_provider": {
- "type": "pinecone",
- "api_key": "my-pinecone-api-key",
- "serverless_cloud": "aws_us_east_1",
- },
- },
- )
- assert resp.status_code == 201
- uuid = resp.json().get("uuid")
-
- yield uuid
-
- resp = await nucliadb_manager.delete(f"/kb/{uuid}")
- assert resp.status_code == 200
-
-
-@pytest.fixture(scope="function")
-async def nucliadb_grpc(nucliadb: Settings):
- stub = WriterStub(aio.insecure_channel(f"localhost:{nucliadb.ingest_grpc_port}"))
- return stub
-
-
-@pytest.fixture(scope="function")
-async def nucliadb_train(nucliadb: Settings):
- stub = TrainStub(aio.insecure_channel(f"localhost:{nucliadb.train_grpc_port}"))
- return stub
-
-
-@pytest.fixture(scope="function")
-async def knowledge_graph(nucliadb_writer: AsyncClient, nucliadb_grpc: WriterStub, knowledgebox):
- resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
- json={
- "title": "Knowledge graph",
- "slug": "knowledgegraph",
- "summary": "Test knowledge graph",
- },
- )
- assert resp.status_code == 201
- rid = resp.json()["uuid"]
-
- nodes = {
- "Animal": RelationNode(value="Animal", ntype=RelationNode.NodeType.ENTITY, subtype=""),
- "Batman": RelationNode(value="Batman", ntype=RelationNode.NodeType.ENTITY, subtype=""),
- "Becquer": RelationNode(value="Becquer", ntype=RelationNode.NodeType.ENTITY, subtype=""),
- "Cat": RelationNode(value="Cat", ntype=RelationNode.NodeType.ENTITY, subtype=""),
- "Catwoman": RelationNode(value="Catwoman", ntype=RelationNode.NodeType.ENTITY, subtype=""),
- "Eric": RelationNode(value="Eric", ntype=RelationNode.NodeType.ENTITY, subtype=""),
- "Fly": RelationNode(value="Fly", ntype=RelationNode.NodeType.ENTITY, subtype=""),
- "Gravity": RelationNode(value="Gravity", ntype=RelationNode.NodeType.ENTITY, subtype=""),
- "Joan Antoni": RelationNode(value="Joan Antoni", ntype=RelationNode.NodeType.ENTITY, subtype=""),
- "Joker": RelationNode(value="Joker", ntype=RelationNode.NodeType.ENTITY, subtype=""),
- "Newton": RelationNode(value="Newton", ntype=RelationNode.NodeType.ENTITY, subtype="science"),
- "Isaac Newsome": RelationNode(
- value="Isaac Newsome", ntype=RelationNode.NodeType.ENTITY, subtype="science"
- ),
- "Physics": RelationNode(value="Physics", ntype=RelationNode.NodeType.ENTITY, subtype="science"),
- "Poetry": RelationNode(value="Poetry", ntype=RelationNode.NodeType.ENTITY, subtype=""),
- "Swallow": RelationNode(value="Swallow", ntype=RelationNode.NodeType.ENTITY, subtype=""),
- }
-
- edges = [
- Relation(
- relation=Relation.RelationType.ENTITY,
- source=nodes["Batman"],
- to=nodes["Catwoman"],
- relation_label="love",
- ),
- Relation(
- relation=Relation.RelationType.ENTITY,
- source=nodes["Batman"],
- to=nodes["Joker"],
- relation_label="fight",
- ),
- Relation(
- relation=Relation.RelationType.ENTITY,
- source=nodes["Joker"],
- to=nodes["Physics"],
- relation_label="enjoy",
- ),
- Relation(
- relation=Relation.RelationType.ENTITY,
- source=nodes["Catwoman"],
- to=nodes["Cat"],
- relation_label="imitate",
- ),
- Relation(
- relation=Relation.RelationType.ENTITY,
- source=nodes["Cat"],
- to=nodes["Animal"],
- relation_label="species",
- ),
- Relation(
- relation=Relation.RelationType.ENTITY,
- source=nodes["Newton"],
- to=nodes["Physics"],
- relation_label="study",
- ),
- Relation(
- relation=Relation.RelationType.ENTITY,
- source=nodes["Newton"],
- to=nodes["Gravity"],
- relation_label="formulate",
- ),
- Relation(
- relation=Relation.RelationType.ENTITY,
- source=nodes["Isaac Newsome"],
- to=nodes["Physics"],
- relation_label="study",
- ),
- Relation(
- relation=Relation.RelationType.ENTITY,
- source=nodes["Isaac Newsome"],
- to=nodes["Gravity"],
- relation_label="formulate",
- ),
- Relation(
- relation=Relation.RelationType.ENTITY,
- source=nodes["Eric"],
- to=nodes["Cat"],
- relation_label="like",
- ),
- Relation(
- relation=Relation.RelationType.ENTITY,
- source=nodes["Eric"],
- to=nodes["Joan Antoni"],
- relation_label="collaborate",
- ),
- Relation(
- relation=Relation.RelationType.ENTITY,
- source=nodes["Joan Antoni"],
- to=nodes["Eric"],
- relation_label="collaborate",
- ),
- Relation(
- relation=Relation.RelationType.ENTITY,
- source=nodes["Joan Antoni"],
- to=nodes["Becquer"],
- relation_label="read",
- ),
- Relation(
- relation=Relation.RelationType.ENTITY,
- source=nodes["Becquer"],
- to=nodes["Poetry"],
- relation_label="write",
- ),
- Relation(
- relation=Relation.RelationType.ENTITY,
- source=nodes["Becquer"],
- to=nodes["Poetry"],
- relation_label="like",
- ),
- Relation(
- relation=Relation.RelationType.ABOUT,
- source=nodes["Poetry"],
- to=nodes["Swallow"],
- relation_label="about",
- ),
- Relation(
- relation=Relation.RelationType.ENTITY,
- source=nodes["Swallow"],
- to=nodes["Animal"],
- relation_label="species",
- ),
- Relation(
- relation=Relation.RelationType.ENTITY,
- source=nodes["Swallow"],
- to=nodes["Fly"],
- relation_label="can",
- ),
- Relation(
- relation=Relation.RelationType.ENTITY,
- source=nodes["Fly"],
- to=nodes["Gravity"],
- relation_label="defy",
- ),
- ]
-
- bm = BrokerMessage()
- bm.uuid = rid
- bm.kbid = knowledgebox
- bm.relations.extend(edges)
- await inject_message(nucliadb_grpc, bm)
- await wait_for_sync()
-
- resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/entitiesgroups",
- json={
- "title": "scientist",
- "color": "",
- "entities": {
- "Isaac": {"value": "Isaac"},
- "Isaac Newton": {"value": "Isaac Newton", "represents": ["Newton"]},
- "Isaac Newsome": {"value": "Isaac Newsome"},
- },
- "custom": True,
- "group": "scientist",
- },
- )
- assert resp.status_code == 200, resp.content
- resp = await nucliadb_writer.patch(
- f"/kb/{knowledgebox}/entitiesgroup/scientist",
- json={"add": {}, "update": {}, "delete": ["Isaac Newsome"]},
- )
- assert resp.status_code == 200, resp.content
- resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/entitiesgroups",
- json={
- "title": "poet",
- "color": "",
- "entities": {
- "Becquer": {
- "value": "Becquer",
- "represents": ["Gustavo Adolfo Bécquer"],
- },
- "Gustavo Adolfo Bécquer": {"value": "Gustavo Adolfo Bécquer"},
- },
- "custom": True,
- "group": "poet",
- },
- )
- assert resp.status_code == 200, resp.content
-
- return (nodes, edges)
-
-
-# TODO: remove after migrating tests/nucliadb/ to ndbfixtures. fixture already
-# moved to ndbfixtures.common
-@pytest.fixture(scope="function")
-async def stream_audit(natsd: str, mocker):
- from nucliadb_utils.audit.stream import StreamAuditStorage
- from nucliadb_utils.settings import audit_settings
-
- audit = StreamAuditStorage(
- [natsd],
- audit_settings.audit_jetstream_target, # type: ignore
- audit_settings.audit_partitions,
- audit_settings.audit_hash_seed,
- )
- await audit.initialize()
-
- mocker.spy(audit, "send")
- mocker.spy(audit.js, "publish")
- mocker.spy(audit, "search")
- mocker.spy(audit, "chat")
-
- set_utility(Utility.AUDIT, audit)
- yield audit
- await audit.finalize()
-
-
-@pytest.fixture(scope="function")
-def predict_mock() -> Mock: # type: ignore
- predict = get_utility(Utility.PREDICT)
- mock = Mock()
- set_utility(Utility.PREDICT, mock)
-
- yield mock
-
- if predict is None:
- clean_utility(Utility.PREDICT)
- else:
- set_utility(Utility.PREDICT, predict)
-
-
-@pytest.fixture(scope="function")
-def metrics_registry():
- import prometheus_client.registry
-
- for collector in prometheus_client.registry.REGISTRY._names_to_collectors.values():
- if not hasattr(collector, "_metrics"):
- continue
- collector._metrics.clear()
- yield prometheus_client.registry.REGISTRY
-
-
-@pytest.fixture(scope="function")
-async def txn(maindb_driver):
- async with maindb_driver.transaction() as txn:
- yield txn
- await txn.abort()
-
-
-@pytest.fixture(scope="function")
-async def shard_manager(storage, maindb_driver):
- mng = cluster_manager.KBShardManager()
- set_utility(Utility.SHARD_MANAGER, mng)
- yield mng
- clean_utility(Utility.SHARD_MANAGER)
diff --git a/nucliadb/tests/ingest/conftest.py b/nucliadb/tests/ingest/conftest.py
index afd1b1582e..bf0f56b222 100644
--- a/nucliadb/tests/ingest/conftest.py
+++ b/nucliadb/tests/ingest/conftest.py
@@ -21,7 +21,6 @@
"pytest_docker_fixtures",
"nucliadb_utils.tests.nats",
"tests.ingest.fixtures",
- "tests.fixtures",
"tests.ndbfixtures.maindb",
"tests.ndbfixtures.processing",
"tests.ndbfixtures.standalone",
diff --git a/nucliadb/tests/ndbfixtures/common.py b/nucliadb/tests/ndbfixtures/common.py
index 13e110d567..d51b61ccfc 100644
--- a/nucliadb/tests/ndbfixtures/common.py
+++ b/nucliadb/tests/ndbfixtures/common.py
@@ -18,20 +18,27 @@
# along with this program. If not, see .
#
from os.path import dirname
-from typing import AsyncIterator, Iterator
+from typing import AsyncIterable, AsyncIterator, Iterator
+from unittest.mock import Mock, patch
import pytest
from pytest_mock import MockerFixture
from nucliadb.common.cluster.manager import KBShardManager
from nucliadb.common.maindb.driver import Driver
+from nucliadb.search.predict import DummyPredictEngine
from nucliadb_utils.audit.audit import AuditStorage
from nucliadb_utils.audit.basic import BasicAuditStorage
from nucliadb_utils.audit.stream import StreamAuditStorage
-from nucliadb_utils.settings import audit_settings
+from nucliadb_utils.settings import (
+ audit_settings,
+ nuclia_settings,
+)
from nucliadb_utils.storages.settings import settings as storage_settings
from nucliadb_utils.storages.storage import Storage
-from nucliadb_utils.utilities import Utility, clean_utility, set_utility
+from nucliadb_utils.utilities import (
+ Utility,
+)
from tests.ndbfixtures.utils import global_utility
# Audit
@@ -80,14 +87,33 @@ async def local_files():
storage_settings.local_testing_files = f"{dirname(__file__)}"
+# Predict
+
+
+@pytest.fixture(scope="function")
+def predict_mock() -> Mock: # type: ignore
+ mock = Mock()
+ with global_utility(Utility.PREDICT, mock):
+ yield mock
+
+
+@pytest.fixture(scope="function")
+async def dummy_predict() -> AsyncIterable[DummyPredictEngine]:
+ with (
+ patch.object(nuclia_settings, "dummy_predict", True),
+ ):
+ predict_util = DummyPredictEngine()
+ await predict_util.initialize()
+
+ with global_utility(Utility.PREDICT, predict_util):
+ yield predict_util
+
+
# Shard manager
@pytest.fixture(scope="function")
async def shard_manager(storage: Storage, maindb_driver: Driver) -> AsyncIterator[KBShardManager]:
sm = KBShardManager()
- set_utility(Utility.SHARD_MANAGER, sm)
-
- yield sm
-
- clean_utility(Utility.SHARD_MANAGER)
+ with global_utility(Utility.SHARD_MANAGER, sm):
+ yield sm
diff --git a/nucliadb/tests/ndbfixtures/ingest.py b/nucliadb/tests/ndbfixtures/ingest.py
index f78284bf48..a397f8c691 100644
--- a/nucliadb/tests/ndbfixtures/ingest.py
+++ b/nucliadb/tests/ndbfixtures/ingest.py
@@ -70,6 +70,12 @@ async def standalone_nucliadb_ingest_grpc(nucliadb: Settings) -> AsyncIterator[W
await channel.close(grace=None)
+# alias to ease migration to new ndbfixtures
+@pytest.fixture(scope="function")
+async def standalone_nucliadb_grpc(standalone_nucliadb_ingest_grpc):
+ yield standalone_nucliadb_ingest_grpc
+
+
# Utils
diff --git a/nucliadb/tests/nucliadb/conftest.py b/nucliadb/tests/ndbfixtures/legacy.py
similarity index 61%
rename from nucliadb/tests/nucliadb/conftest.py
rename to nucliadb/tests/ndbfixtures/legacy.py
index 729a27e908..c350580bd6 100644
--- a/nucliadb/tests/nucliadb/conftest.py
+++ b/nucliadb/tests/ndbfixtures/legacy.py
@@ -17,20 +17,18 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see .
#
-pytest_plugins = [
- "pytest_docker_fixtures",
- "tests.fixtures",
- "tests.ndbfixtures.maindb",
- "tests.ndbfixtures.processing",
- "tests.ndbfixtures.standalone",
- "tests.nucliadb.knowledgeboxes",
- "nucliadb_utils.tests.asyncbenchmark",
- "nucliadb_utils.tests.nats",
- "tests.ndbfixtures.nidx",
- "tests.ingest.fixtures",
- "nucliadb_utils.tests.fixtures",
- "nucliadb_utils.tests.gcs",
- "nucliadb_utils.tests.azure",
- "nucliadb_utils.tests.s3",
- "nucliadb_utils.tests.local",
-]
+
+from typing import AsyncIterator
+
+import pytest
+
+from nucliadb.common.maindb.driver import Driver, Transaction
+
+# Dependents: search, nucliadb
+
+
+@pytest.fixture(scope="function")
+async def txn(maindb_driver: Driver) -> AsyncIterator[Transaction]:
+ async with maindb_driver.transaction() as txn:
+ yield txn
+ await txn.abort()
diff --git a/nucliadb/tests/ndbfixtures/magic.py b/nucliadb/tests/ndbfixtures/magic.py
index be95b4d03c..540b1f5c7b 100644
--- a/nucliadb/tests/ndbfixtures/magic.py
+++ b/nucliadb/tests/ndbfixtures/magic.py
@@ -43,6 +43,7 @@
],
"nucliadb_train_grpc": [
"component",
+ "standalone",
],
"nucliadb_ingest_grpc": [
"component",
diff --git a/nucliadb/tests/ndbfixtures/resources.py b/nucliadb/tests/ndbfixtures/resources.py
index d85e90dfe8..12415b6d73 100644
--- a/nucliadb/tests/ndbfixtures/resources.py
+++ b/nucliadb/tests/ndbfixtures/resources.py
@@ -18,6 +18,7 @@
# along with this program. If not, see .
#
import asyncio
+import logging
import time
import uuid
from typing import AsyncIterator
@@ -35,8 +36,15 @@
from nucliadb.writer.api.v1.router import KB_PREFIX, KBS_PREFIX
from nucliadb_protos import utils_pb2 as upb
from nucliadb_protos.knowledgebox_pb2 import SemanticModelMetadata
+from nucliadb_protos.utils_pb2 import Relation, RelationNode
+from nucliadb_protos.writer_pb2 import BrokerMessage
+from nucliadb_protos.writer_pb2_grpc import WriterStub
from nucliadb_utils.storages.storage import Storage
+from tests.utils import inject_message
from tests.utils.broker_messages import BrokerMessageBuilder
+from tests.utils.dirty_index import wait_for_sync
+
+logger = logging.getLogger(__name__)
@pytest.fixture(scope="function")
@@ -60,6 +68,18 @@ async def knowledgebox(
# await KnowledgeBox.purge(maindb_driver, kbid)
+@pytest.fixture(scope="function")
+async def standalone_knowledgebox(nucliadb_writer_manager: AsyncClient):
+ resp = await nucliadb_writer_manager.post("/kbs", json={"slug": "knowledgebox"})
+ assert resp.status_code == 201
+ uuid = resp.json().get("uuid")
+
+ yield uuid
+
+ resp = await nucliadb_writer_manager.delete(f"/kb/{uuid}")
+ assert resp.status_code == 200
+
+
# FIXME: this is a weird situation, we can use a hosted-like nucliadb while this
# creates a KB as it was onprem. The end result should not change much but still, is
# something we may want to fix
@@ -164,3 +184,204 @@ async def simple_resources(
await asyncio.sleep(0.1)
yield knowledgebox, resource_ids
+
+
+# Only supported for standalone (as it depends on standalone_knowledgebox fixture)
+@pytest.fixture(scope="function")
+async def knowledge_graph(
+ nucliadb_writer: AsyncClient, nucliadb_ingest_grpc: WriterStub, standalone_knowledgebox: str
+):
+ resp = await nucliadb_writer.post(
+ f"/kb/{standalone_knowledgebox}/resources",
+ json={
+ "title": "Knowledge graph",
+ "slug": "knowledgegraph",
+ "summary": "Test knowledge graph",
+ },
+ )
+ assert resp.status_code == 201
+ rid = resp.json()["uuid"]
+
+ nodes = {
+ "Animal": RelationNode(value="Animal", ntype=RelationNode.NodeType.ENTITY, subtype=""),
+ "Batman": RelationNode(value="Batman", ntype=RelationNode.NodeType.ENTITY, subtype=""),
+ "Becquer": RelationNode(value="Becquer", ntype=RelationNode.NodeType.ENTITY, subtype=""),
+ "Cat": RelationNode(value="Cat", ntype=RelationNode.NodeType.ENTITY, subtype=""),
+ "Catwoman": RelationNode(value="Catwoman", ntype=RelationNode.NodeType.ENTITY, subtype=""),
+ "Eric": RelationNode(value="Eric", ntype=RelationNode.NodeType.ENTITY, subtype=""),
+ "Fly": RelationNode(value="Fly", ntype=RelationNode.NodeType.ENTITY, subtype=""),
+ "Gravity": RelationNode(value="Gravity", ntype=RelationNode.NodeType.ENTITY, subtype=""),
+ "Joan Antoni": RelationNode(value="Joan Antoni", ntype=RelationNode.NodeType.ENTITY, subtype=""),
+ "Joker": RelationNode(value="Joker", ntype=RelationNode.NodeType.ENTITY, subtype=""),
+ "Newton": RelationNode(value="Newton", ntype=RelationNode.NodeType.ENTITY, subtype="science"),
+ "Isaac Newsome": RelationNode(
+ value="Isaac Newsome", ntype=RelationNode.NodeType.ENTITY, subtype="science"
+ ),
+ "Physics": RelationNode(value="Physics", ntype=RelationNode.NodeType.ENTITY, subtype="science"),
+ "Poetry": RelationNode(value="Poetry", ntype=RelationNode.NodeType.ENTITY, subtype=""),
+ "Swallow": RelationNode(value="Swallow", ntype=RelationNode.NodeType.ENTITY, subtype=""),
+ }
+
+ edges = [
+ Relation(
+ relation=Relation.RelationType.ENTITY,
+ source=nodes["Batman"],
+ to=nodes["Catwoman"],
+ relation_label="love",
+ ),
+ Relation(
+ relation=Relation.RelationType.ENTITY,
+ source=nodes["Batman"],
+ to=nodes["Joker"],
+ relation_label="fight",
+ ),
+ Relation(
+ relation=Relation.RelationType.ENTITY,
+ source=nodes["Joker"],
+ to=nodes["Physics"],
+ relation_label="enjoy",
+ ),
+ Relation(
+ relation=Relation.RelationType.ENTITY,
+ source=nodes["Catwoman"],
+ to=nodes["Cat"],
+ relation_label="imitate",
+ ),
+ Relation(
+ relation=Relation.RelationType.ENTITY,
+ source=nodes["Cat"],
+ to=nodes["Animal"],
+ relation_label="species",
+ ),
+ Relation(
+ relation=Relation.RelationType.ENTITY,
+ source=nodes["Newton"],
+ to=nodes["Physics"],
+ relation_label="study",
+ ),
+ Relation(
+ relation=Relation.RelationType.ENTITY,
+ source=nodes["Newton"],
+ to=nodes["Gravity"],
+ relation_label="formulate",
+ ),
+ Relation(
+ relation=Relation.RelationType.ENTITY,
+ source=nodes["Isaac Newsome"],
+ to=nodes["Physics"],
+ relation_label="study",
+ ),
+ Relation(
+ relation=Relation.RelationType.ENTITY,
+ source=nodes["Isaac Newsome"],
+ to=nodes["Gravity"],
+ relation_label="formulate",
+ ),
+ Relation(
+ relation=Relation.RelationType.ENTITY,
+ source=nodes["Eric"],
+ to=nodes["Cat"],
+ relation_label="like",
+ ),
+ Relation(
+ relation=Relation.RelationType.ENTITY,
+ source=nodes["Eric"],
+ to=nodes["Joan Antoni"],
+ relation_label="collaborate",
+ ),
+ Relation(
+ relation=Relation.RelationType.ENTITY,
+ source=nodes["Joan Antoni"],
+ to=nodes["Eric"],
+ relation_label="collaborate",
+ ),
+ Relation(
+ relation=Relation.RelationType.ENTITY,
+ source=nodes["Joan Antoni"],
+ to=nodes["Becquer"],
+ relation_label="read",
+ ),
+ Relation(
+ relation=Relation.RelationType.ENTITY,
+ source=nodes["Becquer"],
+ to=nodes["Poetry"],
+ relation_label="write",
+ ),
+ Relation(
+ relation=Relation.RelationType.ENTITY,
+ source=nodes["Becquer"],
+ to=nodes["Poetry"],
+ relation_label="like",
+ ),
+ Relation(
+ relation=Relation.RelationType.ABOUT,
+ source=nodes["Poetry"],
+ to=nodes["Swallow"],
+ relation_label="about",
+ ),
+ Relation(
+ relation=Relation.RelationType.ENTITY,
+ source=nodes["Swallow"],
+ to=nodes["Animal"],
+ relation_label="species",
+ ),
+ Relation(
+ relation=Relation.RelationType.ENTITY,
+ source=nodes["Swallow"],
+ to=nodes["Fly"],
+ relation_label="can",
+ ),
+ Relation(
+ relation=Relation.RelationType.ENTITY,
+ source=nodes["Fly"],
+ to=nodes["Gravity"],
+ relation_label="defy",
+ ),
+ ]
+
+ bm = BrokerMessage()
+ bm.uuid = rid
+ bm.kbid = standalone_knowledgebox
+ bm.relations.extend(edges)
+ await inject_message(nucliadb_ingest_grpc, bm)
+ await wait_for_sync()
+
+ resp = await nucliadb_writer.post(
+ f"/kb/{standalone_knowledgebox}/entitiesgroups",
+ json={
+ "title": "scientist",
+ "color": "",
+ "entities": {
+ "Isaac": {"value": "Isaac"},
+ "Isaac Newton": {"value": "Isaac Newton", "represents": ["Newton"]},
+ "Isaac Newsome": {"value": "Isaac Newsome"},
+ },
+ "custom": True,
+ "group": "scientist",
+ },
+ )
+ assert resp.status_code == 200, resp.content
+ resp = await nucliadb_writer.patch(
+ f"/kb/{standalone_knowledgebox}/entitiesgroup/scientist",
+ json={"add": {}, "update": {}, "delete": ["Isaac Newsome"]},
+ )
+ assert resp.status_code == 200, resp.content
+ resp = await nucliadb_writer.post(
+ f"/kb/{standalone_knowledgebox}/entitiesgroups",
+ json={
+ "title": "poet",
+ "color": "",
+ "entities": {
+ "Becquer": {
+ "value": "Becquer",
+ "represents": ["Gustavo Adolfo Bécquer"],
+ },
+ "Gustavo Adolfo Bécquer": {"value": "Gustavo Adolfo Bécquer"},
+ },
+ "custom": True,
+ "group": "poet",
+ },
+ )
+ assert resp.status_code == 200, resp.content
+
+ return (nodes, edges)
diff --git a/nucliadb/tests/ndbfixtures/search.py b/nucliadb/tests/ndbfixtures/search.py
index 559fb46c1d..eae1b614ee 100644
--- a/nucliadb/tests/ndbfixtures/search.py
+++ b/nucliadb/tests/ndbfixtures/search.py
@@ -18,7 +18,6 @@
# along with this program. If not, see .
import asyncio
-from typing import AsyncIterable
from unittest.mock import patch
import pytest
@@ -31,7 +30,6 @@
from nucliadb.ingest.cache import clear_ingest_cache
from nucliadb.ingest.settings import settings as ingest_settings
from nucliadb.search.app import application
-from nucliadb.search.predict import DummyPredictEngine
from nucliadb_models.resource import NucliaDBRoles
from nucliadb_protos.nodereader_pb2 import GetShardRequest
from nucliadb_protos.noderesources_pb2 import Shard
@@ -45,11 +43,10 @@
from nucliadb_utils.tests import free_port
from nucliadb_utils.transaction import TransactionUtility
from nucliadb_utils.utilities import (
- Utility,
clear_global_cache,
)
from tests.ingest.fixtures import broker_resource
-from tests.ndbfixtures.utils import create_api_client_factory, global_utility
+from tests.ndbfixtures.utils import create_api_client_factory
# Main fixtures
@@ -88,18 +85,6 @@ async def cluster_nucliadb_search(
# Rest, TODO keep cleaning
-@pytest.fixture(scope="function")
-async def dummy_predict() -> AsyncIterable[DummyPredictEngine]:
- with (
- patch.object(nuclia_settings, "dummy_predict", True),
- ):
- predict_util = DummyPredictEngine()
- await predict_util.initialize()
-
- with global_utility(Utility.PREDICT, predict_util):
- yield predict_util
-
-
@pytest.fixture(scope="function")
async def test_search_resource(
indexing_utility_registered,
@@ -190,13 +175,3 @@ async def wait_for_shard(knowledgebox_ingest: str, count: int) -> str:
# Wait an extra couple of seconds for reader/searcher to catch up
await asyncio.sleep(2)
return knowledgebox_ingest
-
-
-# Dependencies from tests/fixtures.py
-
-
-@pytest.fixture(scope="function")
-async def txn(maindb_driver):
- async with maindb_driver.transaction() as txn:
- yield txn
- await txn.abort()
diff --git a/nucliadb/tests/ndbfixtures/train.py b/nucliadb/tests/ndbfixtures/train.py
index 4b13ee10af..b24a4be975 100644
--- a/nucliadb/tests/ndbfixtures/train.py
+++ b/nucliadb/tests/ndbfixtures/train.py
@@ -79,7 +79,16 @@ class TrainGrpcServer:
@pytest.fixture(scope="function")
async def component_nucliadb_train_grpc(train_grpc_server: TrainGrpcServer) -> AsyncIterator[TrainStub]:
channel = aio.insecure_channel(f"localhost:{train_grpc_server.port}")
- yield TrainStub(channel)
+ stub = TrainStub(channel)
+ yield stub
+ await channel.close(grace=None)
+
+
+@pytest.fixture(scope="function")
+async def standalone_nucliadb_train_grpc(nucliadb: Settings) -> AsyncIterator[TrainStub]:
+ channel = aio.insecure_channel(f"localhost:{nucliadb.train_grpc_port}")
+ stub = TrainStub(channel)
+ yield stub
await channel.close(grace=None)
diff --git a/nucliadb/tests/nucliadb/benchmarks/test_search.py b/nucliadb/tests/nucliadb/benchmarks/test_search.py
index 8edf2ad1e3..5f5971ef98 100644
--- a/nucliadb/tests/nucliadb/benchmarks/test_search.py
+++ b/nucliadb/tests/nucliadb/benchmarks/test_search.py
@@ -41,19 +41,20 @@
disable_gc=True,
warmup=False,
)
+@pytest.mark.deploy_modes("standalone")
async def test_search_returns_labels(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox,
asyncbenchmark: AsyncBenchmarkFixture,
):
- bm = broker_resource_with_classifications(knowledgebox)
- await inject_message(nucliadb_grpc, bm)
+ bm = broker_resource_with_classifications(standalone_knowledgebox)
+ await inject_message(nucliadb_ingest_grpc, bm)
resp = await asyncbenchmark(
nucliadb_reader.get,
- f"/kb/{knowledgebox}/search?query=Some&show=extracted&extracted=metadata",
+ f"/kb/{standalone_knowledgebox}/search?query=Some&show=extracted&extracted=metadata",
)
assert resp.status_code == 200
@@ -67,11 +68,12 @@ async def test_search_returns_labels(
disable_gc=True,
warmup=False,
)
+@pytest.mark.deploy_modes("standalone")
async def test_search_relations(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox,
knowledge_graph,
asyncbenchmark: AsyncBenchmarkFixture,
):
@@ -86,7 +88,7 @@ async def test_search_relations(
resp = await asyncbenchmark(
nucliadb_reader.get,
- f"/kb/{knowledgebox}/search",
+ f"/kb/{standalone_knowledgebox}/search",
params={
"features": "relations",
"query": "What relates Newton and Becquer?",
diff --git a/nucliadb/tests/nucliadb/integration/common/cluster/test_rebalance.py b/nucliadb/tests/nucliadb/integration/common/cluster/test_rebalance.py
index 4399f47571..27da7e56ce 100644
--- a/nucliadb/tests/nucliadb/integration/common/cluster/test_rebalance.py
+++ b/nucliadb/tests/nucliadb/integration/common/cluster/test_rebalance.py
@@ -37,16 +37,17 @@ async def app_context(natsd, storage, nucliadb):
await ctx.finalize()
+@pytest.mark.deploy_modes("standalone")
async def test_rebalance_kb_shards(
app_context,
- knowledgebox,
+ standalone_knowledgebox,
nucliadb_writer: AsyncClient,
- nucliadb_manager: AsyncClient,
+ nucliadb_reader_manager: AsyncClient,
):
count = 10
for i in range(count):
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"slug": f"myresource-{i}",
"title": f"My Title {i}",
@@ -60,20 +61,20 @@ async def test_rebalance_kb_shards(
)
assert resp.status_code == 201
- counters1_resp = await nucliadb_manager.get(f"/kb/{knowledgebox}/counters")
- shards1_resp = await nucliadb_manager.get(f"/kb/{knowledgebox}/shards")
+ counters1_resp = await nucliadb_reader_manager.get(f"/kb/{standalone_knowledgebox}/counters")
+ shards1_resp = await nucliadb_reader_manager.get(f"/kb/{standalone_knowledgebox}/shards")
counters1 = counters1_resp.json()
shards1 = shards1_resp.json()
assert len(shards1["shards"]) == 1
with patch.object(settings, "max_shard_paragraphs", counters1["paragraphs"] / 2):
- await rebalance.rebalance_kb(app_context, knowledgebox)
+ await rebalance.rebalance_kb(app_context, standalone_knowledgebox)
- shards2_resp = await nucliadb_manager.get(f"/kb/{knowledgebox}/shards")
+ shards2_resp = await nucliadb_reader_manager.get(f"/kb/{standalone_knowledgebox}/shards")
shards2 = shards2_resp.json()
assert len(shards2["shards"]) == 2
# if we run it again, we should get another shard
with patch.object(settings, "max_shard_paragraphs", counters1["paragraphs"] / 2):
- await rebalance.rebalance_kb(app_context, knowledgebox)
+ await rebalance.rebalance_kb(app_context, standalone_knowledgebox)
diff --git a/nucliadb/tests/nucliadb/integration/common/cluster/test_rollover.py b/nucliadb/tests/nucliadb/integration/common/cluster/test_rollover.py
index 2bfb23b487..379cab114a 100644
--- a/nucliadb/tests/nucliadb/integration/common/cluster/test_rollover.py
+++ b/nucliadb/tests/nucliadb/integration/common/cluster/test_rollover.py
@@ -39,27 +39,33 @@ async def app_context(natsd, storage, nucliadb):
await ctx.finalize()
+@pytest.mark.deploy_modes("standalone")
async def test_rollover_kb_index(
app_context: ApplicationContext,
- knowledgebox,
+ standalone_knowledgebox,
nucliadb_writer: AsyncClient,
nucliadb_reader: AsyncClient,
- nucliadb_manager: AsyncClient,
+ nucliadb_reader_manager: AsyncClient,
):
await _test_rollover_kb_index(
- app_context, knowledgebox, nucliadb_writer, nucliadb_reader, nucliadb_manager
+ app_context, standalone_knowledgebox, nucliadb_writer, nucliadb_reader, nucliadb_reader_manager
)
+@pytest.mark.deploy_modes("standalone")
async def test_rollover_kb_index_with_vectorsets(
app_context: ApplicationContext,
knowledgebox_with_vectorsets: str,
nucliadb_writer: AsyncClient,
nucliadb_reader: AsyncClient,
- nucliadb_manager: AsyncClient,
+ nucliadb_reader_manager: AsyncClient,
):
await _test_rollover_kb_index(
- app_context, knowledgebox_with_vectorsets, nucliadb_writer, nucliadb_reader, nucliadb_manager
+ app_context,
+ knowledgebox_with_vectorsets,
+ nucliadb_writer,
+ nucliadb_reader,
+ nucliadb_reader_manager,
)
@@ -68,7 +74,7 @@ async def _test_rollover_kb_index(
kbid: str,
nucliadb_writer: AsyncClient,
nucliadb_reader: AsyncClient,
- nucliadb_manager: AsyncClient,
+ nucliadb_reader_manager: AsyncClient,
):
count = 20
for i in range(count):
@@ -83,13 +89,13 @@ async def _test_rollover_kb_index(
)
assert resp.status_code == 201
- resp = await nucliadb_manager.get(f"/kb/{kbid}/shards")
+ resp = await nucliadb_reader_manager.get(f"/kb/{kbid}/shards")
assert resp.status_code == 200, resp.text
shards_body1 = resp.json()
await rollover.rollover_kb_index(app_context, kbid)
- resp = await nucliadb_manager.get(f"/kb/{kbid}/shards")
+ resp = await nucliadb_reader_manager.get(f"/kb/{kbid}/shards")
assert resp.status_code == 200, resp.text
shards_body2 = resp.json()
# check that shards have changed
@@ -107,6 +113,7 @@ async def _test_rollover_kb_index(
assert len(body["resources"]) == count
+@pytest.mark.deploy_modes("standalone")
async def test_rollover_kb_index_does_a_clean_cutover(
app_context,
knowledgebox,
@@ -124,12 +131,13 @@ async def get_kb_shards(kbid: str):
assert shards2.extra == {}
+@pytest.mark.deploy_modes("standalone")
async def test_rollover_kb_index_handles_changes_in_between(
app_context,
knowledgebox,
nucliadb_writer: AsyncClient,
nucliadb_reader: AsyncClient,
- nucliadb_manager: AsyncClient,
+ nucliadb_reader_manager: AsyncClient,
):
count = 50
resources = []
diff --git a/nucliadb/tests/nucliadb/integration/migrator/test_migrator.py b/nucliadb/tests/nucliadb/integration/migrator/test_migrator.py
index 950213bae0..414477a2e3 100644
--- a/nucliadb/tests/nucliadb/integration/migrator/test_migrator.py
+++ b/nucliadb/tests/nucliadb/integration/migrator/test_migrator.py
@@ -20,6 +20,7 @@
import uuid
import pytest
+from httpx import AsyncClient
from nucliadb.migrator import migrator
from nucliadb.migrator.context import ExecutionContext
@@ -38,12 +39,13 @@ async def execution_context(natsd, storage, nucliadb):
await context.finalize()
-async def test_migrate_kb(execution_context: ExecutionContext, knowledgebox):
+@pytest.mark.deploy_modes("standalone")
+async def test_migrate_kb(execution_context: ExecutionContext, standalone_knowledgebox):
# this will test run all available migrations
- await execution_context.data_manager.update_kb_info(kbid=knowledgebox, current_version=-1)
+ await execution_context.data_manager.update_kb_info(kbid=standalone_knowledgebox, current_version=-1)
await execution_context.data_manager.update_global_info(current_version=0)
- kb_info = await execution_context.data_manager.get_kb_info(kbid=knowledgebox)
+ kb_info = await execution_context.data_manager.get_kb_info(kbid=standalone_knowledgebox)
assert kb_info is not None
assert kb_info.current_version == -1
global_info = await execution_context.data_manager.get_global_info()
@@ -53,7 +55,7 @@ async def test_migrate_kb(execution_context: ExecutionContext, knowledgebox):
# other tests can be so slow and cumbersome to maintain
await migrator.run(execution_context, target_version=1)
- kb_info = await execution_context.data_manager.get_kb_info(kbid=knowledgebox)
+ kb_info = await execution_context.data_manager.get_kb_info(kbid=standalone_knowledgebox)
assert kb_info is not None
assert kb_info.current_version == 1
global_info = await execution_context.data_manager.get_global_info()
@@ -61,20 +63,21 @@ async def test_migrate_kb(execution_context: ExecutionContext, knowledgebox):
@pytest.fixture(scope="function")
-async def two_knowledgeboxes(nucliadb_manager):
+async def two_knowledgeboxes(nucliadb_writer_manager: AsyncClient):
kbs = []
for _ in range(2):
- resp = await nucliadb_manager.post("/kbs", json={"slug": uuid.uuid4().hex})
+ resp = await nucliadb_writer_manager.post("/kbs", json={"slug": uuid.uuid4().hex})
assert resp.status_code == 201
kbs.append(resp.json().get("uuid"))
yield kbs
for kb in kbs:
- resp = await nucliadb_manager.delete(f"/kb/{kb}")
+ resp = await nucliadb_writer_manager.delete(f"/kb/{kb}")
assert resp.status_code == 200
+@pytest.mark.deploy_modes("standalone")
async def test_run_all_kb_migrations(execution_context: ExecutionContext, two_knowledgeboxes):
# Set migration version to -1 for all knowledgeboxes
for kbid in two_knowledgeboxes:
@@ -100,6 +103,7 @@ async def test_run_all_kb_migrations(execution_context: ExecutionContext, two_kn
assert global_info.current_version == 1
+@pytest.mark.deploy_modes("standalone")
async def test_run_kb_rollovers(execution_context: ExecutionContext, two_knowledgeboxes):
# Set migration version to -1 for all knowledgeboxes
for kbid in two_knowledgeboxes:
diff --git a/nucliadb/tests/nucliadb/integration/search/post_retrieval/test_rank_fusion.py b/nucliadb/tests/nucliadb/integration/search/post_retrieval/test_rank_fusion.py
index df0f234b2c..911fdfa7ae 100644
--- a/nucliadb/tests/nucliadb/integration/search/post_retrieval/test_rank_fusion.py
+++ b/nucliadb/tests/nucliadb/integration/search/post_retrieval/test_rank_fusion.py
@@ -39,6 +39,7 @@
(ReciprocalRankFusion().model_dump(), {SCORE_TYPE.BM25}),
],
)
+@pytest.mark.deploy_modes("standalone")
async def test_rank_fusion(
nucliadb_reader: AsyncClient,
philosophy_books_kb: str,
@@ -91,6 +92,7 @@ def get_score_types(results: KnowledgeboxFindResults) -> set[SCORE_TYPE]:
return score_types
+@pytest.mark.deploy_modes("standalone")
async def test_reciprocal_rank_fusion_requests_more_results(
nucliadb_reader: AsyncClient,
philosophy_books_kb: str,
diff --git a/nucliadb/tests/nucliadb/integration/search/post_retrieval/test_reranker.py b/nucliadb/tests/nucliadb/integration/search/post_retrieval/test_reranker.py
index d592b59919..c5f76f74fe 100644
--- a/nucliadb/tests/nucliadb/integration/search/post_retrieval/test_reranker.py
+++ b/nucliadb/tests/nucliadb/integration/search/post_retrieval/test_reranker.py
@@ -37,6 +37,7 @@
PredictReranker(window=50).model_dump(),
],
)
+@pytest.mark.deploy_modes("standalone")
async def test_reranker(
nucliadb_reader: AsyncClient,
philosophy_books_kb: str,
@@ -80,6 +81,7 @@ async def test_reranker(
(PredictReranker(window=5 * 2).model_dump(), 5 * 2),
],
)
+@pytest.mark.deploy_modes("standalone")
async def test_predict_reranker_requests_more_results(
nucliadb_reader: AsyncClient,
philosophy_books_kb: str,
diff --git a/nucliadb/tests/nucliadb/integration/search/test_autofilters.py b/nucliadb/tests/nucliadb/integration/search/test_autofilters.py
index 3789a2a037..efaa83a3ba 100644
--- a/nucliadb/tests/nucliadb/integration/search/test_autofilters.py
+++ b/nucliadb/tests/nucliadb/integration/search/test_autofilters.py
@@ -29,15 +29,16 @@
from tests.utils.predict import predict_query_hook
+@pytest.mark.deploy_modes("standalone")
async def test_autofilters_are_returned(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox: str,
knowledge_graph,
mocked_predict,
):
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/search",
+ f"/kb/{standalone_knowledgebox}/search",
params={
"query": "What relates Newton and Becquer?",
},
@@ -46,7 +47,7 @@ async def test_autofilters_are_returned(
assert resp.json()["autofilters"] == []
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/find",
+ f"/kb/{standalone_knowledgebox}/find",
params={
"autofilter": True,
"query": "What relates Newton and Becquer?",
diff --git a/nucliadb/tests/nucliadb/integration/search/test_filters.py b/nucliadb/tests/nucliadb/integration/search/test_filters.py
index c05064285c..b16a59f6fc 100644
--- a/nucliadb/tests/nucliadb/integration/search/test_filters.py
+++ b/nucliadb/tests/nucliadb/integration/search/test_filters.py
@@ -262,7 +262,7 @@ def broker_message_with_labels(kbid):
return bm
-async def create_test_labelsets(nucliadb_writer, kbid: str):
+async def create_test_labelsets(nucliadb_writer: AsyncClient, kbid: str):
for kind, _label in (
(LabelSetKind.RESOURCES, ClassificationLabels.RESOURCE_ANNOTATED),
(LabelSetKind.RESOURCES, ClassificationLabels.FIELD_DETECTED),
@@ -282,16 +282,17 @@ async def create_test_labelsets(nucliadb_writer, kbid: str):
@pytest.fixture(scope="function")
async def kbid(
- nucliadb_grpc: WriterStub,
- nucliadb_writer,
- knowledgebox,
+ nucliadb_ingest_grpc: WriterStub,
+ nucliadb_writer: AsyncClient,
+ standalone_knowledgebox,
):
- await create_test_labelsets(nucliadb_writer, knowledgebox)
- await inject_message(nucliadb_grpc, broker_message_with_entities(knowledgebox))
- await inject_message(nucliadb_grpc, broker_message_with_labels(knowledgebox))
- return knowledgebox
+ await create_test_labelsets(nucliadb_writer, standalone_knowledgebox)
+ await inject_message(nucliadb_ingest_grpc, broker_message_with_entities(standalone_knowledgebox))
+ await inject_message(nucliadb_ingest_grpc, broker_message_with_labels(standalone_knowledgebox))
+ return standalone_knowledgebox
+@pytest.mark.deploy_modes("standalone")
async def test_filtering_before_and_after_reindexing(
app_context, nucliadb_reader: AsyncClient, kbid: str
):
diff --git a/nucliadb/tests/nucliadb/integration/search/test_filters_expression.py b/nucliadb/tests/nucliadb/integration/search/test_filters_expression.py
index 72e6972049..caa6c2c35f 100644
--- a/nucliadb/tests/nucliadb/integration/search/test_filters_expression.py
+++ b/nucliadb/tests/nucliadb/integration/search/test_filters_expression.py
@@ -18,9 +18,15 @@
# along with this program. If not, see .
#
+import pytest
+from httpx import AsyncClient
-async def test_filtering_expression(nucliadb_reader, nucliadb_writer, knowledgebox):
- kbid = knowledgebox
+
+@pytest.mark.deploy_modes("standalone")
+async def test_filtering_expression(
+ nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, standalone_knowledgebox: str
+):
+ kbid = standalone_knowledgebox
slug_to_uuid = {}
# Create 3 resources in different folders
@@ -91,7 +97,10 @@ async def test_filtering_expression(nucliadb_reader, nucliadb_writer, knowledgeb
assert found_uuids == expected_uuids
-async def test_filtering_expression_validation(nucliadb_reader, nucliadb_writer):
+@pytest.mark.deploy_modes("standalone")
+async def test_filtering_expression_validation(
+ nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient
+):
# Make sure we only allow one operator per filter
resp = await nucliadb_reader.post(
f"/kb/foobar/find",
diff --git a/nucliadb/tests/nucliadb/integration/search/test_hidden.py b/nucliadb/tests/nucliadb/integration/search/test_hidden.py
index 3017f2452f..be3ac5a6cf 100644
--- a/nucliadb/tests/nucliadb/integration/search/test_hidden.py
+++ b/nucliadb/tests/nucliadb/integration/search/test_hidden.py
@@ -27,68 +27,75 @@
from tests.utils import broker_resource_with_title_paragraph, inject_message
-async def create_resource(kbid, nucliadb_grpc):
+async def create_resource(kbid: str, nucliadb_ingest_grpc: WriterStub):
message = broker_resource_with_title_paragraph(kbid)
- await inject_message(nucliadb_grpc, message)
+ await inject_message(nucliadb_ingest_grpc, message)
return message.uuid
+@pytest.mark.deploy_modes("standalone")
async def test_hidden_search(
app_context,
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- nucliadb_manager: AsyncClient,
- knowledgebox: str,
+ nucliadb_ingest_grpc: WriterStub,
+ nucliadb_writer_manager: AsyncClient,
+ standalone_knowledgebox: str,
):
- resp = await nucliadb_manager.patch(f"/kb/{knowledgebox}", json={"hidden_resources_enabled": True})
+ resp = await nucliadb_writer_manager.patch(
+ f"/kb/{standalone_knowledgebox}", json={"hidden_resources_enabled": True}
+ )
assert resp.status_code == 200
- r1 = await create_resource(knowledgebox, nucliadb_grpc)
- r2 = await create_resource(knowledgebox, nucliadb_grpc)
+ r1 = await create_resource(standalone_knowledgebox, nucliadb_ingest_grpc)
+ r2 = await create_resource(standalone_knowledgebox, nucliadb_ingest_grpc)
# Both resources appear in searches
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/search")
assert resp.status_code == 200
assert resp.json()["resources"].keys() == {r1, r2}
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=title")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=title")
assert resp.status_code == 200
assert set([r["rid"] for r in resp.json()["paragraphs"]["results"]]) == {r1, r2}
# Hide r1
- resp = await nucliadb_writer.patch(f"/kb/{knowledgebox}/resource/{r1}", json={"hidden": True})
+ resp = await nucliadb_writer.patch(
+ f"/kb/{standalone_knowledgebox}/resource/{r1}", json={"hidden": True}
+ )
assert resp.status_code == 200
await asyncio.sleep(0.5)
# Only r2 appears on search
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/search")
assert resp.status_code == 200
assert resp.json()["resources"].keys() == {r2}
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=title")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=title")
assert resp.status_code == 200
assert set([r["rid"] for r in resp.json()["paragraphs"]["results"]]) == {r2}
# Unless show_hidden is passed, then both resources are returned
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?show_hidden=true")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/search?show_hidden=true")
assert resp.status_code == 200
assert resp.json()["resources"].keys() == {r1, r2}
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=title&show_hidden=true")
+ resp = await nucliadb_reader.get(
+ f"/kb/{standalone_knowledgebox}/suggest?query=title&show_hidden=true"
+ )
assert resp.status_code == 200
assert set([r["rid"] for r in resp.json()["paragraphs"]["results"]]) == {r1, r2}
# Test catalog ternary filter
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/catalog")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/catalog")
assert resp.status_code == 200
assert resp.json()["resources"].keys() == {r1, r2}
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/catalog?hidden=true")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/catalog?hidden=true")
assert resp.status_code == 200
assert resp.json()["resources"].keys() == {r1}
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/catalog?hidden=false")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/catalog?hidden=false")
assert resp.status_code == 200
assert resp.json()["resources"].keys() == {r2}
diff --git a/nucliadb/tests/nucliadb/integration/search/test_search.py b/nucliadb/tests/nucliadb/integration/search/test_search.py
index 8194d3f945..6f6d91fca8 100644
--- a/nucliadb/tests/nucliadb/integration/search/test_search.py
+++ b/nucliadb/tests/nucliadb/integration/search/test_search.py
@@ -50,17 +50,18 @@
from tests.utils import broker_resource, inject_message
+@pytest.mark.deploy_modes("standalone")
async def test_simple_search_sc_2062(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox,
):
# PUBLIC API
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}")
assert resp.status_code == 200
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"slug": "myresource",
"title": "My Title",
@@ -71,19 +72,19 @@ async def test_simple_search_sc_2062(
assert resp.status_code == 201
rid = resp.json()["uuid"]
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}")
assert resp.status_code == 200
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?query=title")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/search?query=title")
assert resp.status_code == 200
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?query=summary")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/search?query=summary")
assert resp.status_code == 200
assert len(resp.json()["paragraphs"]["results"]) == 1
-def broker_resource_with_duplicates(knowledgebox, sentence):
- bm = broker_resource(kbid=knowledgebox)
+def broker_resource_with_duplicates(standalone_knowledgebox, sentence):
+ bm = broker_resource(kbid=standalone_knowledgebox)
paragraph = sentence
text = f"{paragraph}{paragraph}"
etw = rpb.ExtractedTextWrapper()
@@ -131,52 +132,62 @@ def broker_resource_with_duplicates(knowledgebox, sentence):
return bm
-async def create_resource_with_duplicates(knowledgebox, writer: WriterStub, sentence: str):
- bm = broker_resource_with_duplicates(knowledgebox, sentence=sentence)
+async def create_resource_with_duplicates(standalone_knowledgebox, writer: WriterStub, sentence: str):
+ bm = broker_resource_with_duplicates(standalone_knowledgebox, sentence=sentence)
await inject_message(writer, bm)
return bm.uuid
+@pytest.mark.deploy_modes("standalone")
async def test_search_filters_out_duplicate_paragraphs(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox,
):
- await create_resource_with_duplicates(knowledgebox, nucliadb_grpc, sentence="My own text Ramon. ")
await create_resource_with_duplicates(
- knowledgebox, nucliadb_grpc, sentence="Another different paragraph with text"
+ standalone_knowledgebox, nucliadb_ingest_grpc, sentence="My own text Ramon. "
+ )
+ await create_resource_with_duplicates(
+ standalone_knowledgebox, nucliadb_ingest_grpc, sentence="Another different paragraph with text"
)
query = "text"
# It should filter out duplicates by default
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?query={query}")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/search?query={query}")
assert resp.status_code == 200
content = resp.json()
assert len(content["paragraphs"]["results"]) == 2
# It should filter out duplicates if specified
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?query={query}&with_duplicates=false")
+ resp = await nucliadb_reader.get(
+ f"/kb/{standalone_knowledgebox}/search?query={query}&with_duplicates=false"
+ )
assert resp.status_code == 200
content = resp.json()
assert len(content["paragraphs"]["results"]) == 2
# It should return duplicates if specified
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?query={query}&with_duplicates=true")
+ resp = await nucliadb_reader.get(
+ f"/kb/{standalone_knowledgebox}/search?query={query}&with_duplicates=true"
+ )
assert resp.status_code == 200
content = resp.json()
assert len(content["paragraphs"]["results"]) == 4
+@pytest.mark.deploy_modes("standalone")
async def test_search_returns_paragraph_positions(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox,
):
sentence = "My own text Ramon."
- await create_resource_with_duplicates(knowledgebox, nucliadb_grpc, sentence=sentence)
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?query=Ramon")
+ await create_resource_with_duplicates(
+ standalone_knowledgebox, nucliadb_ingest_grpc, sentence=sentence
+ )
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/search?query=Ramon")
assert resp.status_code == 200
content = resp.json()
position = content["paragraphs"]["results"][0]["position"]
@@ -186,8 +197,8 @@ async def test_search_returns_paragraph_positions(
assert position["page_number"] is not None
-def broker_resource_with_classifications(knowledgebox):
- bm = broker_resource(kbid=knowledgebox)
+def broker_resource_with_classifications(standalone_knowledgebox):
+ bm = broker_resource(kbid=standalone_knowledgebox)
text = "Some text"
etw = rpb.ExtractedTextWrapper()
@@ -239,17 +250,18 @@ def broker_resource_with_classifications(knowledgebox):
return bm
+@pytest.mark.deploy_modes("standalone")
async def test_search_returns_labels(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox,
):
- bm = broker_resource_with_classifications(knowledgebox)
- await inject_message(nucliadb_grpc, bm)
+ bm = broker_resource_with_classifications(standalone_knowledgebox)
+ await inject_message(nucliadb_ingest_grpc, bm)
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/search?query=Some",
+ f"/kb/{standalone_knowledgebox}/search?query=Some",
)
assert resp.status_code == 200
content = resp.json()
@@ -258,39 +270,41 @@ async def test_search_returns_labels(
assert par["labels"] == ["labelset1/label2", "labelset1/label1"]
+@pytest.mark.deploy_modes("standalone")
async def test_search_with_filters(
nucliadb_reader: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox,
):
# Inject a resource with a pdf icon
- bm = broker_resource(knowledgebox)
+ bm = broker_resource(standalone_knowledgebox)
bm.basic.icon = "application/pdf"
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
# Check that filtering by pdf icon returns it
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/search?show=basic&filters=/icon/application/pdf"
+ f"/kb/{standalone_knowledgebox}/search?show=basic&filters=/icon/application/pdf"
)
assert resp.status_code == 200
assert len(resp.json()["resources"]) == 1
# With a different icon should return no results
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/search?show=basic&filters=/icon/application/docx"
+ f"/kb/{standalone_knowledgebox}/search?show=basic&filters=/icon/application/docx"
)
assert resp.status_code == 200
assert len(resp.json()["resources"]) == 0
+@pytest.mark.deploy_modes("standalone")
async def test_paragraph_search_with_filters(
- nucliadb_writer,
- nucliadb_reader,
- nucliadb_grpc,
- knowledgebox,
+ nucliadb_writer: AsyncClient,
+ nucliadb_reader: AsyncClient,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
# Create a resource with two fields (title and summary)
resp = await nucliadb_writer.post(
f"/kb/{kbid}/resources",
@@ -321,13 +335,14 @@ async def test_paragraph_search_with_filters(
@pytest.mark.skip(reason="Needs sc-5626")
+@pytest.mark.deploy_modes("standalone")
async def test_(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "Rust for dummies",
},
@@ -336,7 +351,7 @@ async def test_(
rust_for_dummies = resp.json()["uuid"]
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "Introduction to Python",
},
@@ -345,7 +360,7 @@ async def test_(
intro_to_python = resp.json()["uuid"]
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/catalog",
+ f"/kb/{standalone_knowledgebox}/catalog",
params={
"query": "Rust",
},
@@ -356,7 +371,7 @@ async def test_(
assert rust_for_dummies in resources
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/catalog",
+ f"/kb/{standalone_knowledgebox}/catalog",
params={
"query": "Intro",
},
@@ -367,15 +382,16 @@ async def test_(
assert intro_to_python in resources
+@pytest.mark.deploy_modes("standalone")
async def test_search_returns_sentence_positions(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox,
):
- await inject_resource_with_a_sentence(knowledgebox, nucliadb_grpc)
+ await inject_resource_with_a_sentence(standalone_knowledgebox, nucliadb_ingest_grpc)
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/search", json=dict(query="my own text", min_score=-1)
+ f"/kb/{standalone_knowledgebox}/search", json=dict(query="my own text", min_score=-1)
)
assert resp.status_code == 200
content = resp.json()
@@ -386,8 +402,8 @@ async def test_search_returns_sentence_positions(
assert "page_number" not in position
-def get_resource_with_a_sentence(knowledgebox):
- bm = broker_resource(knowledgebox)
+def get_resource_with_a_sentence(standalone_knowledgebox):
+ bm = broker_resource(standalone_knowledgebox)
bm.files["file"].file.uri = "http://nofile"
bm.files["file"].file.size = 0
@@ -431,16 +447,17 @@ def get_resource_with_a_sentence(knowledgebox):
return bm
-async def inject_resource_with_a_sentence(knowledgebox, writer):
- bm = get_resource_with_a_sentence(knowledgebox)
+async def inject_resource_with_a_sentence(standalone_knowledgebox, writer):
+ bm = get_resource_with_a_sentence(standalone_knowledgebox)
await inject_message(writer, bm)
+@pytest.mark.deploy_modes("standalone")
async def test_search_relations(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox,
knowledge_graph,
):
relation_nodes, relation_edges = knowledge_graph
@@ -453,7 +470,7 @@ async def test_search_relations(
)
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/search",
+ f"/kb/{standalone_knowledgebox}/search",
params={
"features": "relations",
"query": "What relates Newton and Becquer?",
@@ -528,7 +545,7 @@ async def test_search_relations(
predict_mock.detect_entities = AsyncMock(return_value=[relation_nodes["Animal"]])
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/search",
+ f"/kb/{standalone_knowledgebox}/search",
params={
"features": "relations",
"query": "Do you like animals?",
@@ -570,14 +587,15 @@ async def test_search_relations(
assert expected_relation in entities[entity]["related_to"]
+@pytest.mark.deploy_modes("standalone")
async def test_search_automatic_relations(
- nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, knowledgebox
+ nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, standalone_knowledgebox
):
predict_mock = Mock()
set_utility(Utility.PREDICT, predict_mock)
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "My resource",
"slug": "myresource",
@@ -640,7 +658,7 @@ async def test_search_automatic_relations(
predict_mock.detect_entities = AsyncMock(return_value=[rn])
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/search",
+ f"/kb/{standalone_knowledgebox}/search",
params={
"features": "relations",
"query": "Relations for this resource",
@@ -753,7 +771,7 @@ async def test_search_automatic_relations(
predict_mock.detect_entities = AsyncMock(return_value=[rn])
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/search",
+ f"/kb/{standalone_knowledgebox}/search",
params={
"features": "relations",
"query": "You know John?",
@@ -794,14 +812,15 @@ async def get_audit_messages(sub):
return auditreq
+@pytest.mark.deploy_modes("standalone")
async def test_search_sends_audit(
- nucliadb_reader,
- knowledgebox,
+ nucliadb_reader: AsyncClient,
+ standalone_knowledgebox,
stream_audit: StreamAuditStorage,
):
from nucliadb_utils.settings import audit_settings
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
# Prepare a test audit stream to receive our messages
partition = stream_audit.get_partition(kbid)
@@ -839,13 +858,14 @@ async def test_search_sends_audit(
@pytest.mark.parametrize("endpoint", ["search", "find"])
+@pytest.mark.deploy_modes("standalone")
async def test_search_endpoints_handle_predict_errors(
nucliadb_reader: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
predict_mock,
endpoint,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
for query_mock in (AsyncMock(side_effect=SendToPredictError()),):
predict_mock.query = query_mock
@@ -862,7 +882,7 @@ async def test_search_endpoints_handle_predict_errors(
async def create_dummy_resources(
- nucliadb_writer: AsyncClient, nucliadb_grpc: WriterStub, kbid, n=10, start=0
+ nucliadb_writer: AsyncClient, nucliadb_ingest_grpc: WriterStub, kbid, n=10, start=0
):
payloads = [
{
@@ -903,24 +923,24 @@ async def create_dummy_resources(
message.field_vectors.append(ev)
message.source = BrokerMessage.MessageSource.PROCESSOR
- await inject_message(nucliadb_grpc, message)
+ await inject_message(nucliadb_ingest_grpc, message)
@pytest.fixture(scope="function")
async def kb_with_one_logic_shard(
- nucliadb_manager: AsyncClient,
+ nucliadb_writer_manager: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
+ nucliadb_ingest_grpc: WriterStub,
):
- resp = await nucliadb_manager.post("/kbs", json={})
+ resp = await nucliadb_writer_manager.post("/kbs", json={})
assert resp.status_code == 201
kbid = resp.json().get("uuid")
- await create_dummy_resources(nucliadb_writer, nucliadb_grpc, kbid, n=10)
+ await create_dummy_resources(nucliadb_writer, nucliadb_ingest_grpc, kbid, n=10)
yield kbid
- resp = await nucliadb_manager.delete(f"/kb/{kbid}")
+ resp = await nucliadb_writer_manager.delete(f"/kb/{kbid}")
assert resp.status_code == 200
@@ -933,37 +953,38 @@ def max_shard_paragraphs():
@pytest.fixture(scope="function")
async def kb_with_two_logic_shards(
max_shard_paragraphs,
- nucliadb_manager: AsyncClient,
+ nucliadb_writer_manager: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
+ nucliadb_ingest_grpc: WriterStub,
):
sc = shard_creator.ShardCreatorHandler(
driver=get_driver(),
storage=await get_storage(),
pubsub=None, # type: ignore
)
- resp = await nucliadb_manager.post("/kbs", json={})
+ resp = await nucliadb_writer_manager.post("/kbs", json={})
assert resp.status_code == 201
kbid = resp.json().get("uuid")
- await create_dummy_resources(nucliadb_writer, nucliadb_grpc, kbid, n=8)
+ await create_dummy_resources(nucliadb_writer, nucliadb_ingest_grpc, kbid, n=8)
# trigger creating new shard manually here
sc.shard_manager.should_create_new_shard = Mock(return_value=True) # type: ignore
await sc.process_kb(kbid)
- await create_dummy_resources(nucliadb_writer, nucliadb_grpc, kbid, n=10, start=8)
+ await create_dummy_resources(nucliadb_writer, nucliadb_ingest_grpc, kbid, n=10, start=8)
yield kbid
- resp = await nucliadb_manager.delete(f"/kb/{kbid}")
+ resp = await nucliadb_writer_manager.delete(f"/kb/{kbid}")
assert resp.status_code == 200
@pytest.mark.flaky(reruns=5)
+@pytest.mark.deploy_modes("standalone")
async def test_search_two_logic_shards(
nucliadb_reader: AsyncClient,
- nucliadb_manager: AsyncClient,
+ nucliadb_reader_manager: AsyncClient,
kb_with_one_logic_shard,
kb_with_two_logic_shards,
):
@@ -971,11 +992,11 @@ async def test_search_two_logic_shards(
kbid2 = kb_with_two_logic_shards
# Check that they have one and two logic shards, respectively
- resp = await nucliadb_manager.get(f"kb/{kbid1}/shards")
+ resp = await nucliadb_reader_manager.get(f"kb/{kbid1}/shards")
assert resp.status_code == 200
assert len(resp.json()["shards"]) == 1
- resp = await nucliadb_manager.get(f"kb/{kbid2}/shards")
+ resp = await nucliadb_reader_manager.get(f"kb/{kbid2}/shards")
assert resp.status_code == 200
assert len(resp.json()["shards"]) == 2
@@ -1000,18 +1021,19 @@ async def test_search_two_logic_shards(
assert len(content1["sentences"]["results"]) == len(content2["sentences"]["results"])
+@pytest.mark.deploy_modes("standalone")
async def test_search_min_score(
nucliadb_reader: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
# When not specifying the min score on the request, it should default to 0.7
- resp = await nucliadb_reader.post(f"/kb/{knowledgebox}/search", json={"query": "dummy"})
+ resp = await nucliadb_reader.post(f"/kb/{standalone_knowledgebox}/search", json={"query": "dummy"})
assert resp.status_code == 200
assert resp.json()["sentences"]["min_score"] == 0.7
# If we specify a min score, it should be used
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/search",
+ f"/kb/{standalone_knowledgebox}/search",
json={"query": "dummy", "min_score": {"bm25": 10, "semantic": 0.5}},
)
assert resp.status_code == 200
@@ -1044,14 +1066,15 @@ async def test_search_min_score(
(["/a/b", "/a/be"], True, ""),
],
)
+@pytest.mark.deploy_modes("standalone")
async def test_facets_validation(
nucliadb_reader: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
facets,
valid,
error_message,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
for endpoint in ("search",):
for method in ("post", "get"):
func = getattr(nucliadb_reader, method)
@@ -1066,13 +1089,14 @@ async def test_facets_validation(
assert error_message in resp.json()["detail"][0]["msg"]
+@pytest.mark.deploy_modes("standalone")
async def test_search_marks_fuzzy_results(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"slug": "myresource",
"title": "My Title",
@@ -1082,7 +1106,7 @@ async def test_search_marks_fuzzy_results(
# Should get only one non-fuzzy result
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/search",
+ f"/kb/{standalone_knowledgebox}/search",
json={
"query": "Title",
},
@@ -1093,7 +1117,7 @@ async def test_search_marks_fuzzy_results(
# Should get only one fuzzy result
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/search",
+ f"/kb/{standalone_knowledgebox}/search",
json={
"query": "totle",
},
@@ -1104,7 +1128,7 @@ async def test_search_marks_fuzzy_results(
# Should not get any result if exact match term queried
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/search",
+ f"/kb/{standalone_knowledgebox}/search",
json={
"query": '"totle"',
},
@@ -1122,17 +1146,18 @@ def check_fuzzy_paragraphs(search_response, *, fuzzy_result: bool, n_expected: i
assert found == n_expected
+@pytest.mark.deploy_modes("standalone")
async def test_search_by_path_filter(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox,
):
paths = ["/foo", "foo/bar", "foo/bar/1", "foo/bar/2", "foo/bar/3", "foo/bar/4"]
for path in paths:
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": f"My resource: {path}",
"summary": "Some summary",
@@ -1144,7 +1169,7 @@ async def test_search_by_path_filter(
assert resp.status_code == 201
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/catalog",
+ f"/kb/{standalone_knowledgebox}/catalog",
params={
"query": "",
},
@@ -1153,21 +1178,26 @@ async def test_search_by_path_filter(
assert len(resp.json()["resources"]) == len(paths)
# Get the list of all
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?filters=/origin.path/foo")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/search?filters=/origin.path/foo")
assert resp.status_code == 200
assert len(resp.json()["resources"]) == len(paths)
# Get the list of under foo/bar
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?filters=/origin.path/foo/bar")
+ resp = await nucliadb_reader.get(
+ f"/kb/{standalone_knowledgebox}/search?filters=/origin.path/foo/bar"
+ )
assert resp.status_code == 200
assert len(resp.json()["resources"]) == len(paths) - 1
# Get the list of under foo/bar/4
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?filters=/origin.path/foo/bar/4")
+ resp = await nucliadb_reader.get(
+ f"/kb/{standalone_knowledgebox}/search?filters=/origin.path/foo/bar/4"
+ )
assert resp.status_code == 200
assert len(resp.json()["resources"]) == 1
+@pytest.mark.deploy_modes("standalone")
async def test_search_kb_not_found(nucliadb_reader: AsyncClient):
resp = await nucliadb_reader.get(
"/kb/00000000000000/search?query=own+text",
@@ -1175,8 +1205,11 @@ async def test_search_kb_not_found(nucliadb_reader: AsyncClient):
assert resp.status_code == 404
-async def test_resource_search_query_param_is_optional(nucliadb_reader, knowledgebox):
- kb = knowledgebox
+@pytest.mark.deploy_modes("standalone")
+async def test_resource_search_query_param_is_optional(
+ nucliadb_reader: AsyncClient, standalone_knowledgebox
+):
+ kb = standalone_knowledgebox
# If query is not present, should not fail
resp = await nucliadb_reader.get(f"/kb/{kb}/search")
assert resp.status_code == 200
@@ -1187,8 +1220,9 @@ async def test_resource_search_query_param_is_optional(nucliadb_reader, knowledg
assert resp.status_code == 200
-async def test_search_with_duplicates(nucliadb_reader, knowledgebox):
- kb = knowledgebox
+@pytest.mark.deploy_modes("standalone")
+async def test_search_with_duplicates(nucliadb_reader: AsyncClient, standalone_knowledgebox):
+ kb = standalone_knowledgebox
resp = await nucliadb_reader.get(f"/kb/{kb}/search?with_duplicates=True")
assert resp.status_code == 200
@@ -1205,10 +1239,11 @@ def search_with_limits_exceeded_error():
yield
+@pytest.mark.deploy_modes("standalone")
async def test_search_handles_limits_exceeded_error(
- nucliadb_reader, knowledgebox, search_with_limits_exceeded_error
+ nucliadb_reader: AsyncClient, standalone_knowledgebox, search_with_limits_exceeded_error
):
- kb = knowledgebox
+ kb = standalone_knowledgebox
resp = await nucliadb_reader.get(f"/kb/{kb}/search")
assert resp.status_code == 402
assert resp.json() == {"detail": "over the quota"}
@@ -1218,12 +1253,13 @@ async def test_search_handles_limits_exceeded_error(
assert resp.json() == {"detail": "over the quota"}
+@pytest.mark.deploy_modes("standalone")
async def test_catalog_post(
nucliadb_reader: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/catalog",
+ f"/kb/{standalone_knowledgebox}/catalog",
json={
"query": "",
"filters": [
@@ -1248,6 +1284,7 @@ def not_debug():
running_settings.debug = prev
+@pytest.mark.deploy_modes("standalone")
async def test_api_does_not_show_tracebacks_on_api_errors(not_debug, nucliadb_reader: AsyncClient):
with mock.patch(
"nucliadb.search.api.v1.search.search",
@@ -1258,15 +1295,16 @@ async def test_api_does_not_show_tracebacks_on_api_errors(not_debug, nucliadb_re
assert resp.json() == {"detail": "Something went wrong, please contact your administrator"}
+@pytest.mark.deploy_modes("standalone")
async def test_catalog_pagination(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
n_resources = 35
for i in range(n_resources):
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": f"Resource {i}",
"texts": {
@@ -1287,7 +1325,7 @@ async def test_catalog_pagination(
page_number = 0
while True:
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/catalog",
+ f"/kb/{standalone_knowledgebox}/catalog",
params={
"page_number": page_number,
"page_size": page_size,
@@ -1309,14 +1347,15 @@ async def test_catalog_pagination(
assert len(resource_uuids) == n_resources
+@pytest.mark.deploy_modes("standalone")
async def test_catalog_date_range_filtering(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
now = datetime.now()
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": f"Resource",
"texts": {
@@ -1330,7 +1369,7 @@ async def test_catalog_date_range_filtering(
one_hour_ago = now - timedelta(hours=1)
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/catalog",
+ f"/kb/{standalone_knowledgebox}/catalog",
params={
"range_creation_start": one_hour_ago.isoformat(),
},
@@ -1340,7 +1379,7 @@ async def test_catalog_date_range_filtering(
assert len(body["resources"]) == 1
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/catalog",
+ f"/kb/{standalone_knowledgebox}/catalog",
json={
"range_creation_end": one_hour_ago.isoformat(),
},
@@ -1350,23 +1389,24 @@ async def test_catalog_date_range_filtering(
assert len(body["resources"]) == 0
+@pytest.mark.deploy_modes("standalone")
async def test_catalog_faceted(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox,
):
valid_status = ["PROCESSED", "PENDING", "ERROR"]
for status_name, status_value in rpb.Metadata.Status.items():
if status_name not in valid_status:
continue
- bm = broker_resource(knowledgebox)
+ bm = broker_resource(standalone_knowledgebox)
bm.basic.metadata.status = status_value
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/catalog?faceted=/metadata.status",
+ f"/kb/{standalone_knowledgebox}/catalog?faceted=/metadata.status",
)
assert resp.status_code == 200
body = resp.json()
@@ -1378,11 +1418,12 @@ async def test_catalog_faceted(
assert count == 1
+@pytest.mark.deploy_modes("standalone")
async def test_catalog_faceted_labels(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox,
):
# 4 resources:
# 1 with /l/labelset0/label0
@@ -1390,22 +1431,22 @@ async def test_catalog_faceted_labels(
# 1 with /l/labelset1/label0
for label in range(2):
for count in range(label + 1):
- bm = broker_resource(knowledgebox)
+ bm = broker_resource(standalone_knowledgebox)
c = rpb.Classification()
c.labelset = f"labelset0"
c.label = f"label{label}"
bm.basic.usermetadata.classifications.append(c)
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
- bm = broker_resource(knowledgebox)
+ bm = broker_resource(standalone_knowledgebox)
c = rpb.Classification()
c.labelset = "labelset1"
c.label = "label0"
bm.basic.usermetadata.classifications.append(c)
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/catalog?faceted=/classification.labels/labelset0",
+ f"/kb/{standalone_knowledgebox}/catalog?faceted=/classification.labels/labelset0",
)
assert resp.status_code == 200
body = resp.json()
@@ -1418,7 +1459,7 @@ async def test_catalog_faceted_labels(
# This is used by the check missing labels button in dashboard
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/catalog?faceted=/classification.labels",
+ f"/kb/{standalone_knowledgebox}/catalog?faceted=/classification.labels",
)
assert resp.status_code == 200
body = resp.json()
@@ -1430,24 +1471,25 @@ async def test_catalog_faceted_labels(
}
+@pytest.mark.deploy_modes("standalone")
async def test_catalog_filters(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox,
):
valid_status = ["PROCESSED", "PENDING", "ERROR"]
for status_name, status_value in rpb.Metadata.Status.items():
if status_name not in valid_status:
continue
- bm = broker_resource(knowledgebox)
+ bm = broker_resource(standalone_knowledgebox)
bm.basic.metadata.status = status_value
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
# No filters
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/catalog",
+ f"/kb/{standalone_knowledgebox}/catalog",
)
assert resp.status_code == 200
body = resp.json()
@@ -1455,7 +1497,7 @@ async def test_catalog_filters(
# Simple filter
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/catalog?filters=/metadata.status/PENDING",
+ f"/kb/{standalone_knowledgebox}/catalog?filters=/metadata.status/PENDING",
)
assert resp.status_code == 200
body = resp.json()
@@ -1464,7 +1506,7 @@ async def test_catalog_filters(
# AND filter
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/catalog",
+ f"/kb/{standalone_knowledgebox}/catalog",
json={"filters": [{"all": ["/metadata.status/PENDING", "/metadata.status/ERROR"]}]},
)
assert resp.status_code == 200
@@ -1473,7 +1515,7 @@ async def test_catalog_filters(
# OR filter
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/catalog",
+ f"/kb/{standalone_knowledgebox}/catalog",
json={"filters": [{"any": ["/metadata.status/PENDING", "/metadata.status/ERROR"]}]},
)
assert resp.status_code == 200
diff --git a/nucliadb/tests/nucliadb/integration/search/test_search_date_ranges_filter.py b/nucliadb/tests/nucliadb/integration/search/test_search_date_ranges_filter.py
index e96d1f0f6d..f1fdf62f52 100644
--- a/nucliadb/tests/nucliadb/integration/search/test_search_date_ranges_filter.py
+++ b/nucliadb/tests/nucliadb/integration/search/test_search_date_ranges_filter.py
@@ -24,6 +24,7 @@
from nucliadb.tests.vectors import V1
from nucliadb_models.search import SearchOptions
+from nucliadb_protos.writer_pb2_grpc import WriterStub
from tests.nucliadb.integration.search.test_search import get_resource_with_a_sentence
from tests.utils import inject_message
@@ -41,13 +42,13 @@ def a_week_before(date):
@pytest.fixture(scope="function")
-async def resource(nucliadb_grpc, knowledgebox):
- bm = get_resource_with_a_sentence(knowledgebox)
+async def resource(nucliadb_ingest_grpc: WriterStub, standalone_knowledgebox):
+ bm = get_resource_with_a_sentence(standalone_knowledgebox)
bm.basic.created.FromDatetime(NOW)
bm.basic.modified.FromDatetime(NOW)
bm.origin.ClearField("created")
bm.origin.ClearField("modified")
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
return bm.uuid
@@ -81,9 +82,10 @@ async def resource(nucliadb_grpc, knowledgebox):
SearchOptions.SEMANTIC,
],
)
+@pytest.mark.deploy_modes("standalone")
async def test_search_with_date_range_filters_nucliadb_dates(
nucliadb_reader: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox: str,
feature,
resource,
creation_start,
@@ -98,7 +100,7 @@ async def test_search_with_date_range_filters_nucliadb_dates(
"""
await _test_find_date_ranges(
nucliadb_reader,
- knowledgebox,
+ standalone_knowledgebox,
[feature],
creation_start,
creation_end,
@@ -138,10 +140,11 @@ async def test_search_with_date_range_filters_nucliadb_dates(
SearchOptions.SEMANTIC,
],
)
+@pytest.mark.deploy_modes("standalone")
async def test_search_with_date_range_filters_origin_dates(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox: str,
feature,
resource,
creation_start,
@@ -156,7 +159,7 @@ async def test_search_with_date_range_filters_origin_dates(
"""
# Set origin dates of the resource
resp = await nucliadb_writer.patch(
- f"/kb/{knowledgebox}/resource/{resource}",
+ f"/kb/{standalone_knowledgebox}/resource/{resource}",
json={
"origin": {
"created": ORIGIN_CREATION.isoformat(),
@@ -168,7 +171,7 @@ async def test_search_with_date_range_filters_origin_dates(
await _test_find_date_ranges(
nucliadb_reader,
- knowledgebox,
+ standalone_knowledgebox,
[feature],
creation_start,
creation_end,
@@ -178,8 +181,9 @@ async def test_search_with_date_range_filters_origin_dates(
)
+@pytest.mark.deploy_modes("standalone")
async def _test_find_date_ranges(
- nucliadb_reader,
+ nucliadb_reader: AsyncClient,
kbid,
features,
creation_start,
diff --git a/nucliadb/tests/nucliadb/integration/search/test_search_sorting.py b/nucliadb/tests/nucliadb/integration/search/test_search_sorting.py
index 7534569114..eebdb9ba64 100644
--- a/nucliadb/tests/nucliadb/integration/search/test_search_sorting.py
+++ b/nucliadb/tests/nucliadb/integration/search/test_search_sorting.py
@@ -25,6 +25,7 @@
from nucliadb_models.search import SearchOptions
+@pytest.mark.deploy_modes("standalone")
async def test_search_sort_by_score(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
@@ -59,6 +60,7 @@ async def test_search_sort_by_score(
("modified", "desc", lambda x: list(reversed(sorted(x)))),
],
)
+@pytest.mark.deploy_modes("standalone")
async def test_search_sorted_by_creation_and_modification_dates(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
@@ -100,6 +102,7 @@ async def test_search_sorted_by_creation_and_modification_dates(
("title", "desc", lambda x: list(reversed(sorted(x)))),
],
)
+@pytest.mark.deploy_modes("standalone")
async def test_limited_sorted_search_of_most_relevant_results(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
@@ -143,6 +146,7 @@ async def test_limited_sorted_search_of_most_relevant_results(
assert sort_fields == sort_function(sort_fields)
+@pytest.mark.deploy_modes("standalone")
async def test_empty_query_search_for_ordered_resources_by_creation_date_desc(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
@@ -167,6 +171,7 @@ async def test_empty_query_search_for_ordered_resources_by_creation_date_desc(
assert creation_dates == sorted(creation_dates, reverse=True)
+@pytest.mark.deploy_modes("standalone")
async def test_list_all_resources_by_creation_and_modification_dates_with_empty_queries(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
diff --git a/nucliadb/tests/nucliadb/integration/test_api.py b/nucliadb/tests/nucliadb/integration/test_api.py
index 07e465f1f8..4c80dcb4f1 100644
--- a/nucliadb/tests/nucliadb/integration/test_api.py
+++ b/nucliadb/tests/nucliadb/integration/test_api.py
@@ -62,9 +62,10 @@
)
+@pytest.mark.deploy_modes("standalone")
async def test_kb_creation_allows_setting_learning_configuration(
- nucliadb_manager,
- nucliadb_reader,
+ nucliadb_writer_manager: AsyncClient,
+ nucliadb_reader: AsyncClient,
onprem_nucliadb,
):
with patch("nucliadb.writer.api.v1.knowledgebox.learning_proxy", new=AsyncMock()) as learning_proxy:
@@ -85,7 +86,7 @@ async def test_kb_creation_allows_setting_learning_configuration(
)
# Check that we can define it to a different semantic model
- resp = await nucliadb_manager.post(
+ resp = await nucliadb_writer_manager.post(
f"/kbs",
json={
"title": "My KB with english semantic model",
@@ -101,25 +102,26 @@ async def test_kb_creation_allows_setting_learning_configuration(
)
+@pytest.mark.deploy_modes("standalone")
async def test_creation(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- nucliadb_train: TrainStub,
- knowledgebox,
+ nucliadb_ingest_grpc: WriterStub,
+ nucliadb_train_grpc: TrainStub,
+ standalone_knowledgebox,
):
# PUBLIC API
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}")
assert resp.status_code == 200
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/labelset/ls1",
+ f"/kb/{standalone_knowledgebox}/labelset/ls1",
json={"title": "Labelset 1", "labels": [{"text": "text", "title": "title"}]},
)
assert resp.status_code == 200
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "My title",
"slug": "myresource",
@@ -149,12 +151,12 @@ async def test_creation(
bm.extracted_text.append(et)
bm.field_metadata.append(fm)
bm.uuid = rid
- bm.kbid = knowledgebox
+ bm.kbid = standalone_knowledgebox
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/resource/{rid}?show=extracted&show=values&extracted=text&extracted=metadata",
+ f"/kb/{standalone_knowledgebox}/resource/{rid}?show=extracted&show=values&extracted=text&extracted=metadata",
)
assert resp.status_code == 200
assert (
@@ -167,7 +169,7 @@ async def test_creation(
# ADD A LABEL
resp = await nucliadb_writer.patch(
- f"/kb/{knowledgebox}/resource/{rid}",
+ f"/kb/{standalone_knowledgebox}/resource/{rid}",
json={
"fieldmetadata": [
{
@@ -188,17 +190,17 @@ async def test_creation(
assert resp.status_code == 200
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/resource/{rid}?show=errors&show=values&show=basic",
+ f"/kb/{standalone_knowledgebox}/resource/{rid}?show=errors&show=values&show=basic",
)
assert resp.status_code == 200
# TRAINING GRPC API
request = GetSentencesRequest()
- request.kb.uuid = knowledgebox
+ request.kb.uuid = standalone_knowledgebox
request.metadata.labels = True
request.metadata.text = True
paragraph: TrainParagraph
- async for paragraph in nucliadb_train.GetParagraphs(request): # type: ignore
+ async for paragraph in nucliadb_train_grpc.GetParagraphs(request): # type: ignore
if paragraph.field.field == "title":
assert paragraph.metadata.text == "My title"
else:
@@ -210,33 +212,37 @@ async def test_creation(
trainset.batch_size = 20
trainset.type = TaskType.PARAGRAPH_CLASSIFICATION
trainset.filter.labels.append("ls1")
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/trainset")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/trainset")
assert resp.status_code == 200
data = resp.json()
assert len(data["partitions"]) == 1
partition_id = data["partitions"][0]
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/trainset/{partition_id}",
+ f"/kb/{standalone_knowledgebox}/trainset/{partition_id}",
content=trainset.SerializeToString(),
)
assert len(resp.content) > 0
-async def test_can_create_knowledgebox_with_colon_in_slug(nucliadb_manager: AsyncClient):
- resp = await nucliadb_manager.post("/kbs", json={"slug": "something:else"})
+@pytest.mark.deploy_modes("standalone")
+async def test_can_create_standalone_knowledgebox_with_colon_in_slug(
+ nucliadb_writer_manager: AsyncClient, nucliadb_reader_manager: AsyncClient
+):
+ resp = await nucliadb_writer_manager.post("/kbs", json={"slug": "something:else"})
assert resp.status_code == 201
- resp = await nucliadb_manager.get(f"/kbs")
+ resp = await nucliadb_reader_manager.get(f"/kbs")
assert resp.status_code == 200
assert resp.json()["kbs"][0]["slug"] == "something:else"
+@pytest.mark.deploy_modes("standalone")
async def test_serialize_errors(
nucliadb_writer: AsyncClient,
nucliadb_reader: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox: str,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox: str,
):
"""
Test description:
@@ -246,7 +252,7 @@ async def test_serialize_errors(
"""
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "My resource",
"texts": {"text": TEST_TEXT_PAYLOAD},
@@ -258,7 +264,7 @@ async def test_serialize_errors(
assert resp.status_code == 201
rid = resp.json()["uuid"]
- br = broker_resource(knowledgebox, rid=rid)
+ br = broker_resource(standalone_knowledgebox, rid=rid)
# Add an error for every field type
fields_to_test = [
@@ -281,10 +287,10 @@ async def test_serialize_errors(
)
br.errors.append(error)
- await inject_message(nucliadb_grpc, br)
+ await inject_message(nucliadb_ingest_grpc, br)
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/resource/{br.uuid}",
+ f"/kb/{standalone_knowledgebox}/resource/{br.uuid}",
params=dict(show=["extracted", "errors", "basic"], extracted=["metadata"]),
)
assert resp.status_code == 200
@@ -295,11 +301,12 @@ async def test_serialize_errors(
assert resp_json["data"][ftypestring][fid]["error"]["code"] == 1
+@pytest.mark.deploy_modes("standalone")
async def test_entitygroups(
nucliadb_writer: AsyncClient,
nucliadb_reader: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox: str,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox: str,
):
await wait_for_sync()
entitygroup = {
@@ -316,11 +323,11 @@ async def test_entitygroups(
"spoon": {"value": "Spoon"},
},
}
- resp = await nucliadb_writer.post(f"/kb/{knowledgebox}/entitiesgroups", json=entitygroup)
+ resp = await nucliadb_writer.post(f"/kb/{standalone_knowledgebox}/entitiesgroups", json=entitygroup)
assert resp.status_code == 200
# Entities are not returned by default
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/entitiesgroups")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/entitiesgroups")
groups = resp.json()["groups"]
assert "entities" in groups["group1"]
assert len(groups["group1"]["entities"]) == 0
@@ -329,15 +336,16 @@ async def test_entitygroups(
assert groups["group1"]["custom"] is True
# show_entities=true returns a http 400
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/entitiesgroups?show_entities=true")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/entitiesgroups?show_entities=true")
assert resp.status_code == 400
+@pytest.mark.deploy_modes("standalone")
async def test_extracted_shortened_metadata(
nucliadb_writer: AsyncClient,
nucliadb_reader: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox: str,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox: str,
):
"""
Test description:
@@ -345,7 +353,7 @@ async def test_extracted_shortened_metadata(
- Create a resource with a field containing FieldMetadata with ner, positions and relations.
- Check that new extracted data option filters them out
"""
- br = broker_resource(knowledgebox)
+ br = broker_resource(standalone_knowledgebox)
field = rpb.FieldID(field_type=rpb.FieldType.TEXT, field="text")
fcmw = FieldComputedMetadataWrapper()
@@ -378,14 +386,14 @@ async def test_extracted_shortened_metadata(
br.field_metadata.append(fcmw)
- await inject_message(nucliadb_grpc, br)
+ await inject_message(nucliadb_ingest_grpc, br)
# TODO: Remove ner and positions once fields are removed
cropped_fields = ["ner", "positions", "relations", "classifications"]
# Check that when 'shortened_metadata' in extracted param fields are cropped
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/resource/{br.uuid}/text/text",
+ f"/kb/{standalone_knowledgebox}/resource/{br.uuid}/text/text",
params=dict(show=["extracted"], extracted=["shortened_metadata"]),
)
assert resp.status_code == 200
@@ -399,7 +407,7 @@ async def test_extracted_shortened_metadata(
# Check that when 'metadata' in extracted param fields are returned
for extracted_param in (["metadata"], ["metadata", "shortened_metadata"]):
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/resource/{br.uuid}/text/text",
+ f"/kb/{standalone_knowledgebox}/resource/{br.uuid}/text/text",
params=dict(show=["extracted"], extracted=extracted_param),
)
assert resp.status_code == 200
@@ -420,9 +428,10 @@ async def test_extracted_shortened_metadata(
("Invalid&Character", True),
],
)
+@pytest.mark.deploy_modes("standalone")
async def test_field_ids_are_validated(
nucliadb_writer: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
field_id,
error,
):
@@ -435,7 +444,7 @@ async def test_field_ids_are_validated(
}
},
}
- resp = await nucliadb_writer.post(f"/kb/{knowledgebox}/resources", json=payload)
+ resp = await nucliadb_writer.post(f"/kb/{standalone_knowledgebox}/resources", json=payload)
if error:
assert resp.status_code == 422
body = resp.json()
@@ -444,10 +453,11 @@ async def test_field_ids_are_validated(
assert resp.status_code == 201
+@pytest.mark.deploy_modes("standalone")
async def test_extra(
nucliadb_writer: AsyncClient,
nucliadb_reader: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
):
"""
Test description:
@@ -456,7 +466,7 @@ async def test_extra(
- Check that it is returned only if requested on search results
- Check modification
"""
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
invalid_extra = {"metadata": {i: f"foo{i}" for i in range(100000)}}
resp = await nucliadb_writer.post(
f"/kb/{kbid}/resources",
@@ -520,12 +530,13 @@ async def test_extra(
assert resp.json()["extra"] == extra
+@pytest.mark.deploy_modes("standalone")
async def test_icon_doesnt_change_after_labeling_resource_sc_5625(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
resp = await nucliadb_writer.post(
f"/kb/{kbid}/resources",
json={"title": "Foo", "icon": "application/pdf"},
@@ -559,11 +570,14 @@ async def test_icon_doesnt_change_after_labeling_resource_sc_5625(
("foo/bar", False), # with slash
],
)
-async def test_resource_slug_validation(nucliadb_writer, nucliadb_reader, knowledgebox, slug, valid):
- resp = await nucliadb_writer.post(f"/kb/{knowledgebox}/resources", json={"slug": slug})
+@pytest.mark.deploy_modes("standalone")
+async def test_resource_slug_validation(
+ nucliadb_writer, nucliadb_reader: AsyncClient, standalone_knowledgebox, slug, valid
+):
+ resp = await nucliadb_writer.post(f"/kb/{standalone_knowledgebox}/resources", json={"slug": slug})
if valid:
assert resp.status_code == 201
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/slug/{slug}")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/slug/{slug}")
assert resp.status_code == 200
else:
assert resp.status_code == 422
@@ -572,12 +586,13 @@ async def test_resource_slug_validation(nucliadb_writer, nucliadb_reader, knowle
assert f"Invalid slug: '{slug}'" in detail["msg"]
+@pytest.mark.deploy_modes("standalone")
async def test_icon_doesnt_change_after_adding_file_field_sc_2388(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
resp = await nucliadb_writer.post(
f"/kb/{kbid}/resources",
json={
@@ -603,13 +618,14 @@ async def test_icon_doesnt_change_after_adding_file_field_sc_2388(
assert resp.json()["icon"] == "text/plain"
+@pytest.mark.deploy_modes("standalone")
async def test_language_metadata(
- nucliadb_writer,
- nucliadb_reader,
- nucliadb_grpc,
- knowledgebox,
+ nucliadb_writer: AsyncClient,
+ nucliadb_reader: AsyncClient,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
resp = await nucliadb_writer.post(
f"/kb/{kbid}/resources",
json={"title": "My resource"},
@@ -639,7 +655,7 @@ async def test_language_metadata(
fcmw.metadata.split_metadata["foo"].language = "it"
bm.field_metadata.append(fcmw)
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
resp = await nucliadb_reader.get(f"/kb/{kbid}/resource/{uuid}", params={"show": ["basic"]})
assert resp.status_code == 200
@@ -673,13 +689,14 @@ async def test_language_metadata(
assert res["metadata"]["languages"] == []
+@pytest.mark.deploy_modes("standalone")
async def test_story_7081(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "My title",
"slug": "myresource",
@@ -691,28 +708,29 @@ async def test_story_7081(
rid = resp.json()["uuid"]
resp = await nucliadb_writer.patch(
- f"/kb/{knowledgebox}/resource/{rid}",
+ f"/kb/{standalone_knowledgebox}/resource/{rid}",
json={"origin": {"metadata": {"some": "data"}}},
)
assert resp.status_code == 200
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/resource/{rid}?show=origin",
+ f"/kb/{standalone_knowledgebox}/resource/{rid}?show=origin",
)
assert resp.status_code == 200
data = resp.json()
assert data["origin"]["metadata"]["some"] == "data"
+@pytest.mark.deploy_modes("standalone")
async def test_question_answer(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox,
):
# create a new resource
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "My title",
"slug": "myresource",
@@ -747,12 +765,12 @@ async def test_question_answer(
message.question_answers.append(qaw)
message.uuid = rid
- message.kbid = knowledgebox
+ message.kbid = standalone_knowledgebox
- await inject_message(nucliadb_grpc, message)
+ await inject_message(nucliadb_ingest_grpc, message)
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/resource/{rid}?show=extracted&extracted=question_answers",
+ f"/kb/{standalone_knowledgebox}/resource/{rid}?show=extracted&extracted=question_answers",
)
assert resp.status_code == 200
data = resp.json()
@@ -780,11 +798,12 @@ async def test_question_answer(
}
+@pytest.mark.deploy_modes("standalone")
async def test_question_answer_annotations(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox,
):
qa_annotation = metadata.QuestionAnswerAnnotation(
question_answer=common.QuestionAnswer(
@@ -805,7 +824,7 @@ async def test_question_answer_annotations(
)
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "My title",
"slug": "myresource",
@@ -826,7 +845,7 @@ async def test_question_answer_annotations(
rid = resp.json()["uuid"]
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/resource/{rid}?show=basic",
+ f"/kb/{standalone_knowledgebox}/resource/{rid}?show=basic",
)
assert resp.status_code == 200
data = resp.json()
@@ -834,13 +853,14 @@ async def test_question_answer_annotations(
assert resource.fieldmetadata[0].question_answers[0] == qa_annotation # type: ignore
+@pytest.mark.deploy_modes("standalone")
async def test_link_fields_store_css_selector(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "My title",
"slug": "myresource",
@@ -857,7 +877,7 @@ async def test_link_fields_store_css_selector(
rid = resp.json()["uuid"]
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/resource/{rid}?show=values",
+ f"/kb/{standalone_knowledgebox}/resource/{rid}?show=values",
)
assert resp.status_code == 200
data = resp.json()
@@ -873,13 +893,14 @@ async def test_link_fields_store_css_selector(
assert css_selector == "main"
+@pytest.mark.deploy_modes("standalone")
async def test_link_fields_store_xpath(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "My title",
"slug": "myresource",
@@ -896,7 +917,7 @@ async def test_link_fields_store_xpath(
rid = resp.json()["uuid"]
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/resource/{rid}?show=values",
+ f"/kb/{standalone_knowledgebox}/resource/{rid}?show=values",
)
assert resp.status_code == 200
data = resp.json()
@@ -912,12 +933,13 @@ async def test_link_fields_store_xpath(
assert xpath == "my_xpath"
+@pytest.mark.deploy_modes("standalone")
async def test_dates_are_properly_validated(
nucliadb_writer: AsyncClient,
nucliadb_reader: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
resp = await nucliadb_writer.post(
f"/kb/{kbid}/resources",
json={
@@ -949,14 +971,15 @@ async def test_dates_are_properly_validated(
assert resp.json()["origin"]["created"] == "0001-01-01T00:00:00Z"
+@pytest.mark.deploy_modes("standalone")
async def test_file_computed_titles_are_set_on_resource_title(
- nucliadb_writer,
- nucliadb_grpc,
- nucliadb_reader,
- knowledgebox,
+ nucliadb_writer: AsyncClient,
+ nucliadb_ingest_grpc: WriterStub,
+ nucliadb_reader: AsyncClient,
+ standalone_knowledgebox,
):
# Create a resource with an email field
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
resp = await nucliadb_writer.post(
f"/kb/{kbid}/resources",
json={
@@ -986,7 +1009,7 @@ async def test_file_computed_titles_are_set_on_resource_title(
fed.field = "email"
fed.title = extracted_title
bm.file_extracted_data.append(fed)
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
# Check that the resource title changed
resp = await nucliadb_reader.get(f"/kb/{kbid}/resource/{rid}")
@@ -994,7 +1017,7 @@ async def test_file_computed_titles_are_set_on_resource_title(
assert resp.json()["title"] == extracted_title
# Now test that if the title is changed on creation, it is not overwritten
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
resp = await nucliadb_writer.post(
f"/kb/{kbid}/resources",
json={
@@ -1024,7 +1047,7 @@ async def test_file_computed_titles_are_set_on_resource_title(
fed.field = "email"
fed.title = extracted_title
bm.file_extracted_data.append(fed)
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
# Check that the resource title changed
resp = await nucliadb_reader.get(f"/kb/{kbid}/resource/{rid2}")
@@ -1034,13 +1057,14 @@ async def test_file_computed_titles_are_set_on_resource_title(
assert title == "Something else"
+@pytest.mark.deploy_modes("standalone")
async def test_jsonl_text_field(
nucliadb_writer: AsyncClient,
nucliadb_reader: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
resp = await nucliadb_writer.post(
- f"kb/{knowledgebox}/resources",
+ f"kb/{standalone_knowledgebox}/resources",
json={
"title": "My title",
"texts": {
@@ -1060,7 +1084,7 @@ async def test_jsonl_text_field(
rid = resp.json()["uuid"]
resp = await nucliadb_reader.get(
- f"kb/{knowledgebox}/resource/{rid}?show=values&show=basic",
+ f"kb/{standalone_knowledgebox}/resource/{rid}?show=values&show=basic",
)
assert resp.status_code == 200, resp.text
data = resp.json()
@@ -1075,10 +1099,11 @@ async def test_jsonl_text_field(
assert data["icon"] == "application/x-ndjson"
+@pytest.mark.deploy_modes("standalone")
async def test_extract_strategy_on_fields(
nucliadb_writer: AsyncClient,
nucliadb_reader: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
processing = get_processing()
assert isinstance(processing, DummyProcessingEngine)
@@ -1087,7 +1112,7 @@ async def test_extract_strategy_on_fields(
# Create a resource with a field of each type
resp = await nucliadb_writer.post(
- f"kb/{knowledgebox}/resources",
+ f"kb/{standalone_knowledgebox}/resources",
json={
"title": "My title",
"texts": {
@@ -1120,7 +1145,7 @@ async def test_extract_strategy_on_fields(
# Check that the extract strategies are stored
resp = await nucliadb_reader.get(
- f"kb/{knowledgebox}/resource/{rid}?show=values",
+ f"kb/{standalone_knowledgebox}/resource/{rid}?show=values",
)
assert resp.status_code == 200, resp.text
data = resp.json()
@@ -1145,7 +1170,7 @@ def validate_processing_call(processing: DummyProcessingEngine):
# Reprocess resource should also send the extract strategies
resp = await nucliadb_writer.post(
- f"kb/{knowledgebox}/resource/{rid}/reprocess",
+ f"kb/{standalone_knowledgebox}/resource/{rid}/reprocess",
)
assert resp.status_code == 202, resp.text
@@ -1153,7 +1178,7 @@ def validate_processing_call(processing: DummyProcessingEngine):
# Update them to make sure they are stored correctly
resp = await nucliadb_writer.patch(
- f"kb/{knowledgebox}/resource/{rid}",
+ f"kb/{standalone_knowledgebox}/resource/{rid}",
json={
"texts": {
"text": {
@@ -1184,7 +1209,7 @@ def validate_processing_call(processing: DummyProcessingEngine):
# Check that the extract strategies are stored
resp = await nucliadb_reader.get(
- f"kb/{knowledgebox}/resource/{rid}?show=values",
+ f"kb/{standalone_knowledgebox}/resource/{rid}?show=values",
)
assert resp.status_code == 200, resp.text
@@ -1198,7 +1223,7 @@ def validate_processing_call(processing: DummyProcessingEngine):
# Upload a file with the upload endpoint, and set the extract strategy via a header
resp = await nucliadb_writer.post(
- f"kb/{knowledgebox}/resource/{rid}/file/file2/upload",
+ f"kb/{standalone_knowledgebox}/resource/{rid}/file/file2/upload",
headers={"x-extract-strategy": "barbafoo"},
content=b"file content",
)
@@ -1207,7 +1232,7 @@ def validate_processing_call(processing: DummyProcessingEngine):
# Check that the extract strategy is stored
resp = await nucliadb_reader.get(
- f"kb/{knowledgebox}/resource/{rid}?show=values",
+ f"kb/{standalone_knowledgebox}/resource/{rid}?show=values",
)
assert resp.status_code == 200, resp.text
data = resp.json()
@@ -1236,7 +1261,7 @@ def header_encode(some_string):
)
file_content = b"file content"
resp = await nucliadb_writer.post(
- f"kb/{knowledgebox}/tusupload",
+ f"kb/{standalone_knowledgebox}/tusupload",
headers={
"x-extract-strategy": "barbafoo-tus",
"tus-resumable": "1.0.0",
@@ -1262,7 +1287,7 @@ def header_encode(some_string):
# Check that the extract strategy is stored
resp = await nucliadb_reader.get(
- f"kb/{knowledgebox}/resource/{rid}?show=values",
+ f"kb/{standalone_knowledgebox}/resource/{rid}?show=values",
)
assert resp.status_code == 200, resp.text
data = resp.json()
diff --git a/nucliadb/tests/nucliadb/integration/test_ask.py b/nucliadb/tests/nucliadb/integration/test_ask.py
index a1c95979f6..ab1cdf443d 100644
--- a/nucliadb/tests/nucliadb/integration/test_ask.py
+++ b/nucliadb/tests/nucliadb/integration/test_ask.py
@@ -31,7 +31,7 @@
StatusGenerativeResponse,
)
-from nucliadb.search.predict import AnswerStatusCode
+from nucliadb.search.predict import AnswerStatusCode, DummyPredictEngine
from nucliadb.search.utilities import get_predict
from nucliadb_models.search import (
AskRequest,
@@ -61,16 +61,17 @@ def audit():
yield audit_mock
+@pytest.mark.deploy_modes("standalone")
async def test_ask(
nucliadb_reader: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox: str,
):
- resp = await nucliadb_reader.post(f"/kb/{knowledgebox}/ask", json={"query": "query"})
+ resp = await nucliadb_reader.post(f"/kb/{standalone_knowledgebox}/ask", json={"query": "query"})
assert resp.status_code == 200
context = [{"author": "USER", "text": "query"}]
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={
"query": "query",
"context": context,
@@ -88,19 +89,20 @@ def find_incomplete_results():
yield
+@pytest.mark.deploy_modes("standalone")
async def test_ask_handles_incomplete_find_results(
nucliadb_reader: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox: str,
find_incomplete_results,
):
- resp = await nucliadb_reader.post(f"/kb/{knowledgebox}/ask", json={"query": "query"})
+ resp = await nucliadb_reader.post(f"/kb/{standalone_knowledgebox}/ask", json={"query": "query"})
assert resp.status_code == 529
assert resp.json() == {"detail": "Temporary error on information retrieval. Please try again."}
@pytest.fixture
-async def resource(nucliadb_writer, knowledgebox):
- kbid = knowledgebox
+async def resource(nucliadb_writer: AsyncClient, standalone_knowledgebox: str):
+ kbid = standalone_knowledgebox
resp = await nucliadb_writer.post(
f"/kb/{kbid}/resources",
json={
@@ -116,9 +118,9 @@ async def resource(nucliadb_writer, knowledgebox):
@pytest.fixture
-async def graph_resource(nucliadb_writer, nucliadb_grpc, knowledgebox):
+async def graph_resource(nucliadb_writer: AsyncClient, nucliadb_ingest_grpc, standalone_knowledgebox):
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "Knowledge graph",
"slug": "knowledgegraph",
@@ -194,16 +196,17 @@ async def graph_resource(nucliadb_writer, nucliadb_grpc, knowledgebox):
]
bm = BrokerMessage()
bm.uuid = rid
- bm.kbid = knowledgebox
+ bm.kbid = standalone_knowledgebox
bm.relations.extend(edges)
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
await wait_for_sync()
return rid
-async def test_ask_synchronous(nucliadb_reader: AsyncClient, knowledgebox, resource):
+@pytest.mark.deploy_modes("standalone")
+async def test_ask_synchronous(nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resource):
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={"query": "title"},
headers={"X-Synchronous": "True"},
)
@@ -214,9 +217,12 @@ async def test_ask_synchronous(nucliadb_reader: AsyncClient, knowledgebox, resou
assert resp_data.status == AnswerStatusCode.SUCCESS.prettify()
-async def test_ask_status_code_no_retrieval_data(nucliadb_reader: AsyncClient, knowledgebox):
+@pytest.mark.deploy_modes("standalone")
+async def test_ask_status_code_no_retrieval_data(
+ nucliadb_reader: AsyncClient, standalone_knowledgebox: str
+):
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={"query": "title"},
headers={"X-Synchronous": "True"},
)
@@ -227,7 +233,8 @@ async def test_ask_status_code_no_retrieval_data(nucliadb_reader: AsyncClient, k
assert resp_data.status == AnswerStatusCode.NO_RETRIEVAL_DATA.prettify()
-async def test_ask_with_citations(nucliadb_reader: AsyncClient, knowledgebox, resource):
+@pytest.mark.deploy_modes("standalone")
+async def test_ask_with_citations(nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resource):
citations = {"foo": [], "bar": []} # type: ignore
citations_gen = CitationsGenerativeResponse(citations=citations)
citations_chunk = GenerativeChunk(chunk=citations_gen)
@@ -236,7 +243,7 @@ async def test_ask_with_citations(nucliadb_reader: AsyncClient, knowledgebox, re
predict.ndjson_answer.append(citations_chunk.model_dump_json() + "\n") # type: ignore
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={"query": "title", "citations": True, "citation_threshold": 0.5},
headers={"X-Synchronous": "true"},
)
@@ -248,10 +255,13 @@ async def test_ask_with_citations(nucliadb_reader: AsyncClient, knowledgebox, re
@pytest.mark.parametrize("debug", (True, False))
-async def test_sync_ask_returns_debug_mode(nucliadb_reader: AsyncClient, knowledgebox, resource, debug):
+@pytest.mark.deploy_modes("standalone")
+async def test_sync_ask_returns_debug_mode(
+ nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resource, debug
+):
# Make sure prompt context is returned if debug is True
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={"query": "title", "debug": debug},
headers={"X-Synchronous": "True"},
)
@@ -267,8 +277,8 @@ async def test_sync_ask_returns_debug_mode(nucliadb_reader: AsyncClient, knowled
@pytest.fixture
-async def resources(nucliadb_writer, knowledgebox):
- kbid = knowledgebox
+async def resources(nucliadb_writer: AsyncClient, standalone_knowledgebox: str):
+ kbid = standalone_knowledgebox
rids = []
for i in range(2):
resp = await nucliadb_writer.post(
@@ -300,14 +310,17 @@ def parse_ask_response(resp):
return results
-async def test_ask_rag_options_full_resource(nucliadb_reader: AsyncClient, knowledgebox, resources):
+@pytest.mark.deploy_modes("standalone")
+async def test_ask_rag_options_full_resource(
+ nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resources
+):
resource1, resource2 = resources
predict = get_predict()
predict.calls.clear() # type: ignore
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={
"query": "title",
"features": ["keyword", "semantic", "relations"],
@@ -331,8 +344,9 @@ async def test_ask_rag_options_full_resource(nucliadb_reader: AsyncClient, knowl
assert prompt_context[f"{resource2}/t/text_field"] == "The body of the text field"
+@pytest.mark.deploy_modes("standalone")
async def test_ask_full_resource_rag_strategy_with_exclude(
- nucliadb_reader: AsyncClient, knowledgebox, resources
+ nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resources
):
resource1, resource2 = resources
@@ -340,7 +354,7 @@ async def test_ask_full_resource_rag_strategy_with_exclude(
predict.calls.clear() # type: ignore
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={
"query": "title",
"features": ["keyword", "semantic", "relations"],
@@ -385,14 +399,17 @@ async def test_ask_full_resource_rag_strategy_with_exclude(
assert prompt_context[f"{resource2}/t/text_field"] == "The body of the text field"
-async def test_ask_rag_options_extend_with_fields(nucliadb_reader: AsyncClient, knowledgebox, resources):
+@pytest.mark.deploy_modes("standalone")
+async def test_ask_rag_options_extend_with_fields(
+ nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resources
+):
resource1, resource2 = resources
predict = get_predict()
predict.calls.clear() # type: ignore
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={
"query": "title",
"features": ["keyword", "semantic", "relations"],
@@ -491,7 +508,10 @@ async def test_ask_rag_options_extend_with_fields(nucliadb_reader: AsyncClient,
),
],
)
-async def test_ask_rag_strategies_validation(nucliadb_reader, invalid_payload, expected_error_msg):
+@pytest.mark.deploy_modes("standalone")
+async def test_ask_rag_strategies_validation(
+ nucliadb_reader: AsyncClient, invalid_payload, expected_error_msg
+):
# Invalid strategy as a string
resp = await nucliadb_reader.post(
f"/kb/kbid/ask",
@@ -503,10 +523,11 @@ async def test_ask_rag_strategies_validation(nucliadb_reader, invalid_payload, e
assert expected_error_msg in error_msg
-async def test_ask_capped_context(nucliadb_reader: AsyncClient, knowledgebox, resources):
+@pytest.mark.deploy_modes("standalone")
+async def test_ask_capped_context(nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resources):
# By default, max size is big enough to fit all the prompt context
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={
"query": "title",
"rag_strategies": [{"name": "full_resource"}],
@@ -524,7 +545,7 @@ async def test_ask_capped_context(nucliadb_reader: AsyncClient, knowledgebox, re
assert total_size > max_size * 3
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={
"query": "title",
"rag_strategies": [{"name": "full_resource"}],
@@ -540,15 +561,17 @@ async def test_ask_capped_context(nucliadb_reader: AsyncClient, knowledgebox, re
assert total_size <= max_size * 3
-async def test_ask_on_a_kb_not_found(nucliadb_reader):
+@pytest.mark.deploy_modes("standalone")
+async def test_ask_on_a_kb_not_found(nucliadb_reader: AsyncClient):
resp = await nucliadb_reader.post("/kb/unknown_kb_id/ask", json={"query": "title"})
assert resp.status_code == 404
-async def test_ask_max_tokens(nucliadb_reader, knowledgebox, resources):
+@pytest.mark.deploy_modes("standalone")
+async def test_ask_max_tokens(nucliadb_reader: AsyncClient, standalone_knowledgebox, resources):
# As an integer
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={
"query": "title",
"max_tokens": 100,
@@ -558,7 +581,7 @@ async def test_ask_max_tokens(nucliadb_reader, knowledgebox, resources):
# Same but with the max tokens in a dict
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={
"query": "title",
"max_tokens": {"context": 100, "answer": 50},
@@ -568,8 +591,10 @@ async def test_ask_max_tokens(nucliadb_reader, knowledgebox, resources):
# If the context requested is bigger than the max tokens, it should fail
predict = get_predict()
+ assert isinstance(predict, DummyPredictEngine), "dummy is expected in this test"
+
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={
"query": "title",
"max_tokens": {"context": predict.max_context + 1},
@@ -578,9 +603,10 @@ async def test_ask_max_tokens(nucliadb_reader, knowledgebox, resources):
assert resp.status_code == 412
-async def test_ask_on_resource(nucliadb_reader: AsyncClient, knowledgebox, resource):
+@pytest.mark.deploy_modes("standalone")
+async def test_ask_on_resource(nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resource):
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/resource/{resource}/ask",
+ f"/kb/{standalone_knowledgebox}/resource/{resource}/ask",
json={"query": "title"},
headers={"X-Synchronous": "True"},
)
@@ -588,8 +614,12 @@ async def test_ask_on_resource(nucliadb_reader: AsyncClient, knowledgebox, resou
SyncAskResponse.model_validate_json(resp.content)
-async def test_ask_handles_stream_errors_on_predict(nucliadb_reader, knowledgebox, resource):
+@pytest.mark.deploy_modes("standalone")
+async def test_ask_handles_stream_errors_on_predict(
+ nucliadb_reader: AsyncClient, standalone_knowledgebox, resource
+):
predict = get_predict()
+ assert isinstance(predict, DummyPredictEngine), "dummy is expected in this test"
prev = predict.ndjson_answer.copy()
predict.ndjson_answer.pop(-1)
@@ -599,7 +629,7 @@ async def test_ask_handles_stream_errors_on_predict(nucliadb_reader, knowledgebo
# Sync ask
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={"query": "title"},
headers={"X-Synchronous": "True"},
)
@@ -610,7 +640,7 @@ async def test_ask_handles_stream_errors_on_predict(nucliadb_reader, knowledgebo
# Stream ask
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={"query": "title"},
)
assert resp.status_code == 200
@@ -623,28 +653,34 @@ async def test_ask_handles_stream_errors_on_predict(nucliadb_reader, knowledgebo
predict.ndjson_answer = prev
-async def test_ask_handles_stream_unexpected_errors_sync(nucliadb_reader, knowledgebox, resource):
+@pytest.mark.deploy_modes("standalone")
+async def test_ask_handles_stream_unexpected_errors_sync(
+ nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resource
+):
with mock.patch(
"nucliadb.search.search.chat.ask.AskResult._stream",
side_effect=ValueError("foobar"),
):
# Sync ask -- should return a 500
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={"query": "title"},
headers={"X-Synchronous": "True"},
)
assert resp.status_code == 500
-async def test_ask_handles_stream_unexpected_errors_stream(nucliadb_reader, knowledgebox, resource):
+@pytest.mark.deploy_modes("standalone")
+async def test_ask_handles_stream_unexpected_errors_stream(
+ nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resource
+):
with mock.patch(
"nucliadb.search.search.chat.ask.AskResult._stream",
side_effect=ValueError("foobar"),
):
# Stream ask -- should handle by yielding the error item
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={"query": "title"},
)
assert resp.status_code == 200
@@ -656,12 +692,13 @@ async def test_ask_handles_stream_unexpected_errors_stream(nucliadb_reader, know
)
+@pytest.mark.deploy_modes("standalone")
async def test_ask_with_json_schema_output(
nucliadb_reader: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox: str,
resource,
):
- resp = await nucliadb_reader.post(f"/kb/{knowledgebox}/ask", json={"query": "query"})
+ resp = await nucliadb_reader.post(f"/kb/{standalone_knowledgebox}/ask", json={"query": "query"})
assert resp.status_code == 200
predict = get_predict()
@@ -670,7 +707,7 @@ async def test_ask_with_json_schema_output(
predict.ndjson_answer = [GenerativeChunk(chunk=predict_answer).model_dump_json() + "\n"] # type: ignore
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={
"query": "title",
"features": ["keyword", "semantic", "relations"],
@@ -689,10 +726,13 @@ async def test_ask_with_json_schema_output(
assert answer_json["confidence"] == 0.5
+@pytest.mark.deploy_modes("standalone")
async def test_ask_assert_audit_retrieval_contexts(
- nucliadb_reader: AsyncClient, knowledgebox, resources, audit
+ nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resources, audit
):
- resp = await nucliadb_reader.post(f"/kb/{knowledgebox}/ask", json={"query": "title", "debug": True})
+ resp = await nucliadb_reader.post(
+ f"/kb/{standalone_knowledgebox}/ask", json={"query": "title", "debug": True}
+ )
assert resp.status_code == 200
retrieved_context = audit.chat.call_args_list[0].kwargs["retrieved_context"]
@@ -701,11 +741,12 @@ async def test_ask_assert_audit_retrieval_contexts(
}
+@pytest.mark.deploy_modes("standalone")
async def test_ask_rag_strategy_neighbouring_paragraphs(
- nucliadb_reader: AsyncClient, knowledgebox, resources
+ nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resources
):
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={
"query": "title",
"rag_strategies": [{"name": "neighbouring_paragraphs", "before": 2, "after": 2}],
@@ -718,11 +759,12 @@ async def test_ask_rag_strategy_neighbouring_paragraphs(
assert ask_response.prompt_context is not None
+@pytest.mark.deploy_modes("standalone")
async def test_ask_rag_strategy_metadata_extension(
- nucliadb_reader: AsyncClient, knowledgebox, resources
+ nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resources
):
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={
"query": "title",
"rag_strategies": [
@@ -757,7 +799,7 @@ async def test_ask_rag_strategy_metadata_extension(
{"name": "field_extension", "fields": ["a/title", "a/summary"]},
]:
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={
"query": "title",
"rag_strategies": [
@@ -782,9 +824,10 @@ async def test_ask_rag_strategy_metadata_extension(
assert origin_found, ask_response.prompt_context
-async def test_ask_top_k(nucliadb_reader: AsyncClient, knowledgebox, resources):
+@pytest.mark.deploy_modes("standalone")
+async def test_ask_top_k(nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resources):
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={
"query": "title",
},
@@ -797,7 +840,7 @@ async def test_ask_top_k(nucliadb_reader: AsyncClient, knowledgebox, resources):
# Check that the top_k is respected
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={
"query": "title",
"top_k": 1,
@@ -810,18 +853,18 @@ async def test_ask_top_k(nucliadb_reader: AsyncClient, knowledgebox, resources):
assert ask_response.retrieval_results.best_matches[0] == prev_best_matches[0]
-@pytest.mark.asyncio
@pytest.mark.parametrize("relation_ranking", ["generative", "reranker"])
@patch("nucliadb.search.search.graph_strategy.get_predict")
@patch("nucliadb.search.search.graph_strategy.rank_relations_reranker")
@patch("nucliadb.search.search.graph_strategy.rank_relations_generative")
+@pytest.mark.deploy_modes("standalone")
async def test_ask_graph_strategy(
mocker_generative,
mocker_reranker,
mocker_predict,
relation_ranking: str,
nucliadb_reader: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox: str,
graph_resource,
):
# Mock the rank_relations functions to return the same relations with a score of 5 (no ranking)
@@ -849,7 +892,7 @@ def mock_rank(relations, *args, **kwargs):
}
headers = {"X-Synchronous": "True"}
- url = f"/kb/{knowledgebox}/ask"
+ url = f"/kb/{standalone_knowledgebox}/ask"
async def assert_ask(d, expected_paragraphs_text, expected_paragraphs_relations):
resp = await nucliadb_reader.post(
@@ -955,9 +998,12 @@ async def assert_ask(d, expected_paragraphs_text, expected_paragraphs_relations)
await assert_ask(data, expected_paragraphs_text, expected_paragraphs_relations)
-async def test_ask_rag_strategy_prequeries(nucliadb_reader: AsyncClient, knowledgebox, resources):
+@pytest.mark.deploy_modes("standalone")
+async def test_ask_rag_strategy_prequeries(
+ nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resources
+):
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={
"query": "",
"rag_strategies": [
@@ -989,12 +1035,13 @@ async def test_ask_rag_strategy_prequeries(nucliadb_reader: AsyncClient, knowled
assert len(ask_response.prequeries["title_query"].best_matches) > 1
+@pytest.mark.deploy_modes("standalone")
async def test_ask_rag_strategy_prequeries_with_full_resource(
nucliadb_reader: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox: str,
):
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={
"query": "",
"rag_strategies": [
@@ -1023,13 +1070,14 @@ async def test_ask_rag_strategy_prequeries_with_full_resource(
assert resp.status_code == 200, resp.text
+@pytest.mark.deploy_modes("standalone")
async def test_ask_rag_strategy_prequeries_with_prefilter(
nucliadb_reader: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox: str,
resources,
):
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
headers={"X-Synchronous": "True"},
json={
"query": "",
@@ -1070,12 +1118,13 @@ async def test_ask_rag_strategy_prequeries_with_prefilter(
assert ask_response.prequeries["prequery"].resources[expected_rid].title == "The title 0"
+@pytest.mark.deploy_modes("standalone")
async def test_ask_on_resource_with_json_schema_automatic_prequeries(
nucliadb_reader: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox: str,
resource,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
rid = resource
answer_json_schema = {
"name": "book_ordering",
@@ -1106,9 +1155,10 @@ async def test_ask_on_resource_with_json_schema_automatic_prequeries(
assert len(ask_response.prequeries) == 4
+@pytest.mark.deploy_modes("standalone")
async def test_all_rag_strategies_combinations(
nucliadb_reader: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox: str,
resources,
):
rag_strategies = [
@@ -1137,22 +1187,23 @@ def valid_combination(combination: list[RagStrategies]) -> bool:
for combination in valid_combinations: # type: ignore
print(f"Combination: {sorted([strategy.name for strategy in combination])}")
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
headers={"X-Synchronous": "True"},
json={
"query": "title",
- "rag_strategies": [strategy.dict() for strategy in combination],
+ "rag_strategies": [strategy.model_dump() for strategy in combination],
},
)
assert resp.status_code == 200, resp.text
+@pytest.mark.deploy_modes("standalone")
async def test_ask_fails_with_answer_json_schema_too_big(
nucliadb_reader: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
resources: list[str],
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
rid = resources[0]
resp = await nucliadb_reader.post(
@@ -1184,13 +1235,14 @@ async def test_ask_fails_with_answer_json_schema_too_big(
)
+@pytest.mark.deploy_modes("standalone")
async def test_rag_image_rag_strategies(
nucliadb_reader: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
resources: list[str],
):
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
headers={"X-Synchronous": "True"},
json={
"query": "title",
@@ -1199,7 +1251,7 @@ async def test_rag_image_rag_strategies(
assert resp.status_code == 200, resp.text
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
headers={"X-Synchronous": "True"},
json={
"query": "title",
@@ -1220,10 +1272,13 @@ async def test_rag_image_rag_strategies(
assert resp.status_code == 200, resp.text
-async def test_ask_skip_answer_generation(nucliadb_reader: AsyncClient, knowledgebox, resource):
+@pytest.mark.deploy_modes("standalone")
+async def test_ask_skip_answer_generation(
+ nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resource
+):
# Synchronous
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={"query": "title", "generate_answer": False, "debug": True},
headers={"X-Synchronous": "True"},
)
@@ -1237,7 +1292,7 @@ async def test_ask_skip_answer_generation(nucliadb_reader: AsyncClient, knowledg
# Streaming
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/ask",
+ f"/kb/{standalone_knowledgebox}/ask",
json={"query": "title", "generate_answer": False, "debug": True},
)
assert resp.status_code == 200
diff --git a/nucliadb/tests/nucliadb/integration/test_conversation.py b/nucliadb/tests/nucliadb/integration/test_conversation.py
index 1dd203c200..198bd78561 100644
--- a/nucliadb/tests/nucliadb/integration/test_conversation.py
+++ b/nucliadb/tests/nucliadb/integration/test_conversation.py
@@ -45,7 +45,9 @@
@pytest.fixture(scope="function")
-async def resource_with_conversation(nucliadb_grpc, nucliadb_writer, knowledgebox):
+async def resource_with_conversation(
+ nucliadb_ingest_grpc, nucliadb_writer: AsyncClient, standalone_knowledgebox
+):
messages = []
for i in range(1, 301):
messages.append(
@@ -55,13 +57,13 @@ async def resource_with_conversation(nucliadb_grpc, nucliadb_writer, knowledgebo
timestamp=datetime.now(),
content=InputMessageContent(text="What is the meaning of life?"),
ident=str(i),
- type=MessageType.QUESTION.value,
+ type=MessageType.QUESTION,
)
)
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
headers={"Content-Type": "application/json"},
- data=CreateResourcePayload(
+ content=CreateResourcePayload(
slug="myresource",
conversations={
"faq": InputConversationField(messages=messages),
@@ -74,13 +76,13 @@ async def resource_with_conversation(nucliadb_grpc, nucliadb_writer, knowledgebo
# add another message using the api to add single message
resp = await nucliadb_writer.put(
- f"/kb/{knowledgebox}/resource/{rid}/conversation/faq/messages",
- data="["
+ f"/kb/{standalone_knowledgebox}/resource/{rid}/conversation/faq/messages",
+ content="["
+ InputMessage(
to=[f"computer"],
content=InputMessageContent(text="42"),
ident="computer",
- type=MessageType.ANSWER.value,
+ type=MessageType.ANSWER,
).model_dump_json(by_alias=True)
+ "]",
)
@@ -92,7 +94,7 @@ async def resource_with_conversation(nucliadb_grpc, nucliadb_writer, knowledgebo
bm = BrokerMessage()
bm.uuid = rid
- bm.kbid = knowledgebox
+ bm.kbid = standalone_knowledgebox
field = FieldID(field="faq", field_type=FieldType.CONVERSATION)
etw = ExtractedTextWrapper()
@@ -108,20 +110,21 @@ async def resource_with_conversation(nucliadb_grpc, nucliadb_writer, knowledgebo
fmw.metadata.split_metadata[split].paragraphs.append(paragraph)
bm.field_metadata.append(fmw)
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
yield rid
+@pytest.mark.deploy_modes("standalone")
async def test_conversations(
nucliadb_reader: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
resource_with_conversation,
):
rid = resource_with_conversation
# get field summary
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}?show=values")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}?show=values")
assert resp.status_code == 200
res_resp = ResponseResponse.model_validate(resp.json())
@@ -133,7 +136,9 @@ async def test_conversations(
)
# get first page
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}/conversation/faq?page=1")
+ resp = await nucliadb_reader.get(
+ f"/kb/{standalone_knowledgebox}/resource/{rid}/conversation/faq?page=1"
+ )
assert resp.status_code == 200
field_resp = ResourceField.model_validate(resp.json())
msgs = field_resp.value["messages"] # type: ignore
@@ -142,7 +147,9 @@ async def test_conversations(
assert msgs[0]["type"] == MessageType.QUESTION.value
# get second page
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}/conversation/faq?page=2")
+ resp = await nucliadb_reader.get(
+ f"/kb/{standalone_knowledgebox}/resource/{rid}/conversation/faq?page=2"
+ )
assert resp.status_code == 200
field_resp = ResourceField.model_validate(resp.json())
msgs = field_resp.value["messages"] # type: ignore
@@ -151,15 +158,16 @@ async def test_conversations(
assert msgs[-1]["type"] == MessageType.ANSWER.value
+@pytest.mark.deploy_modes("standalone")
async def test_extracted_text_is_serialized_properly(
nucliadb_reader: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
resource_with_conversation,
):
rid = resource_with_conversation
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/resource/{rid}?show=values&show=extracted&extracted=text",
+ f"/kb/{standalone_knowledgebox}/resource/{rid}?show=values&show=extracted&extracted=text",
)
assert resp.status_code == 200
resource = Resource.model_validate(resp.json())
@@ -169,15 +177,16 @@ async def test_extracted_text_is_serialized_properly(
assert extracted.text.split_text["2"] == "Split text 2" # type: ignore
+@pytest.mark.deploy_modes("standalone")
async def test_find_conversations(
nucliadb_reader: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox: str,
resource_with_conversation,
):
rid = resource_with_conversation
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/find?query=",
+ f"/kb/{standalone_knowledgebox}/find?query=",
)
assert resp.status_code == 200
results = KnowledgeboxFindResults.model_validate(resp.json())
@@ -185,7 +194,7 @@ async def test_find_conversations(
assert matching_rid == rid
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/resource/{rid}?show=values&show=extracted&extracted=text"
+ f"/kb/{standalone_knowledgebox}/resource/{rid}?show=values&show=extracted&extracted=text"
)
assert resp.status_code == 200
resource = Resource.model_validate(resp.json())
@@ -205,7 +214,10 @@ async def test_find_conversations(
assert paragraphs[f"{rid}/c/faq/2/0-12"].text == "Split text 2"
-async def test_cannot_create_message_ident_0(nucliadb_grpc, nucliadb_writer, knowledgebox):
+@pytest.mark.deploy_modes("standalone")
+async def test_cannot_create_message_ident_0(
+ nucliadb_ingest_grpc, nucliadb_writer: AsyncClient, standalone_knowledgebox: str
+):
messages = [
# model_construct skips validation, to test the API error
InputMessage.model_construct(
@@ -214,13 +226,13 @@ async def test_cannot_create_message_ident_0(nucliadb_grpc, nucliadb_writer, kno
timestamp=datetime.now(),
content=InputMessageContent(text="What is the meaning of life?"),
ident="0",
- type=MessageType.QUESTION.value,
+ type=MessageType.QUESTION,
)
]
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
headers={"Content-Type": "application/json"},
- data=CreateResourcePayload(
+ content=CreateResourcePayload(
slug="myresource",
conversations={
"faq": InputConversationField(messages=messages),
diff --git a/nucliadb/tests/nucliadb/integration/test_counters.py b/nucliadb/tests/nucliadb/integration/test_counters.py
index d54b772ff7..13628f68aa 100644
--- a/nucliadb/tests/nucliadb/integration/test_counters.py
+++ b/nucliadb/tests/nucliadb/integration/test_counters.py
@@ -17,20 +17,22 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see .
#
+import pytest
from httpx import AsyncClient
+@pytest.mark.deploy_modes("standalone")
async def test_counters(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
# PUBLIC API
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}")
assert resp.status_code == 200
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "My title",
"slug": "myresource",
@@ -40,18 +42,18 @@ async def test_counters(
assert resp.status_code == 201
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={"slug": "myresource2", "title": "mytitle1"},
)
assert resp.status_code == 201
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={"slug": "myresource3", "title": "mytitle1"},
)
assert resp.status_code == 201
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/counters")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/counters")
assert resp.status_code == 200
assert resp.json()["resources"] == 3
diff --git a/nucliadb/tests/nucliadb/integration/test_data_augmentation_field_generation.py b/nucliadb/tests/nucliadb/integration/test_data_augmentation_field_generation.py
index 884725b784..05ebd47ba9 100644
--- a/nucliadb/tests/nucliadb/integration/test_data_augmentation_field_generation.py
+++ b/nucliadb/tests/nucliadb/integration/test_data_augmentation_field_generation.py
@@ -177,13 +177,14 @@ async def test_send_to_process_generated_fields(
assert "/g/da/author" in index_message.texts[f"t/{da_field}"].labels
+@pytest.mark.deploy_modes("standalone")
async def test_data_augmentation_field_generation_and_search(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox: str,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox: str,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
slug = "my-resource"
field_id = "my-text"
@@ -226,7 +227,7 @@ async def test_data_augmentation_field_generation_and_search(
field_metadata.field.CopyFrom(field_id_pb)
field_metadata.metadata.metadata.paragraphs.append(Paragraph(start=0, end=25))
bm.field_metadata.append(field_metadata)
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
# Data augmentation broker message
bm = BrokerMessage()
@@ -237,7 +238,7 @@ async def test_data_augmentation_field_generation_and_search(
bm.texts[da_field_id].body = "Text author"
bm.texts[da_field_id].md5 = hashlib.md5("Text author".encode()).hexdigest()
bm.texts[da_field_id].generated_by.data_augmentation.SetInParent()
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
# Processed DA resource (from processing)
bm = BrokerMessage()
@@ -261,7 +262,7 @@ async def test_data_augmentation_field_generation_and_search(
field_metadata.field.CopyFrom(da_field_id_pb)
field_metadata.metadata.metadata.paragraphs.append(Paragraph(start=0, end=28))
bm.field_metadata.append(field_metadata)
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
# Now validate we can search and filter out data augmentation fields
resp = await nucliadb_reader.post(
diff --git a/nucliadb/tests/nucliadb/integration/test_deletion.py b/nucliadb/tests/nucliadb/integration/test_deletion.py
index 2633b4d957..dcbdf6e2a5 100644
--- a/nucliadb/tests/nucliadb/integration/test_deletion.py
+++ b/nucliadb/tests/nucliadb/integration/test_deletion.py
@@ -19,6 +19,7 @@
#
import dataclasses
+import pytest
from httpx import AsyncClient
from nucliadb.common import datamanagers
@@ -46,16 +47,19 @@ class FieldData:
vector: tuple[str, list[float]]
+@pytest.mark.deploy_modes("standalone")
async def test_paragraph_index_deletions(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox,
):
# Prepare data for a resource with title, summary and a text field
async with datamanagers.with_ro_transaction() as txn:
- vectorsets = [vs async for _, vs in datamanagers.vectorsets.iter(txn, kbid=knowledgebox)]
+ vectorsets = [
+ vs async for _, vs in datamanagers.vectorsets.iter(txn, kbid=standalone_knowledgebox)
+ ]
assert len(vectorsets) == 1
vectorset_id = vectorsets[0].vectorset_id
vector_dimension = vectorsets[0].vectorset_index_config.vector_dimension
@@ -86,7 +90,7 @@ async def test_paragraph_index_deletions(
# Create a resource with a simple text field
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": title_field.text,
"summary": summary_field.text,
@@ -104,7 +108,7 @@ async def test_paragraph_index_deletions(
# Check that searching for original texts returns title and summary (text is
# not indexed)
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/find",
+ f"/kb/{standalone_knowledgebox}/find",
json={
"query": "Original",
"features": [SearchOptions.KEYWORD],
@@ -120,14 +124,16 @@ async def test_paragraph_index_deletions(
assert list(sorted(fields.keys())) == ["/a/summary", "/a/title"]
# Inject corresponding broker message as if it was coming from the processor
- bmb = BrokerMessageBuilder(kbid=knowledgebox, rid=rid, source=BrokerMessage.MessageSource.PROCESSOR)
+ bmb = BrokerMessageBuilder(
+ kbid=standalone_knowledgebox, rid=rid, source=BrokerMessage.MessageSource.PROCESSOR
+ )
bm = prepare_broker_message(bmb, title_field, summary_field, text_field)
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
await wait_for_sync() # wait until changes are searchable
# Check that searching for original texts does not return any results
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/find",
+ f"/kb/{standalone_knowledgebox}/find",
json={
"query": "Original",
"features": [SearchOptions.KEYWORD],
@@ -141,7 +147,7 @@ async def test_paragraph_index_deletions(
# Check that searching for extracted texts returns all fields
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/find",
+ f"/kb/{standalone_knowledgebox}/find",
json={
"query": "Extracted",
"features": [SearchOptions.KEYWORD],
@@ -164,7 +170,7 @@ async def test_paragraph_index_deletions(
)
resp = await nucliadb_writer.patch(
- f"/kb/{knowledgebox}/resource/{rid}",
+ f"/kb/{standalone_knowledgebox}/resource/{rid}",
json={
"texts": {
text_field.field_id: {
@@ -177,15 +183,17 @@ async def test_paragraph_index_deletions(
assert resp.status_code == 200
# Inject broker message with the modified text
- bmb = BrokerMessageBuilder(kbid=knowledgebox, rid=rid, source=BrokerMessage.MessageSource.PROCESSOR)
+ bmb = BrokerMessageBuilder(
+ kbid=standalone_knowledgebox, rid=rid, source=BrokerMessage.MessageSource.PROCESSOR
+ )
bm = prepare_broker_message(bmb, title_field, summary_field, text_field)
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
await wait_for_sync() # wait until changes are searchable
# Check that searching for the first extracted text now doesn't return the
# text field (as it has been modified)
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/find",
+ f"/kb/{standalone_knowledgebox}/find",
json={
"query": "Extracted",
"features": [SearchOptions.KEYWORD],
@@ -202,7 +210,7 @@ async def test_paragraph_index_deletions(
# Check that searching for the modified text only returns the text field
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/find",
+ f"/kb/{standalone_knowledgebox}/find",
json={
"query": "Modified",
"features": [SearchOptions.KEYWORD],
diff --git a/nucliadb/tests/nucliadb/integration/test_entities.py b/nucliadb/tests/nucliadb/integration/test_entities.py
index 9ab8e00b0c..2f914ecdb3 100644
--- a/nucliadb/tests/nucliadb/integration/test_entities.py
+++ b/nucliadb/tests/nucliadb/integration/test_entities.py
@@ -65,9 +65,9 @@
@pytest.fixture
async def text_field(
nucliadb_writer: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
field_id = "text-field"
resp = await nucliadb_writer.post(
@@ -93,12 +93,12 @@ async def text_field(
@pytest.fixture
-async def processing_entities(nucliadb_grpc: WriterStub, knowledgebox: str):
+async def processing_entities(nucliadb_ingest_grpc: WriterStub, standalone_knowledgebox: str):
entities = {
"cat": {"value": "cat"},
"dolphin": {"value": "dolphin"},
}
- bm = broker_resource(knowledgebox, slug="automatic-entities")
+ bm = broker_resource(standalone_knowledgebox, slug="automatic-entities")
ufm = UserFieldMetadata(
field=FieldID(field_type=FieldType.GENERIC, field="title"),
token=[TokenSplit(token="cat", start=0, end=3, klass="ANIMALS")],
@@ -121,13 +121,13 @@ async def processing_entities(nucliadb_grpc: WriterStub, knowledgebox: str):
)
)
bm.relations.extend(relations)
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
await wait_for_sync()
@pytest.fixture
async def annotated_entities(
- nucliadb_writer: AsyncClient, text_field: tuple[str, str, str], nucliadb_grpc
+ nucliadb_writer: AsyncClient, text_field: tuple[str, str, str], nucliadb_ingest_grpc
):
kbid, rid, field_id = text_field
@@ -167,7 +167,7 @@ async def annotated_entities(
bm_indexed = 0
retries = 0
while not bm_indexed:
- response: GetEntitiesGroupResponse = await nucliadb_grpc.GetEntitiesGroup(
+ response: GetEntitiesGroupResponse = await nucliadb_ingest_grpc.GetEntitiesGroup(
GetEntitiesGroupRequest(kb=KnowledgeBoxID(uuid=kbid), group="ANIMALS")
)
bm_indexed = "bird" in response.group.entities
@@ -180,7 +180,7 @@ async def annotated_entities(
@pytest.fixture
-async def user_entities(nucliadb_writer: AsyncClient, knowledgebox: str):
+async def user_entities(nucliadb_writer: AsyncClient, standalone_knowledgebox: str):
await wait_for_sync()
payload = CreateEntitiesGroupPayload(
group="ANIMALS",
@@ -192,31 +192,32 @@ async def user_entities(nucliadb_writer: AsyncClient, knowledgebox: str):
title="Animals",
color="black",
)
- resp = await create_entities_group(nucliadb_writer, knowledgebox, payload)
+ resp = await create_entities_group(nucliadb_writer, standalone_knowledgebox, payload)
assert resp.status_code == 200
@pytest.fixture
async def entities(
- nucliadb_grpc: WriterStub,
- knowledgebox: str,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox: str,
user_entities,
processing_entities,
annotated_entities,
):
"""Single fixture to get entities injected in different ways."""
# Ensure entities are properly stored/indexed
- await wait_until_entity(nucliadb_grpc, knowledgebox, "ANIMALS", "cat")
- await wait_until_entity(nucliadb_grpc, knowledgebox, "ANIMALS", "dolphin")
- await wait_until_entity(nucliadb_grpc, knowledgebox, "ANIMALS", "bird")
+ await wait_until_entity(nucliadb_ingest_grpc, standalone_knowledgebox, "ANIMALS", "cat")
+ await wait_until_entity(nucliadb_ingest_grpc, standalone_knowledgebox, "ANIMALS", "dolphin")
+ await wait_until_entity(nucliadb_ingest_grpc, standalone_knowledgebox, "ANIMALS", "bird")
+@pytest.mark.deploy_modes("standalone")
async def test_get_entities_groups(
nucliadb_reader: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
entities,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
resp = await nucliadb_reader.get(f"/kb/{kbid}/entitiesgroup/ANIMALS")
assert resp.status_code == 200
@@ -240,12 +241,13 @@ async def test_get_entities_groups(
assert body["detail"] == "Entities group 'I-DO-NOT-EXIST' does not exist"
+@pytest.mark.deploy_modes("standalone")
async def test_list_entities_groups(
nucliadb_reader: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
entities,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
resp = await nucliadb_reader.get(f"/kb/{kbid}/entitiesgroups?show_entities=false")
assert resp.status_code == 200
@@ -256,13 +258,14 @@ async def test_list_entities_groups(
assert len(body["groups"]["ANIMALS"]["entities"]) == 0
+@pytest.mark.deploy_modes("standalone")
async def test_create_entities_group_twice(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
entities,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
payload = CreateEntitiesGroupPayload(
group="ANIMALS",
@@ -274,13 +277,14 @@ async def test_create_entities_group_twice(
assert resp.status_code == 409
+@pytest.mark.deploy_modes("standalone")
async def test_update_entities_group(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
entities,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
update = UpdateEntitiesGroupPayload(
add={"seal": Entity(value="seal")},
@@ -303,13 +307,14 @@ async def test_update_entities_group(
assert body["entities"]["dog"]["value"] == "updated-dog"
+@pytest.mark.deploy_modes("standalone")
async def test_update_indexed_entities_group(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
processing_entities,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
update = UpdateEntitiesGroupPayload(
add={"seal": Entity(value="seal")},
@@ -330,13 +335,14 @@ async def test_update_indexed_entities_group(
assert body["entities"]["dolphin"]["value"] == "updated-dolphin"
+@pytest.mark.deploy_modes("standalone")
async def test_update_entities_group_metadata(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
entities,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
update = UpdateEntitiesGroupPayload(
title="Updated Animals",
@@ -353,13 +359,14 @@ async def test_update_entities_group_metadata(
assert body["color"] == "red"
+@pytest.mark.deploy_modes("standalone")
async def test_delete_entities_group(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
entities,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
resp = await delete_entities_group(nucliadb_writer, kbid, "ANIMALS")
assert resp.status_code == 200
@@ -368,13 +375,14 @@ async def test_delete_entities_group(
assert resp.status_code == 404
+@pytest.mark.deploy_modes("standalone")
async def test_delete_and_recreate_entities_group(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
user_entities,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
resp = await delete_entities_group(nucliadb_writer, kbid, "ANIMALS")
assert resp.status_code == 200
@@ -385,7 +393,7 @@ async def test_delete_and_recreate_entities_group(
title="Animals",
color="white",
)
- resp = await create_entities_group(nucliadb_writer, knowledgebox, payload)
+ resp = await create_entities_group(nucliadb_writer, standalone_knowledgebox, payload)
assert resp.status_code == 200
resp = await nucliadb_reader.get(f"/kb/{kbid}/entitiesgroup/ANIMALS")
@@ -395,15 +403,16 @@ async def test_delete_and_recreate_entities_group(
assert body["color"] == "white"
+@pytest.mark.deploy_modes("standalone")
async def test_entities_indexing(
nucliadb_reader: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
entities,
predict_mock,
):
# TODO: improve test cases here
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
resp = await nucliadb_reader.get(
f"/kb/{kbid}/suggest",
diff --git a/nucliadb/tests/nucliadb/integration/test_export_import.py b/nucliadb/tests/nucliadb/integration/test_export_import.py
index 74f18b8f11..a1fdb84b75 100644
--- a/nucliadb/tests/nucliadb/integration/test_export_import.py
+++ b/nucliadb/tests/nucliadb/integration/test_export_import.py
@@ -38,10 +38,12 @@
@pytest.fixture(scope="function")
-async def src_kb(nucliadb_writer: AsyncClient, nucliadb_manager: AsyncClient) -> AsyncIterator[str]:
+async def src_kb(
+ nucliadb_writer: AsyncClient, nucliadb_writer_manager: AsyncClient
+) -> AsyncIterator[str]:
slug = uuid.uuid4().hex
- resp = await nucliadb_manager.post("/kbs", json={"slug": slug})
+ resp = await nucliadb_writer_manager.post("/kbs", json={"slug": slug})
assert resp.status_code == 201
kbid = resp.json().get("uuid")
@@ -117,7 +119,7 @@ async def src_kb(nucliadb_writer: AsyncClient, nucliadb_manager: AsyncClient) ->
assert resp.status_code == 200
yield kbid
- resp = await nucliadb_manager.delete(f"/kb/{kbid}")
+ resp = await nucliadb_writer_manager.delete(f"/kb/{kbid}")
try:
assert resp.status_code == 200
except AssertionError:
@@ -125,12 +127,12 @@ async def src_kb(nucliadb_writer: AsyncClient, nucliadb_manager: AsyncClient) ->
@pytest.fixture(scope="function")
-async def dst_kb(nucliadb_manager: AsyncClient) -> AsyncIterator[str]:
- resp = await nucliadb_manager.post("/kbs", json={"slug": "dst_kb"})
+async def dst_kb(nucliadb_writer_manager: AsyncClient) -> AsyncIterator[str]:
+ resp = await nucliadb_writer_manager.post("/kbs", json={"slug": "dst_kb"})
assert resp.status_code == 201
uuid = resp.json().get("uuid")
yield uuid
- resp = await nucliadb_manager.delete(f"/kb/{uuid}")
+ resp = await nucliadb_writer_manager.delete(f"/kb/{uuid}")
try:
assert resp.status_code == 200
except AssertionError:
@@ -149,6 +151,7 @@ def standalone_nucliadb():
yield
+@pytest.mark.deploy_modes("standalone")
async def test_on_standalone_nucliadb(
standalone_nucliadb,
natsd,
@@ -208,6 +211,7 @@ async def imports_consumer(context: ApplicationContext) -> AsyncIterator[NatsTas
# await consumer.finalize()
+@pytest.mark.deploy_modes("standalone")
async def test_on_hosted_nucliadb(
hosted_nucliadb,
nucliadb_writer: AsyncClient,
@@ -256,6 +260,7 @@ async def _test_export_import_kb_api(
await _test_learning_config_mismatch(nucliadb_writer, export, dst_kb)
+@pytest.mark.deploy_modes("standalone")
async def test_export_and_create_kb_from_import_api(
standalone_nucliadb,
nucliadb_writer: AsyncClient,
@@ -292,6 +297,7 @@ async def test_export_and_create_kb_from_import_api(
await _test_learning_config_mismatch(nucliadb_writer, export, dst_kb)
+@pytest.mark.deploy_modes("standalone")
async def _test_learning_config_mismatch(
nucliadb_writer: AsyncClient,
export: BytesIO,
diff --git a/nucliadb/tests/nucliadb/integration/test_field_external_file.py b/nucliadb/tests/nucliadb/integration/test_field_external_file.py
index 1a82d6a01d..70eb63b76b 100644
--- a/nucliadb/tests/nucliadb/integration/test_field_external_file.py
+++ b/nucliadb/tests/nucliadb/integration/test_field_external_file.py
@@ -18,6 +18,7 @@
# along with this program. If not, see .
#
import pytest
+from httpx import AsyncClient
from nucliadb.writer.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, RESOURCES_PREFIX
from nucliadb_utils.settings import nuclia_settings
@@ -40,14 +41,15 @@ def nuclia_jwt_key():
yield
+@pytest.mark.deploy_modes("standalone")
async def test_external_file_field(
nuclia_jwt_key,
- nucliadb_reader,
- nucliadb_writer,
- knowledgebox,
+ nucliadb_reader: AsyncClient,
+ nucliadb_writer: AsyncClient,
+ standalone_knowledgebox,
):
# Create a resource
- kb_path = f"/{KB_PREFIX}/{knowledgebox}"
+ kb_path = f"/{KB_PREFIX}/{standalone_knowledgebox}"
resp = await nucliadb_writer.post(
f"{kb_path}/{RESOURCES_PREFIX}",
json={
diff --git a/nucliadb/tests/nucliadb/integration/test_find.py b/nucliadb/tests/nucliadb/integration/test_find.py
index 261507064c..d4446ce1b7 100644
--- a/nucliadb/tests/nucliadb/integration/test_find.py
+++ b/nucliadb/tests/nucliadb/integration/test_find.py
@@ -39,14 +39,15 @@
from tests.utils import inject_message
+@pytest.mark.deploy_modes("standalone")
async def test_find_with_label_changes(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox,
):
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"slug": "myresource",
"title": "My Title",
@@ -61,7 +62,7 @@ async def test_find_with_label_changes(
# should get 1 result
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/find",
+ f"/kb/{standalone_knowledgebox}/find",
json={
"query": "title",
},
@@ -72,7 +73,7 @@ async def test_find_with_label_changes(
# assert we get no results with label filter
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/find",
+ f"/kb/{standalone_knowledgebox}/find",
json={"query": "title", "filters": ["/classification.labels/labels/label1"]},
)
assert resp.status_code == 200
@@ -81,7 +82,7 @@ async def test_find_with_label_changes(
# add new label
resp = await nucliadb_writer.patch(
- f"/kb/{knowledgebox}/resource/{rid}",
+ f"/kb/{standalone_knowledgebox}/resource/{rid}",
json={
# "title": "My new title",
"usermetadata": {
@@ -101,7 +102,7 @@ async def test_find_with_label_changes(
# we should get 1 result now with updated label
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/find",
+ f"/kb/{standalone_knowledgebox}/find",
json={"query": "title", "filters": ["/classification.labels/labels/label1"]},
)
assert resp.status_code == 200
@@ -109,32 +110,34 @@ async def test_find_with_label_changes(
assert len(body["resources"]) == 1
+@pytest.mark.deploy_modes("standalone")
async def test_find_does_not_support_fulltext_search(
nucliadb_reader: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/find?query=title&features=fulltext&features=keyword",
+ f"/kb/{standalone_knowledgebox}/find?query=title&features=fulltext&features=keyword",
)
assert resp.status_code == 422
assert "fulltext search not supported" in resp.json()["detail"][0]["msg"]
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/find",
+ f"/kb/{standalone_knowledgebox}/find",
json={"query": "title", "features": [SearchOptions.FULLTEXT, SearchOptions.KEYWORD]},
)
assert resp.status_code == 422
assert "fulltext search not supported" in resp.json()["detail"][0]["msg"]
+@pytest.mark.deploy_modes("standalone")
async def test_find_resource_filters(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox,
):
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "My Title",
"summary": "My summary",
@@ -145,7 +148,7 @@ async def test_find_resource_filters(
rid1 = resp.json()["uuid"]
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "My Title",
"summary": "My summary",
@@ -157,7 +160,7 @@ async def test_find_resource_filters(
# Should get 2 result
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/find",
+ f"/kb/{standalone_knowledgebox}/find",
json={
"query": "title",
},
@@ -169,7 +172,7 @@ async def test_find_resource_filters(
# Check that resource filtering works
for rid in [rid1, rid2]:
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/find",
+ f"/kb/{standalone_knowledgebox}/find",
json={
"query": "title",
"resource_filters": [rid],
@@ -181,19 +184,20 @@ async def test_find_resource_filters(
assert rid in body["resources"]
+@pytest.mark.deploy_modes("standalone")
async def test_find_min_score(
nucliadb_reader: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
# When not specifying the min score on the request
# it should default to 0 for bm25 and 0.7 for semantic
- resp = await nucliadb_reader.post(f"/kb/{knowledgebox}/find", json={"query": "dummy"})
+ resp = await nucliadb_reader.post(f"/kb/{standalone_knowledgebox}/find", json={"query": "dummy"})
assert resp.status_code == 200
assert resp.json()["min_score"] == {"bm25": 0, "semantic": 0.7}
# When specifying the min score on the request
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/find",
+ f"/kb/{standalone_knowledgebox}/find",
json={"query": "dummy", "min_score": {"bm25": 10, "semantic": 0.5}},
)
assert resp.status_code == 200
@@ -201,19 +205,20 @@ async def test_find_min_score(
# Check that old api still works
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/find", json={"query": "dummy", "min_score": 0.5}
+ f"/kb/{standalone_knowledgebox}/find", json={"query": "dummy", "min_score": 0.5}
)
assert resp.status_code == 200
assert resp.json()["min_score"] == {"bm25": 0, "semantic": 0.5}
+@pytest.mark.deploy_modes("standalone")
async def test_story_7286(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"slug": "myresource",
"title": "My Title",
@@ -225,7 +230,7 @@ async def test_story_7286(
rid = resp.json()["uuid"]
resp = await nucliadb_writer.patch(
- f"/kb/{knowledgebox}/resource/{rid}",
+ f"/kb/{standalone_knowledgebox}/resource/{rid}",
json={
"fieldmetadata": [
{
@@ -248,7 +253,7 @@ async def test_story_7286(
with patch("nucliadb.search.search.hydrator.managed_serialize", return_value=None):
# should get no result (because serialize returns None, as the resource is not found in the DB)
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/find",
+ f"/kb/{standalone_knowledgebox}/find",
json={
"query": "title",
"features": [SearchOptions.KEYWORD, SearchOptions.SEMANTIC, SearchOptions.RELATIONS],
@@ -265,13 +270,14 @@ async def test_story_7286(
assert len(body["resources"]) == 0
+@pytest.mark.deploy_modes("standalone")
async def test_find_marks_fuzzy_results(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"slug": "myresource",
"title": "My Title",
@@ -281,7 +287,7 @@ async def test_find_marks_fuzzy_results(
# Should get only one non-fuzzy result
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/find",
+ f"/kb/{standalone_knowledgebox}/find",
json={
"query": "Title",
},
@@ -292,7 +298,7 @@ async def test_find_marks_fuzzy_results(
# Should get only one fuzzy result
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/find",
+ f"/kb/{standalone_knowledgebox}/find",
json={
"query": "totle",
},
@@ -303,7 +309,7 @@ async def test_find_marks_fuzzy_results(
# Should not get any result if exact match term queried
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/find",
+ f"/kb/{standalone_knowledgebox}/find",
json={
"query": '"totle"',
},
@@ -323,6 +329,7 @@ def check_fuzzy_paragraphs(find_response, *, fuzzy_result: bool, n_expected: int
assert found == n_expected
+@pytest.mark.deploy_modes("standalone")
async def test_find_returns_best_matches(
nucliadb_reader: AsyncClient,
philosophy_books_kb,
@@ -360,10 +367,11 @@ def find_with_limits_exceeded_error():
yield
+@pytest.mark.deploy_modes("standalone")
async def test_find_handles_limits_exceeded_error(
- nucliadb_reader, knowledgebox, find_with_limits_exceeded_error
+ nucliadb_reader: AsyncClient, standalone_knowledgebox, find_with_limits_exceeded_error
):
- kb = knowledgebox
+ kb = standalone_knowledgebox
resp = await nucliadb_reader.get(f"/kb/{kb}/find")
assert resp.status_code == 402
assert resp.json() == {"detail": "over the quota"}
@@ -373,12 +381,13 @@ async def test_find_handles_limits_exceeded_error(
assert resp.json() == {"detail": "over the quota"}
+@pytest.mark.deploy_modes("standalone")
async def test_find_keyword_filters(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
# Create a couple of resources with different keywords in the title
resp = await nucliadb_writer.post(
f"/kb/{kbid}/resources",
@@ -469,6 +478,7 @@ async def test_find_keyword_filters(
), f"Keyword filters: {keyword_filters}, expected rids: {expected_rids}"
+@pytest.mark.deploy_modes("standalone")
async def test_find_highlight(
nucliadb_reader: AsyncClient,
philosophy_books_kb: str,
@@ -496,15 +506,16 @@ async def test_find_highlight(
assert "Marcus Aurelius" in match["text"]
+@pytest.mark.deploy_modes("standalone")
async def test_find_fields_parameter(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox: str,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox: str,
):
text = "This is a text"
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"slug": "myresource",
"title": "My Title",
@@ -521,7 +532,7 @@ async def test_find_fields_parameter(
rid = resp.json()["uuid"]
bm = BrokerMessage()
- bm.kbid = knowledgebox
+ bm.kbid = standalone_knowledgebox
bm.uuid = rid
field = FieldID(field_type=FieldType.TEXT, field="text1")
@@ -537,7 +548,7 @@ async def test_find_fields_parameter(
evw.vectors.vectors.vectors.append(vector)
bm.field_vectors.append(evw)
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
# Semantic search only on text fields should work
for fields_param, expected_n_resources in [
@@ -546,7 +557,7 @@ async def test_find_fields_parameter(
(["u"], 0),
]:
resp = await nucliadb_reader.post(
- f"/kb/{knowledgebox}/find",
+ f"/kb/{standalone_knowledgebox}/find",
json={
"query": text,
"features": ["semantic"],
diff --git a/nucliadb/tests/nucliadb/integration/test_labels.py b/nucliadb/tests/nucliadb/integration/test_labels.py
index f984875a36..f77b464fbe 100644
--- a/nucliadb/tests/nucliadb/integration/test_labels.py
+++ b/nucliadb/tests/nucliadb/integration/test_labels.py
@@ -20,6 +20,7 @@
import uuid
from datetime import datetime
+import pytest
from httpx import AsyncClient
from nucliadb.ingest.orm.resource import (
@@ -42,15 +43,16 @@
from nucliadb_models.writer import CreateResourcePayload
from nucliadb_protos import resources_pb2 as rpb
from nucliadb_protos.writer_pb2 import BrokerMessage
+from nucliadb_protos.writer_pb2_grpc import WriterStub
from tests.utils import inject_message
-def broker_resource(knowledgebox: str) -> BrokerMessage:
+def broker_resource(standalone_knowledgebox: str) -> BrokerMessage:
rid = str(uuid.uuid4())
slug = f"{rid}slug1"
bm: BrokerMessage = BrokerMessage(
- kbid=knowledgebox,
+ kbid=standalone_knowledgebox,
uuid=rid,
slug=slug,
type=BrokerMessage.AUTOCOMMIT,
@@ -157,24 +159,25 @@ def broker_resource(knowledgebox: str) -> BrokerMessage:
return bm
-async def inject_resource_with_paragraph_labels(knowledgebox, writer):
- bm = broker_resource(knowledgebox)
+async def inject_resource_with_paragraph_labels(standalone_knowledgebox, writer):
+ bm = broker_resource(standalone_knowledgebox)
await inject_message(writer, bm)
return bm.uuid
+@pytest.mark.deploy_modes("standalone")
async def test_labels_global(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc,
- knowledgebox,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox,
):
# PUBLIC API
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}")
assert resp.status_code == 200
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/labelset/label1",
+ f"/kb/{standalone_knowledgebox}/labelset/label1",
json={
"title": "mylabel",
"multiple": False,
@@ -183,21 +186,22 @@ async def test_labels_global(
)
assert resp.status_code == 200
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/labelsets")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/labelsets")
assert resp.status_code == 200
assert len(resp.json()["labelsets"]) == 1
assert resp.json()["labelsets"]["label1"]["multiple"] is False
- rid = await inject_resource_with_paragraph_labels(knowledgebox, nucliadb_grpc)
+ rid = await inject_resource_with_paragraph_labels(standalone_knowledgebox, nucliadb_ingest_grpc)
- resp = await nucliadb_writer.post(f"/kb/{knowledgebox}/resource/{rid}/reindex")
+ resp = await nucliadb_writer.post(f"/kb/{standalone_knowledgebox}/resource/{rid}/reindex")
assert resp.status_code == 200
+@pytest.mark.deploy_modes("standalone")
async def test_classification_labels_cancelled_by_the_user(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
expected_label = {
"label": "label",
@@ -205,7 +209,7 @@ async def test_classification_labels_cancelled_by_the_user(
"cancelled_by_user": True,
}
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "My Resource",
"summary": "My summary",
@@ -217,7 +221,7 @@ async def test_classification_labels_cancelled_by_the_user(
# Check cancelled labels come in resource get
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/resource/{rid}",
+ f"/kb/{standalone_knowledgebox}/resource/{rid}",
)
assert resp.status_code == 200
content = resp.json()
@@ -225,26 +229,27 @@ async def test_classification_labels_cancelled_by_the_user(
# Check cancelled labels come in resource list
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
)
assert resp.status_code == 200
content = resp.json()
assert content["resources"][0]["usermetadata"]["classifications"][0] == expected_label
# Check cancelled labels come in search results
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?query=summary")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/search?query=summary")
assert resp.status_code == 200
content = resp.json()
assert content["resources"][rid]["usermetadata"]["classifications"][0] == expected_label
+@pytest.mark.deploy_modes("standalone")
async def test_classification_labels_are_shown_in_resource_basic(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc,
- knowledgebox,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox,
):
- rid = await inject_resource_with_paragraph_labels(knowledgebox, nucliadb_grpc)
+ rid = await inject_resource_with_paragraph_labels(standalone_knowledgebox, nucliadb_ingest_grpc)
classifications = [Classification(labelset="labelset1", label="label1")]
@@ -258,19 +263,19 @@ async def test_classification_labels_are_shown_in_resource_basic(
)
# Check resource get
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}?show=basic")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}?show=basic")
assert resp.status_code == 200, f"Response {resp}: {resp.text}"
resource = Resource.model_validate_json(resp.content)
assert resource.computedmetadata == expected_computedmetadata
# Check resources list
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resources?show=basic")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resources?show=basic")
assert resp.status_code == 200
resources = ResourceList.model_validate_json(resp.content)
assert resources.resources[0].computedmetadata == expected_computedmetadata
# Check search results list
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?show=basic")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/search?show=basic")
assert resp.status_code == 200
results = KnowledgeboxSearchResults.model_validate_json(resp.content)
assert results.resources[rid].computedmetadata == expected_computedmetadata
@@ -309,10 +314,11 @@ def test_add_field_classifications():
)
+@pytest.mark.deploy_modes("standalone")
async def test_fieldmetadata_classification_labels(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
fieldmetadata = UserFieldMetadata(
field=FieldID(field="text", field_type=FieldID.FieldType.TEXT),
@@ -331,14 +337,14 @@ async def test_fieldmetadata_classification_labels(
fieldmetadata=[fieldmetadata],
)
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
- data=payload.model_dump_json(), # type: ignore
+ f"/kb/{standalone_knowledgebox}/resources",
+ content=payload.model_dump_json(),
)
assert resp.status_code == 201
rid = resp.json()["uuid"]
# Check resource get
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}?show=basic")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}?show=basic")
assert resp.status_code == 200
resource = Resource.model_validate_json(resp.content)
assert resource.fieldmetadata[0] == fieldmetadata # type: ignore
diff --git a/nucliadb/tests/nucliadb/integration/test_labelsets.py b/nucliadb/tests/nucliadb/integration/test_labelsets.py
index 6f7f68ff8d..45ffd61448 100644
--- a/nucliadb/tests/nucliadb/integration/test_labelsets.py
+++ b/nucliadb/tests/nucliadb/integration/test_labelsets.py
@@ -18,15 +18,17 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
+import pytest
from httpx import AsyncClient
+@pytest.mark.deploy_modes("standalone")
async def test_selection_labelsets(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
resp = await nucliadb_writer.post(
f"/kb/{kbid}/labelset/myselections",
@@ -58,12 +60,13 @@ async def test_selection_labelsets(
assert body["labels"] == []
+@pytest.mark.deploy_modes("standalone")
async def test_duplicated_labelsets_not_allowed(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
# Create labelset
resp = await nucliadb_writer.post(
@@ -105,12 +108,13 @@ async def test_duplicated_labelsets_not_allowed(
assert resp.status_code == 422
+@pytest.mark.deploy_modes("standalone")
async def test_duplicated_labels_not_allowed(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
resp = await nucliadb_writer.post(
f"/kb/{kbid}/labelset/myselections",
diff --git a/nucliadb/tests/nucliadb/integration/test_matryoshka_embeddings.py b/nucliadb/tests/nucliadb/integration/test_matryoshka_embeddings.py
index 3c7c8e8954..2b551db91b 100644
--- a/nucliadb/tests/nucliadb/integration/test_matryoshka_embeddings.py
+++ b/nucliadb/tests/nucliadb/integration/test_matryoshka_embeddings.py
@@ -20,6 +20,7 @@
import uuid
from unittest.mock import patch
+import pytest
from faker import Faker
from httpx import AsyncClient
@@ -34,9 +35,10 @@
fake = Faker()
+@pytest.mark.deploy_modes("standalone")
async def test_matryoshka_embeddings(
maindb_driver: Driver,
- nucliadb_grpc: WriterStub,
+ nucliadb_ingest_grpc: WriterStub,
nucliadb_writer: AsyncClient,
nucliadb_reader: AsyncClient,
learning_config,
@@ -56,7 +58,7 @@ async def test_matryoshka_embeddings(
semantic_matryoshka_dims=matryoshka_dimensions,
)
- new_kb_response = await nucliadb_grpc.NewKnowledgeBoxV2( # type: ignore
+ new_kb_response = await nucliadb_ingest_grpc.NewKnowledgeBoxV2( # type: ignore
writer_pb2.NewKnowledgeBoxV2Request(
kbid=kbid,
slug=slug,
@@ -117,7 +119,7 @@ async def test_matryoshka_embeddings(
bmb.add_field_builder(text_field)
bm = bmb.build()
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
# Search
diff --git a/nucliadb/tests/nucliadb/integration/test_pinecone_kb.py b/nucliadb/tests/nucliadb/integration/test_pinecone_kb.py
index 1da015de12..0d259576d3 100644
--- a/nucliadb/tests/nucliadb/integration/test_pinecone_kb.py
+++ b/nucliadb/tests/nucliadb/integration/test_pinecone_kb.py
@@ -94,20 +94,43 @@ def mock_pinecone_client(data_plane, control_plane):
yield session_mock
+@pytest.fixture(scope="function")
+async def pinecone_knowledgebox(nucliadb_writer_manager: AsyncClient, mock_pinecone_client):
+ resp = await nucliadb_writer_manager.post(
+ "/kbs",
+ json={
+ "slug": "pinecone_knowledgebox",
+ "external_index_provider": {
+ "type": "pinecone",
+ "api_key": "my-pinecone-api-key",
+ "serverless_cloud": "aws_us_east_1",
+ },
+ },
+ )
+ assert resp.status_code == 201
+ uuid = resp.json().get("uuid")
+
+ yield uuid
+
+ resp = await nucliadb_writer_manager.delete(f"/kb/{uuid}")
+ assert resp.status_code == 200
+
+
@pytest.fixture(autouse=True)
def hosted_nucliadb():
with unittest.mock.patch("nucliadb.ingest.service.writer.is_onprem_nucliadb", return_value=False):
yield
+@pytest.mark.deploy_modes("standalone")
async def test_kb_creation(
- nucliadb_grpc: WriterStub,
+ nucliadb_ingest_grpc: WriterStub,
nucliadb_reader: AsyncClient,
control_plane,
):
"""
This tests the new method for creating kbs on a hosted nucliadb that
- uses the nucliadb_grpc.NewKnowledgeBoxV2 method.
+ uses the nucliadb_ingest_grpc.NewKnowledgeBoxV2 method.
"""
expected_index_names = ["nuclia-someuuid1", "nuclia-someuuid2"]
with mock.patch(
@@ -146,7 +169,7 @@ async def test_kb_creation(
)
# Creating a knowledge with 2 vectorsets box should create two Pinecone indexes
- response: NewKnowledgeBoxV2Response = await nucliadb_grpc.NewKnowledgeBoxV2(
+ response: NewKnowledgeBoxV2Response = await nucliadb_ingest_grpc.NewKnowledgeBoxV2(
request, timeout=None
) # type: ignore
assert response.status == KnowledgeBoxResponseStatus.OK
@@ -192,7 +215,7 @@ async def test_kb_creation(
assert pinecone_config.indexes[english].vector_dimension == 3
# Deleting a knowledge box should delete the Pinecone index
- response = await nucliadb_grpc.DeleteKnowledgeBox(
+ response = await nucliadb_ingest_grpc.DeleteKnowledgeBox(
KnowledgeBoxID(slug=slug, uuid=kbid), timeout=None
) # type: ignore
assert response.status == KnowledgeBoxResponseStatus.OK
@@ -201,6 +224,7 @@ async def test_kb_creation(
assert control_plane.delete_index.call_count == 2
+@pytest.mark.deploy_modes("standalone")
async def test_get_kb(
nucliadb_reader: AsyncClient,
pinecone_knowledgebox: str,
@@ -216,6 +240,7 @@ async def test_get_kb(
assert config["configured_external_index_provider"]["type"] == "pinecone"
+@pytest.mark.deploy_modes("standalone")
async def test_kb_counters(
nucliadb_writer: AsyncClient,
nucliadb_reader: AsyncClient,
@@ -247,10 +272,11 @@ async def test_kb_counters(
}
+@pytest.mark.deploy_modes("standalone")
async def test_find_on_pinecone_kb(
nucliadb_reader: AsyncClient,
pinecone_knowledgebox: str,
- pinecone_data_plane,
+ data_plane,
):
kbid = pinecone_knowledgebox
@@ -261,7 +287,7 @@
assert resp.status_code == 200, resp.text
-async def _inject_broker_message(nucliadb_grpc: WriterStub, kbid: str, rid: str, slug: str):
+async def _inject_broker_message(nucliadb_ingest_grpc: WriterStub, kbid: str, rid: str, slug: str):
bm = BrokerMessage(kbid=kbid, uuid=rid, slug=slug, type=BrokerMessage.AUTOCOMMIT)
bm.basic.icon = "text/plain"
bm.basic.title = "Title Resource"
@@ -358,11 +384,12 @@ async def _inject_broker_message(nucliadb_grpc: WriterStub, kbid: str, rid: str,
bm.field_vectors.append(ev)
bm.source = BrokerMessage.MessageSource.PROCESSOR
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
+@pytest.mark.deploy_modes("standalone")
async def test_ingestion_on_pinecone_kb(
- nucliadb_grpc: WriterStub,
+ nucliadb_ingest_grpc: WriterStub,
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
pinecone_knowledgebox: str,
@@ -383,7 +410,7 @@ async def test_ingestion_on_pinecone_kb(
assert resp.status_code == 201
rid = resp.json()["uuid"]
- await _inject_broker_message(nucliadb_grpc, kbid, rid, slug)
+ await _inject_broker_message(nucliadb_ingest_grpc, kbid, rid, slug)
assert data_plane.delete_by_id_prefix.await_count == 1
assert data_plane.upsert_in_batches.await_count == 1
@@ -401,9 +428,10 @@ async def app_context(natsd, storage, nucliadb):
await ctx.finalize()
+@pytest.mark.deploy_modes("standalone")
async def test_pinecone_kb_rollover_index(
app_context,
- nucliadb_grpc: WriterStub,
+ nucliadb_ingest_grpc: WriterStub,
nucliadb_writer: AsyncClient,
pinecone_knowledgebox: str,
data_plane,
@@ -425,7 +453,7 @@ async def test_pinecone_kb_rollover_index(
rid = resp.json()["uuid"]
# Inject a broker message as if it was the result of a Nuclia processing request
- await _inject_broker_message(nucliadb_grpc, kbid, rid, slug)
+ await _inject_broker_message(nucliadb_ingest_grpc, kbid, rid, slug)
# Check that vectors were upserted to pinecone
assert data_plane.upsert_in_batches.await_count == 1
diff --git a/nucliadb/tests/nucliadb/integration/test_predict_proxy.py b/nucliadb/tests/nucliadb/integration/test_predict_proxy.py
index 43f0d519c5..f6555f244b 100644
--- a/nucliadb/tests/nucliadb/integration/test_predict_proxy.py
+++ b/nucliadb/tests/nucliadb/integration/test_predict_proxy.py
@@ -20,6 +20,7 @@
import pytest
+from httpx import AsyncClient
@pytest.mark.parametrize(
@@ -44,8 +45,16 @@
),
],
)
-async def test_predict_proxy(nucliadb_reader, knowledgebox, method, endpoint, params, payload):
- kbid = knowledgebox
+@pytest.mark.deploy_modes("standalone")
+async def test_predict_proxy(
+ nucliadb_reader: AsyncClient,
+ standalone_knowledgebox: str,
+ method: str,
+ endpoint: str,
+ params,
+ payload,
+):
+ kbid = standalone_knowledgebox
http_func = getattr(nucliadb_reader, method.lower())
http_func_kwargs = {"params": params}
if method == "POST":
@@ -58,11 +67,12 @@ async def test_predict_proxy(nucliadb_reader, knowledgebox, method, endpoint, pa
assert resp.status_code == 200, resp.text
+@pytest.mark.deploy_modes("standalone")
async def test_predict_proxy_not_proxied_returns_422(
- nucliadb_reader,
- knowledgebox,
+ nucliadb_reader: AsyncClient,
+ standalone_knowledgebox: str,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
resp = await nucliadb_reader.post(
f"/kb/{kbid}/predict/summarize",
json={"resources": {"foo": "bar"}},
@@ -70,8 +80,9 @@ async def test_predict_proxy_not_proxied_returns_422(
assert resp.status_code == 422
+@pytest.mark.deploy_modes("standalone")
async def test_predict_proxy_returns_404_on_non_existing_kb(
- nucliadb_reader,
+ nucliadb_reader: AsyncClient,
):
resp = await nucliadb_reader.post(
f"/kb/idonotexist-kb/predict/chat",
diff --git a/nucliadb/tests/nucliadb/integration/test_processing_status.py b/nucliadb/tests/nucliadb/integration/test_processing_status.py
index 5053646f28..27958cf791 100644
--- a/nucliadb/tests/nucliadb/integration/test_processing_status.py
+++ b/nucliadb/tests/nucliadb/integration/test_processing_status.py
@@ -38,14 +38,15 @@
),
],
)
+@pytest.mark.deploy_modes("standalone")
async def test_endpoint_set_resource_status_to_pending(
endpoint,
expected_status,
payload,
nucliadb_writer: AsyncClient,
nucliadb_reader: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox: str,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox: str,
):
"""
- Create a resource with a status PROCESSED
@@ -53,11 +54,11 @@ async def test_endpoint_set_resource_status_to_pending(
- Check that the status is set to PENDING
"""
# Create a resource, processing
- br = broker_resource(knowledgebox)
+ br = broker_resource(standalone_knowledgebox)
br.texts["text"].CopyFrom(
rpb.FieldText(body="This is my text field", format=rpb.FieldText.Format.PLAIN)
)
- await inject_message(nucliadb_grpc, br)
+ await inject_message(nucliadb_ingest_grpc, br)
# Receive message from processor
br.source = BrokerMessage.MessageSource.PROCESSOR
@@ -66,37 +67,40 @@ async def test_endpoint_set_resource_status_to_pending(
etw.field.field = "text"
etw.field.field_type = rpb.FieldType.TEXT
br.extracted_text.append(etw)
- await inject_message(nucliadb_grpc, br)
+ await inject_message(nucliadb_ingest_grpc, br)
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{br.uuid}")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{br.uuid}")
assert resp.status_code == 200
resp_json = resp.json()
assert resp_json["metadata"]["status"] == "PROCESSED"
kwargs = payload or {}
resp = await nucliadb_writer.post(
- endpoint.format(kbid=knowledgebox, rid=br.uuid),
+ endpoint.format(kbid=standalone_knowledgebox, rid=br.uuid),
**kwargs,
)
assert resp.status_code == expected_status
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{br.uuid}")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{br.uuid}")
assert resp.status_code == 200
resp_json = resp.json()
assert resp_json["metadata"]["status"] == "PENDING"
+@pytest.mark.deploy_modes("standalone")
async def test_field_status_errors_processor(
nucliadb_writer: AsyncClient,
nucliadb_reader: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox: str,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox: str,
):
# Create a resource, processing
- br = broker_resource(knowledgebox)
- await inject_message(nucliadb_grpc, br)
+ br = broker_resource(standalone_knowledgebox)
+ await inject_message(nucliadb_ingest_grpc, br)
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{br.uuid}?show=basic&show=errors")
+ resp = await nucliadb_reader.get(
+ f"/kb/{standalone_knowledgebox}/resource/{br.uuid}?show=basic&show=errors"
+ )
assert resp.status_code == 200
resp_json = resp.json()
assert resp_json["metadata"]["status"] == "PENDING"
@@ -116,9 +120,11 @@ async def test_field_status_errors_processor(
code=Error.ErrorCode.EXTRACT,
)
)
- await inject_message(nucliadb_grpc, br)
+ await inject_message(nucliadb_ingest_grpc, br)
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{br.uuid}?show=basic&show=errors")
+ resp = await nucliadb_reader.get(
+ f"/kb/{standalone_knowledgebox}/resource/{br.uuid}?show=basic&show=errors"
+ )
assert resp.status_code == 200
resp_json = resp.json()
assert resp_json["metadata"]["status"] == "ERROR"
@@ -128,9 +134,11 @@ async def test_field_status_errors_processor(
# Receive message from processor without errors, previous errors are cleared
br.errors.pop()
- await inject_message(nucliadb_grpc, br)
+ await inject_message(nucliadb_ingest_grpc, br)
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{br.uuid}?show=basic&show=errors")
+ resp = await nucliadb_reader.get(
+ f"/kb/{standalone_knowledgebox}/resource/{br.uuid}?show=basic&show=errors"
+ )
assert resp.status_code == 200
resp_json = resp.json()
assert resp_json["metadata"]["status"] == "PROCESSED"
@@ -139,20 +147,23 @@ async def test_field_status_errors_processor(
assert "errors" not in resp_json["data"]["generics"]["summary"]
+@pytest.mark.deploy_modes("standalone")
async def test_field_status_errors_data_augmentation(
nucliadb_writer: AsyncClient,
nucliadb_reader: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox: str,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox: str,
):
# Create a resource, processing
- br = broker_resource(knowledgebox)
+ br = broker_resource(standalone_knowledgebox)
br.texts["text"].CopyFrom(
rpb.FieldText(body="This is my text field", format=rpb.FieldText.Format.PLAIN)
)
- await inject_message(nucliadb_grpc, br)
+ await inject_message(nucliadb_ingest_grpc, br)
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{br.uuid}?show=basic&show=errors")
+ resp = await nucliadb_reader.get(
+ f"/kb/{standalone_knowledgebox}/resource/{br.uuid}?show=basic&show=errors"
+ )
assert resp.status_code == 200
resp_json = resp.json()
assert resp_json["metadata"]["status"] == "PENDING"
@@ -169,9 +180,11 @@ async def test_field_status_errors_data_augmentation(
etw.field.field = "text"
etw.field.field_type = rpb.FieldType.TEXT
br.extracted_text.append(etw)
- await inject_message(nucliadb_grpc, br)
+ await inject_message(nucliadb_ingest_grpc, br)
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{br.uuid}?show=basic&show=errors")
+ resp = await nucliadb_reader.get(
+ f"/kb/{standalone_knowledgebox}/resource/{br.uuid}?show=basic&show=errors"
+ )
assert resp.status_code == 200
resp_json = resp.json()
assert resp_json["metadata"]["status"] == "PROCESSED"
@@ -191,9 +204,11 @@ async def test_field_status_errors_data_augmentation(
g.data_augmentation.SetInParent()
br.generated_by.pop()
br.generated_by.append(g)
- await inject_message(nucliadb_grpc, br)
+ await inject_message(nucliadb_ingest_grpc, br)
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{br.uuid}?show=basic&show=errors")
+ resp = await nucliadb_reader.get(
+ f"/kb/{standalone_knowledgebox}/resource/{br.uuid}?show=basic&show=errors"
+ )
assert resp.status_code == 200
resp_json = resp.json()
assert resp_json["metadata"]["status"] == "PROCESSED"
@@ -203,9 +218,11 @@ async def test_field_status_errors_data_augmentation(
# Receive message from data augmentation without errors
br.errors.pop()
- await inject_message(nucliadb_grpc, br)
+ await inject_message(nucliadb_ingest_grpc, br)
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{br.uuid}?show=basic&show=errors")
+ resp = await nucliadb_reader.get(
+ f"/kb/{standalone_knowledgebox}/resource/{br.uuid}?show=basic&show=errors"
+ )
assert resp.status_code == 200
resp_json = resp.json()
assert resp_json["metadata"]["status"] == "PROCESSED"
diff --git a/nucliadb/tests/nucliadb/integration/test_purge.py b/nucliadb/tests/nucliadb/integration/test_purge.py
index a30b25ebc4..a67e25c420 100644
--- a/nucliadb/tests/nucliadb/integration/test_purge.py
+++ b/nucliadb/tests/nucliadb/integration/test_purge.py
@@ -24,6 +24,7 @@
from typing import cast
from unittest.mock import AsyncMock
+import pytest
from httpx import AsyncClient
import nucliadb.common.nidx
@@ -48,10 +49,12 @@
from tests.utils.dirty_index import wait_for_sync
+@pytest.mark.deploy_modes("standalone")
async def test_purge_deletes_everything_from_maindb(
maindb_driver: Driver,
storage: Storage,
- nucliadb_manager: AsyncClient,
+ nucliadb_writer_manager: AsyncClient,
+ nucliadb_reader_manager: AsyncClient,
nucliadb_writer: AsyncClient,
):
"""Create a KB and some resource and then purge it. Validate that purge
@@ -59,11 +62,11 @@ async def test_purge_deletes_everything_from_maindb(
"""
kb_slug = str(uuid.uuid4())
- resp = await nucliadb_manager.post("/kbs", json={"slug": kb_slug})
+ resp = await nucliadb_writer_manager.post("/kbs", json={"slug": kb_slug})
assert resp.status_code == 201
kbid = resp.json().get("uuid")
- resp = await nucliadb_manager.get("/kbs")
+ resp = await nucliadb_reader_manager.get("/kbs")
body = resp.json()
assert len(body["kbs"]) == 1
assert body["kbs"][0]["uuid"] == kbid
@@ -84,10 +87,10 @@ async def test_purge_deletes_everything_from_maindb(
assert await kb_catalog_entries_count(maindb_driver, kbid) > 0
- resp = await nucliadb_manager.delete(f"/kb/{kbid}")
+ resp = await nucliadb_writer_manager.delete(f"/kb/{kbid}")
assert resp.status_code == 200
- resp = await nucliadb_manager.get("/kbs")
+ resp = await nucliadb_reader_manager.get("/kbs")
body = resp.json()
assert len(body["kbs"]) == 0
@@ -111,10 +114,11 @@ async def test_purge_deletes_everything_from_maindb(
assert len(keys_after_purge_storage) == 0
+@pytest.mark.deploy_modes("standalone")
async def test_purge_orphan_shards(
maindb_driver: Driver,
storage: Storage,
- nucliadb_manager: AsyncClient,
+ nucliadb_writer_manager: AsyncClient,
nucliadb_writer: AsyncClient,
):
"""Create a KB with some resource (hence a shard) and delete it. Simulate an
@@ -122,7 +126,7 @@ async def test_purge_orphan_shards(
"""
kb_slug = str(uuid.uuid4())
- resp = await nucliadb_manager.post("/kbs", json={"slug": kb_slug})
+ resp = await nucliadb_writer_manager.post("/kbs", json={"slug": kb_slug})
assert resp.status_code == 201
kbid = resp.json().get("uuid")
@@ -143,7 +147,7 @@ async def test_purge_orphan_shards(
with unittest.mock.patch.object(nucliadb.common.nidx.get_nidx(), "api_client"):
nucliadb.common.nidx.get_nidx().api_client.DeleteShard = AsyncMock()
- resp = await nucliadb_manager.delete(f"/kb/{kbid}")
+ resp = await nucliadb_writer_manager.delete(f"/kb/{kbid}")
assert resp.status_code == 200, resp.text
await purge_kb(maindb_driver)
@@ -170,10 +174,11 @@ async def test_purge_orphan_shards(
assert len(shards) == 0
+@pytest.mark.deploy_modes("standalone")
async def test_purge_orphan_shard_detection(
maindb_driver: Driver,
storage: Storage,
- nucliadb_manager: AsyncClient,
+ nucliadb_writer_manager: AsyncClient,
nucliadb_writer: AsyncClient,
):
"""Prepare a situation where there are:
@@ -185,7 +190,7 @@ async def test_purge_orphan_shard_detection(
"""
# Regular KB
kb_slug = str(uuid.uuid4())
- resp = await nucliadb_manager.post("/kbs", json={"slug": kb_slug})
+ resp = await nucliadb_writer_manager.post("/kbs", json={"slug": kb_slug})
assert resp.status_code == 201
kbid = resp.json().get("uuid")
@@ -238,15 +243,16 @@ async def kb_catalog_entries_count(driver: Driver, kbid: str) -> int:
return count[0]
+@pytest.mark.deploy_modes("standalone")
async def test_purge_resources_deleted_storage(
maindb_driver: Driver,
storage: Storage,
- nucliadb_manager: AsyncClient,
+ nucliadb_writer_manager: AsyncClient,
nucliadb_writer: AsyncClient,
):
# Create a KB
kb_slug = str(uuid.uuid4())
- resp = await nucliadb_manager.post("/kbs", json={"slug": kb_slug})
+ resp = await nucliadb_writer_manager.post("/kbs", json={"slug": kb_slug})
assert resp.status_code == 201
kbid = resp.json().get("uuid")
diff --git a/nucliadb/tests/nucliadb/integration/test_purge_vectorsets.py b/nucliadb/tests/nucliadb/integration/test_purge_vectorsets.py
index 7ff4ac427a..25c6770fca 100644
--- a/nucliadb/tests/nucliadb/integration/test_purge_vectorsets.py
+++ b/nucliadb/tests/nucliadb/integration/test_purge_vectorsets.py
@@ -22,6 +22,8 @@
import uuid
from unittest.mock import AsyncMock, patch
+import pytest
+
from nucliadb.common import datamanagers
from nucliadb.common.maindb.driver import Driver
from nucliadb.ingest.orm.knowledgebox import (
@@ -37,8 +39,12 @@
from tests.utils import inject_message
+@pytest.mark.deploy_modes("standalone")
async def test_purge_vectorsets__kb_with_vectorsets(
- maindb_driver: Driver, storage: Storage, nucliadb_grpc: WriterStub, knowledgebox_with_vectorsets: str
+ maindb_driver: Driver,
+ storage: Storage,
+ nucliadb_ingest_grpc: WriterStub,
+ knowledgebox_with_vectorsets: str,
):
kbid = knowledgebox_with_vectorsets
vectorset_id = "my-semantic-model-A"
@@ -46,7 +52,7 @@ async def test_purge_vectorsets__kb_with_vectorsets(
resource_count = 5
for i in range(resource_count):
bm = await create_broker_message_with_vectorset(kbid, maindb_driver)
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
with patch.object(
storage, "delete_upload", new=AsyncMock(side_effect=storage.delete_upload)
diff --git a/nucliadb/tests/nucliadb/integration/test_reindex.py b/nucliadb/tests/nucliadb/integration/test_reindex.py
index ae910b10b3..4a2ce3c9d5 100644
--- a/nucliadb/tests/nucliadb/integration/test_reindex.py
+++ b/nucliadb/tests/nucliadb/integration/test_reindex.py
@@ -21,6 +21,7 @@
import base64
import hashlib
+import pytest
from httpx import AsyncClient
from nucliadb.common import datamanagers
@@ -32,31 +33,35 @@
from tests.utils import dirty_index, inject_message
+@pytest.mark.deploy_modes("standalone")
async def test_reindex(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox: str,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox: str,
):
- await _test_reindex(nucliadb_reader, nucliadb_writer, nucliadb_grpc, knowledgebox)
+ await _test_reindex(nucliadb_reader, nucliadb_writer, nucliadb_ingest_grpc, standalone_knowledgebox)
+@pytest.mark.deploy_modes("standalone")
async def test_reindex_kb_with_vectorsets(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
+ nucliadb_ingest_grpc: WriterStub,
knowledgebox_with_vectorsets: str,
):
- await _test_reindex(nucliadb_reader, nucliadb_writer, nucliadb_grpc, knowledgebox_with_vectorsets)
+ await _test_reindex(
+ nucliadb_reader, nucliadb_writer, nucliadb_ingest_grpc, knowledgebox_with_vectorsets
+ )
async def _test_reindex(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
+ nucliadb_ingest_grpc: WriterStub,
kbid,
):
- rid = await create_resource(kbid, nucliadb_writer, nucliadb_grpc)
+ rid = await create_resource(kbid, nucliadb_writer, nucliadb_ingest_grpc)
# Doing a search should return results
resp = await nucliadb_reader.get(f"/kb/{kbid}/search?query=text")
@@ -102,7 +107,7 @@ async def _test_reindex(
assert len(content["paragraphs"]["results"]) > 0
-async def create_resource(kbid: str, nucliadb_writer: AsyncClient, nucliadb_grpc: WriterStub):
+async def create_resource(kbid: str, nucliadb_writer: AsyncClient, nucliadb_ingest_grpc: WriterStub):
# create resource
file_content = b"This is a file"
field_id = "myfile"
@@ -130,7 +135,7 @@ async def create_resource(kbid: str, nucliadb_writer: AsyncClient, nucliadb_grpc
# update it with extracted data
bm = await broker_resource(kbid, rid)
bm.source = BrokerMessage.MessageSource.PROCESSOR
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
return bm.uuid
diff --git a/nucliadb/tests/nucliadb/integration/test_relations.py b/nucliadb/tests/nucliadb/integration/test_relations.py
index 9a6097ec63..c3d7faf4e6 100644
--- a/nucliadb/tests/nucliadb/integration/test_relations.py
+++ b/nucliadb/tests/nucliadb/integration/test_relations.py
@@ -33,12 +33,12 @@
@pytest.fixture
async def resource_with_bm_relations(
- nucliadb_grpc: WriterStub,
+ nucliadb_ingest_grpc: WriterStub,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"slug": "myresource",
"texts": {"text1": {"body": "Mickey loves Minnie"}},
@@ -48,23 +48,24 @@ async def resource_with_bm_relations(
rid = resp.json()["uuid"]
bm = await create_broker_message_with_relations()
- bm.kbid = knowledgebox
+ bm.kbid = standalone_knowledgebox
bm.uuid = rid
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
yield rid, "text1"
+@pytest.mark.deploy_modes("standalone")
async def test_api_aliases(
nucliadb_reader: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
resource_with_bm_relations: tuple[str, str],
):
rid, field_id = resource_with_bm_relations
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/resource/{rid}",
+ f"/kb/{standalone_knowledgebox}/resource/{rid}",
params=dict(
show=["relations", "extracted"],
extracted=["metadata"],
@@ -78,7 +79,7 @@ async def test_api_aliases(
assert "from_" not in extracted_metadata["metadata"]["relations"][0]
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/resource/{rid}/text/{field_id}",
+ f"/kb/{standalone_knowledgebox}/resource/{rid}/text/{field_id}",
params=dict(
show=["extracted"],
extracted=["metadata"],
@@ -91,9 +92,10 @@ async def test_api_aliases(
assert "from_" not in body["extracted"]["metadata"]["metadata"]["relations"][0]
+@pytest.mark.deploy_modes("standalone")
async def test_broker_message_relations(
nucliadb_reader: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
resource_with_bm_relations: tuple[str, str],
):
"""
@@ -106,7 +108,7 @@ async def test_broker_message_relations(
rid, field_id = resource_with_bm_relations
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/resource/{rid}",
+ f"/kb/{standalone_knowledgebox}/resource/{rid}",
params=dict(
show=["relations", "extracted"],
extracted=["metadata"],
@@ -127,7 +129,7 @@ async def test_broker_message_relations(
)
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/resource/{rid}/text/{field_id}",
+ f"/kb/{standalone_knowledgebox}/resource/{rid}/text/{field_id}",
params=dict(
show=["extracted"],
extracted=["metadata"],
@@ -138,11 +140,12 @@ async def test_broker_message_relations(
assert len(body["extracted"]["metadata"]["metadata"]["relations"]) == 1
+@pytest.mark.deploy_modes("standalone")
async def test_extracted_relations(
- nucliadb_grpc: WriterStub,
+ nucliadb_ingest_grpc: WriterStub,
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
"""
Test description:
@@ -151,7 +154,7 @@ async def test_extracted_relations(
extracted and test it.
"""
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "My resource",
"slug": "myresource",
@@ -199,7 +202,7 @@ async def test_extracted_relations(
assert resp.status_code == 201
rid = resp.json()["uuid"]
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}?show=basic")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}?show=basic")
assert resp.status_code == 200
assert len(resp.json()["usermetadata"]["relations"]) == 5
diff --git a/nucliadb/tests/nucliadb/integration/test_resources.py b/nucliadb/tests/nucliadb/integration/test_resources.py
index 325dc179d0..9bb972a3a8 100644
--- a/nucliadb/tests/nucliadb/integration/test_resources.py
+++ b/nucliadb/tests/nucliadb/integration/test_resources.py
@@ -29,13 +29,14 @@
from nucliadb.writer.api.v1.router import KB_PREFIX, RESOURCES_PREFIX
+@pytest.mark.deploy_modes("standalone")
async def test_resource_crud(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
resp = await nucliadb_writer.post(
- f"/{KB_PREFIX}/{knowledgebox}/{RESOURCES_PREFIX}",
+ f"/{KB_PREFIX}/{standalone_knowledgebox}/{RESOURCES_PREFIX}",
json={
"slug": "mykb",
"title": "My KB",
@@ -44,35 +45,36 @@ async def test_resource_crud(
assert resp.status_code == 201
rid = resp.json()["uuid"]
- resp = await nucliadb_reader.get(f"/{KB_PREFIX}/{knowledgebox}/resource/{rid}")
+ resp = await nucliadb_reader.get(f"/{KB_PREFIX}/{standalone_knowledgebox}/resource/{rid}")
assert resp.status_code == 200
assert resp.json()["title"] == "My KB"
resp = await nucliadb_writer.patch(
- f"/{KB_PREFIX}/{knowledgebox}/resource/{rid}",
+ f"/{KB_PREFIX}/{standalone_knowledgebox}/resource/{rid}",
json={
"title": "My updated KB",
},
)
assert resp.status_code == 200
- resp = await nucliadb_reader.get(f"/{KB_PREFIX}/{knowledgebox}/resource/{rid}")
+ resp = await nucliadb_reader.get(f"/{KB_PREFIX}/{standalone_knowledgebox}/resource/{rid}")
assert resp.status_code == 200
assert resp.json()["title"] == "My updated KB"
resp = await nucliadb_writer.delete(
- f"/{KB_PREFIX}/{knowledgebox}/resource/{rid}",
+ f"/{KB_PREFIX}/{standalone_knowledgebox}/resource/{rid}",
)
assert resp.status_code == 204
- resp = await nucliadb_reader.get(f"/{KB_PREFIX}/{knowledgebox}/resource/{rid}")
+ resp = await nucliadb_reader.get(f"/{KB_PREFIX}/{standalone_knowledgebox}/resource/{rid}")
assert resp.status_code == 404
+@pytest.mark.deploy_modes("standalone")
async def test_list_resources(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
"""
- Create 20 resources
@@ -82,7 +84,7 @@ async def test_list_resources(
rids = set()
for _ in range(20):
resp = await nucliadb_writer.post(
- f"/{KB_PREFIX}/{knowledgebox}/{RESOURCES_PREFIX}",
+ f"/{KB_PREFIX}/{standalone_knowledgebox}/{RESOURCES_PREFIX}",
json={
"title": "My resource",
},
@@ -91,12 +93,12 @@ async def test_list_resources(
rids.add(resp.json()["uuid"])
got_rids = set()
- resp = await nucliadb_reader.get(f"/{KB_PREFIX}/{knowledgebox}/resources?size=10&page=0")
+ resp = await nucliadb_reader.get(f"/{KB_PREFIX}/{standalone_knowledgebox}/resources?size=10&page=0")
assert resp.status_code == 200
for r in resp.json()["resources"]:
got_rids.add(r["id"])
- resp = await nucliadb_reader.get(f"/{KB_PREFIX}/{knowledgebox}/resources?size=10&page=1")
+ resp = await nucliadb_reader.get(f"/{KB_PREFIX}/{standalone_knowledgebox}/resources?size=10&page=1")
assert resp.status_code == 200
for r in resp.json()["resources"]:
got_rids.add(r["id"])
@@ -104,16 +106,17 @@ async def test_list_resources(
assert got_rids == rids
+@pytest.mark.deploy_modes("standalone")
async def test_get_resource_field(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
slug = "my-resource"
field = "text-field"
resp = await nucliadb_writer.post(
- f"/{KB_PREFIX}/{knowledgebox}/{RESOURCES_PREFIX}",
+ f"/{KB_PREFIX}/{standalone_knowledgebox}/{RESOURCES_PREFIX}",
json={
"slug": slug,
"title": "My Resource",
@@ -123,29 +126,30 @@ async def test_get_resource_field(
assert resp.status_code == 201
rid = resp.json()["uuid"]
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}/text/{field}")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}/text/{field}")
assert resp.status_code == 200
body_by_slug = resp.json()
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/slug/{slug}/text/{field}")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/slug/{slug}/text/{field}")
assert resp.status_code == 200
body_by_rid = resp.json()
assert body_by_slug == body_by_rid
+@pytest.mark.deploy_modes("standalone")
async def test_resource_creation_slug_conflicts(
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
philosophy_books_kb,
):
"""
Test that creating two resources with the same slug raises a conflict error
"""
slug = "myresource"
- resources_path = f"/{KB_PREFIX}/{{knowledgebox}}/{RESOURCES_PREFIX}"
+ resources_path = f"/{KB_PREFIX}/{{standalone_knowledgebox}}/{RESOURCES_PREFIX}"
resp = await nucliadb_writer.post(
- resources_path.format(knowledgebox=knowledgebox),
+ resources_path.format(standalone_knowledgebox=standalone_knowledgebox),
json={
"slug": slug,
},
@@ -153,7 +157,7 @@ async def test_resource_creation_slug_conflicts(
assert resp.status_code == 201
resp = await nucliadb_writer.post(
- resources_path.format(knowledgebox=knowledgebox),
+ resources_path.format(standalone_knowledgebox=standalone_knowledgebox),
json={
"slug": slug,
},
@@ -162,7 +166,7 @@ async def test_resource_creation_slug_conflicts(
# Creating it in another KB should not raise conflict error
resp = await nucliadb_writer.post(
- resources_path.format(knowledgebox=philosophy_books_kb),
+ resources_path.format(standalone_knowledgebox=philosophy_books_kb),
json={
"slug": slug,
},
@@ -170,13 +174,14 @@ async def test_resource_creation_slug_conflicts(
assert resp.status_code == 201
+@pytest.mark.deploy_modes("standalone")
async def test_title_is_set_automatically_if_not_provided(
- nucliadb_reader,
- nucliadb_writer,
- knowledgebox,
+ nucliadb_reader: AsyncClient,
+ nucliadb_writer: AsyncClient,
+ standalone_knowledgebox,
):
resp = await nucliadb_writer.post(
- f"/{KB_PREFIX}/{knowledgebox}/{RESOURCES_PREFIX}",
+ f"/{KB_PREFIX}/{standalone_knowledgebox}/{RESOURCES_PREFIX}",
json={
"texts": {"text-field": {"body": "test1", "format": "PLAIN"}},
},
@@ -184,22 +189,23 @@ async def test_title_is_set_automatically_if_not_provided(
assert resp.status_code == 201
rid = resp.json()["uuid"]
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}")
assert resp.status_code == 200
body = resp.json()
assert body["title"] == rid
@pytest.mark.parametrize("update_by", ["slug", "uuid"])
+@pytest.mark.deploy_modes("standalone")
async def test_resource_slug_modification(
- nucliadb_reader,
- nucliadb_writer,
- knowledgebox,
+ nucliadb_reader: AsyncClient,
+ nucliadb_writer: AsyncClient,
+ standalone_knowledgebox,
update_by,
):
old_slug = "my-resource"
resp = await nucliadb_writer.post(
- f"/{KB_PREFIX}/{knowledgebox}/{RESOURCES_PREFIX}",
+ f"/{KB_PREFIX}/{standalone_knowledgebox}/{RESOURCES_PREFIX}",
json={
"title": "My Resource",
"slug": old_slug,
@@ -208,14 +214,14 @@ async def test_resource_slug_modification(
assert resp.status_code == 201
rid = resp.json()["uuid"]
- await check_resource(nucliadb_reader, knowledgebox, rid, old_slug)
+ await check_resource(nucliadb_reader, standalone_knowledgebox, rid, old_slug)
# Update the slug
new_slug = "my-resource-2"
if update_by == "slug":
- path = f"/{KB_PREFIX}/{knowledgebox}/slug/{old_slug}"
+ path = f"/{KB_PREFIX}/{standalone_knowledgebox}/slug/{old_slug}"
else:
- path = f"/{KB_PREFIX}/{knowledgebox}/resource/{rid}"
+ path = f"/{KB_PREFIX}/{standalone_knowledgebox}/resource/{rid}"
resp = await nucliadb_writer.patch(
path,
json={
@@ -225,10 +231,10 @@ async def test_resource_slug_modification(
)
assert resp.status_code == 200
- await check_resource(nucliadb_reader, knowledgebox, rid, new_slug, title="New title")
+ await check_resource(nucliadb_reader, standalone_knowledgebox, rid, new_slug, title="New title")
-async def check_resource(nucliadb_reader, kbid, rid, slug, **body_checks):
+async def check_resource(nucliadb_reader: AsyncClient, kbid, rid, slug, **body_checks):
resp = await nucliadb_reader.get(f"/kb/{kbid}/resource/{rid}")
assert resp.status_code == 200
assert resp.json()["slug"] == slug
@@ -241,14 +247,15 @@ async def check_resource(nucliadb_reader, kbid, rid, slug, **body_checks):
assert body[key] == value
+@pytest.mark.deploy_modes("standalone")
async def test_resource_slug_modification_rollbacks(
- nucliadb_reader,
- nucliadb_writer,
- knowledgebox,
+ nucliadb_reader: AsyncClient,
+ nucliadb_writer: AsyncClient,
+ standalone_knowledgebox,
):
old_slug = "my-resource"
resp = await nucliadb_writer.post(
- f"/{KB_PREFIX}/{knowledgebox}/{RESOURCES_PREFIX}",
+ f"/{KB_PREFIX}/{standalone_knowledgebox}/{RESOURCES_PREFIX}",
json={
"title": "Old title",
"slug": old_slug,
@@ -257,7 +264,7 @@ async def test_resource_slug_modification_rollbacks(
assert resp.status_code == 201
rid = resp.json()["uuid"]
- await check_resource(nucliadb_reader, knowledgebox, rid, old_slug)
+ await check_resource(nucliadb_reader, standalone_knowledgebox, rid, old_slug)
# Mock an error in the sending to process
with mock.patch(
@@ -265,7 +272,7 @@ async def test_resource_slug_modification_rollbacks(
side_effect=HTTPException(status_code=506),
):
resp = await nucliadb_writer.patch(
- f"/{KB_PREFIX}/{knowledgebox}/resource/{rid}",
+ f"/{KB_PREFIX}/{standalone_knowledgebox}/resource/{rid}",
json={
"slug": "my-resource-2",
"title": "New title",
@@ -274,12 +281,13 @@ async def test_resource_slug_modification_rollbacks(
assert resp.status_code == 506
# Check that slug and title were not updated
- await check_resource(nucliadb_reader, knowledgebox, rid, old_slug, title="New title")
+ await check_resource(nucliadb_reader, standalone_knowledgebox, rid, old_slug, title="New title")
+@pytest.mark.deploy_modes("standalone")
async def test_resource_slug_modification_handles_conflicts(
- nucliadb_writer,
- knowledgebox,
+ nucliadb_writer: AsyncClient,
+ standalone_knowledgebox,
):
rids = []
slugs = []
@@ -287,7 +295,7 @@ async def test_resource_slug_modification_handles_conflicts(
slug = f"my-resource-{i}"
slugs.append(slug)
resp = await nucliadb_writer.post(
- f"/{KB_PREFIX}/{knowledgebox}/{RESOURCES_PREFIX}",
+ f"/{KB_PREFIX}/{standalone_knowledgebox}/{RESOURCES_PREFIX}",
json={
"title": "My Resource",
"slug": slug,
@@ -298,7 +306,7 @@ async def test_resource_slug_modification_handles_conflicts(
rids.append(rid)
# Check that conflicts on slug are detected
- path = f"/{KB_PREFIX}/{knowledgebox}/resource/{rids[0]}"
+ path = f"/{KB_PREFIX}/{standalone_knowledgebox}/resource/{rids[0]}"
resp = await nucliadb_writer.patch(
path,
json={
@@ -308,12 +316,13 @@ async def test_resource_slug_modification_handles_conflicts(
assert resp.status_code == 409
+@pytest.mark.deploy_modes("standalone")
async def test_resource_slug_modification_handles_unknown_resources(
- nucliadb_writer,
- knowledgebox,
+ nucliadb_writer: AsyncClient,
+ standalone_knowledgebox,
):
resp = await nucliadb_writer.patch(
- f"/{KB_PREFIX}/{knowledgebox}/resource/foobar",
+ f"/{KB_PREFIX}/{standalone_knowledgebox}/resource/foobar",
json={
"slug": "foo",
},
@@ -321,9 +330,10 @@ async def test_resource_slug_modification_handles_unknown_resources(
assert resp.status_code == 404
+@pytest.mark.deploy_modes("standalone")
async def test_parallel_dup_resource_creation_raises_conflicts(
- nucliadb_writer,
- knowledgebox,
+ nucliadb_writer: AsyncClient,
+ standalone_knowledgebox,
):
driver = get_driver()
if not isinstance(driver, PGDriver):
@@ -344,7 +354,7 @@ async def create_resource(kbid: str):
# Create 5 requests that attempt to create the same resource with the same slug simultaneously
tasks = []
for _ in range(5):
- tasks.append(asyncio.create_task(create_resource(knowledgebox)))
+ tasks.append(asyncio.create_task(create_resource(standalone_knowledgebox)))
status_codes = await asyncio.gather(*tasks)
# Check that only one succeeded
diff --git a/nucliadb/tests/nucliadb/integration/test_security.py b/nucliadb/tests/nucliadb/integration/test_security.py
index eb20565244..d33ee4f120 100644
--- a/nucliadb/tests/nucliadb/integration/test_security.py
+++ b/nucliadb/tests/nucliadb/integration/test_security.py
@@ -21,14 +21,15 @@
from typing import Optional
import pytest
+from httpx import AsyncClient
PLATFORM_GROUP = "platform"
DEVELOPERS_GROUP = "developers"
@pytest.fixture(scope="function")
-async def resource_with_security(nucliadb_writer, knowledgebox):
- kbid = knowledgebox
+async def resource_with_security(nucliadb_writer: AsyncClient, standalone_knowledgebox: str):
+ kbid = standalone_knowledgebox
resp = await nucliadb_writer.post(
f"/kb/{kbid}/resources",
json={
@@ -45,10 +46,11 @@ async def resource_with_security(nucliadb_writer, knowledgebox):
return resp.json()["uuid"]
+@pytest.mark.deploy_modes("standalone")
async def test_resource_security_is_returned_serialization(
- nucliadb_reader, knowledgebox, resource_with_security
+ nucliadb_reader: AsyncClient, standalone_knowledgebox: str, resource_with_security
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
resource_id = resource_with_security
resp = await nucliadb_reader.get(f"/kb/{kbid}/resource/{resource_id}", params={"show": ["security"]})
@@ -57,10 +59,11 @@ async def test_resource_security_is_returned_serialization(
assert set(resource["security"]["access_groups"]) == set([PLATFORM_GROUP, DEVELOPERS_GROUP])
+@pytest.mark.deploy_modes("standalone")
async def test_resource_security_is_updated(
- nucliadb_reader, nucliadb_writer, knowledgebox, resource_with_security
+ nucliadb_reader: AsyncClient, nucliadb_writer, standalone_knowledgebox: str, resource_with_security
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
resource_id = resource_with_security
# Update the security of the resource: make it public for all groups
@@ -85,14 +88,15 @@ async def test_resource_security_is_updated(
@pytest.mark.parametrize("search_endpoint", ("find_get", "find_post", "search_get", "search_post"))
+@pytest.mark.deploy_modes("standalone")
async def test_resource_security_search(
- nucliadb_reader,
- nucliadb_writer,
- knowledgebox,
+ nucliadb_reader: AsyncClient,
+ nucliadb_writer: AsyncClient,
+ standalone_knowledgebox: str,
resource_with_security,
search_endpoint,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
resource_id = resource_with_security
support_group = "support"
# Add another group to the resource
@@ -173,7 +177,7 @@ async def test_resource_security_search(
async def _test_search_request_with_security(
search_endpoint: str,
- nucliadb_reader,
+ nucliadb_reader: AsyncClient,
kbid: str,
query: str,
security_groups: Optional[list[str]],
diff --git a/nucliadb/tests/nucliadb/integration/test_suggest.py b/nucliadb/tests/nucliadb/integration/test_suggest.py
index 8eb464a6cd..6769fb5486 100644
--- a/nucliadb/tests/nucliadb/integration/test_suggest.py
+++ b/nucliadb/tests/nucliadb/integration/test_suggest.py
@@ -27,20 +27,21 @@
from tests.utils import inject_message
+@pytest.mark.deploy_modes("standalone")
async def test_suggest_paragraphs(
- nucliadb_grpc: WriterStub,
+ nucliadb_ingest_grpc: WriterStub,
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
"""
Test description:
- Create some resource on a knowledgebox and use the /suggest endpoint
+ Create some resource on a standalone_knowledgebox and use the /suggest endpoint
to search them.
"""
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "My resource",
"slug": "myresource",
@@ -50,7 +51,7 @@ async def test_suggest_paragraphs(
assert resp.status_code == 201
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "The little prince",
"slug": "the-little-prince",
@@ -67,7 +68,7 @@ async def test_suggest_paragraphs(
assert resp.status_code == 201
rid2 = resp.json()["uuid"]
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "Thus Spoke Zarathustra",
"slug": "thus-spoke-zarathustra",
@@ -81,14 +82,14 @@ async def test_suggest_paragraphs(
rid3 = resp.json()["uuid"]
# exact match
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=Nietzche")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=Nietzche")
assert resp.status_code == 200
body = resp.json()
assert len(body["paragraphs"]["results"]) == 1
assert body["paragraphs"]["results"][0]["rid"] == rid3
# typo tolerant search
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=princes")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=princes")
assert resp.status_code == 200
body = resp.json()
assert len(body["paragraphs"]["results"]) == 2
@@ -97,7 +98,7 @@ async def test_suggest_paragraphs(
assert {"summary", "title"} == {result["field"] for result in body["paragraphs"]["results"]}
# fuzzy search with distance 1 will only match 'a' from resource 2
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=z")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=z")
assert resp.status_code == 200
body = resp.json()
assert len(body["paragraphs"]["results"]) == 1
@@ -105,14 +106,14 @@ async def test_suggest_paragraphs(
assert body["paragraphs"]["results"][0]["field"] == "summary"
# nonexistent term
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=Hanna+Adrent")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=Hanna+Adrent")
assert resp.status_code == 200
body = resp.json()
assert len(body["paragraphs"]["results"]) == 0
# by field
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/suggest",
+ f"/kb/{standalone_knowledgebox}/suggest",
params={
"query": "prince",
"fields": "a/title",
@@ -125,7 +126,7 @@ async def test_suggest_paragraphs(
# filter by language
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/suggest",
+ f"/kb/{standalone_knowledgebox}/suggest",
params={
"query": "prince",
"filters": "/metadata.language/en",
@@ -138,7 +139,7 @@ async def test_suggest_paragraphs(
# No "prince" appear in any german resource
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/suggest",
+ f"/kb/{standalone_knowledgebox}/suggest",
params={
"query": "prince",
"filters": "/metadata.language/de",
@@ -149,8 +150,9 @@ async def test_suggest_paragraphs(
assert len(body["paragraphs"]["results"]) == 0
+@pytest.mark.deploy_modes("standalone")
async def test_suggest_related_entities(
- nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, knowledgebox, request
+ nucliadb_reader: AsyncClient, nucliadb_writer: AsyncClient, standalone_knowledgebox, request
):
"""
Test description:
@@ -184,7 +186,7 @@ async def test_suggest_related_entities(
for entity, type in entities
]
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "People and places",
"slug": "pap",
@@ -207,64 +209,65 @@ def assert_expected_entities(body, expected):
assert set((e["value"] for e in body["entities"]["entities"])) == expected
# Test simple suggestions
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=Ann")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=Ann")
assert resp.status_code == 200
body = resp.json()
assert_expected_entities(body, {"Anna", "Anthony"})
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=joh")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=joh")
assert resp.status_code == 200
body = resp.json()
assert_expected_entities(body, {"John"})
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=xxxxx")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=xxxxx")
assert resp.status_code == 200
body = resp.json()
assert not body["entities"]["entities"]
# Test correct query tokenization
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=bar")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=bar")
assert resp.status_code == 200
body = resp.json()
assert_expected_entities(body, {"Barcelona", "Bárcenas"})
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=Bar")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=Bar")
assert resp.status_code == 200
body = resp.json()
assert_expected_entities(body, {"Barcelona", "Bárcenas"})
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=BAR")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=BAR")
assert resp.status_code == 200
body = resp.json()
assert_expected_entities(body, {"Barcelona", "Bárcenas"})
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=BÄR")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=BÄR")
assert resp.status_code == 200
body = resp.json()
assert_expected_entities(body, {"Barcelona", "Bárcenas"})
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=BáR")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=BáR")
assert resp.status_code == 200
body = resp.json()
assert_expected_entities(body, {"Barcelona", "Bárcenas"})
# Test multiple word suggest and ordering
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/suggest?query=Solomon+Is")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/suggest?query=Solomon+Is")
assert resp.status_code == 200
body = resp.json()
assert_expected_entities(body, {"Solomon Islands", "Israel"})
+@pytest.mark.deploy_modes("standalone")
async def test_suggestion_on_link_computed_titles_sc6088(
- nucliadb_writer,
- nucliadb_grpc,
- nucliadb_reader,
- knowledgebox,
+ nucliadb_writer: AsyncClient,
+ nucliadb_ingest_grpc: WriterStub,
+ nucliadb_reader: AsyncClient,
+ standalone_knowledgebox,
):
# Create a resource with a link field
link = "http://www.mylink.com"
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
resp = await nucliadb_writer.post(
f"/kb/{kbid}/resources",
json={
@@ -291,7 +294,7 @@ async def test_suggestion_on_link_computed_titles_sc6088(
led.title = extracted_title
bm.link_extracted_data.append(led)
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
# Check that the resource title changed
resp = await nucliadb_reader.get(f"/kb/{kbid}/resource/{rid}")
@@ -311,10 +314,11 @@ async def test_suggestion_on_link_computed_titles_sc6088(
assert suggested["text"] == extracted_title
+@pytest.mark.deploy_modes("standalone")
async def test_suggest_features(
- nucliadb_grpc: WriterStub,
+ nucliadb_ingest_grpc: WriterStub,
nucliadb_reader: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
texts: dict[str, str],
entities,
):
@@ -341,7 +345,7 @@ def assert_expected_entities(response):
assert set((e["value"] for e in response["entities"]["entities"])) == expected
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/suggest",
+ f"/kb/{standalone_knowledgebox}/suggest",
params={"query": "ann", "features": ["paragraph", "entities"]},
)
assert resp.status_code == 200
@@ -350,7 +354,7 @@ def assert_expected_entities(response):
assert_expected_paragraphs(body)
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/suggest",
+ f"/kb/{standalone_knowledgebox}/suggest",
params={"query": "ann", "features": ["paragraph"]},
)
assert resp.status_code == 200
@@ -359,7 +363,7 @@ def assert_expected_entities(response):
assert_expected_paragraphs(body)
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/suggest", params={"query": "ann", "features": ["entities"]}
+ f"/kb/{standalone_knowledgebox}/suggest", params={"query": "ann", "features": ["entities"]}
)
assert resp.status_code == 200
body = resp.json()
@@ -370,10 +374,10 @@ def assert_expected_entities(response):
@pytest.fixture(scope="function")
async def texts(
nucliadb_writer: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
) -> dict[str, str]:
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "My resource",
"slug": "myresource",
@@ -384,7 +388,7 @@ async def texts(
rid1 = resp.json()["uuid"]
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "The little prince",
"slug": "the-little-prince",
@@ -401,7 +405,7 @@ async def texts(
assert resp.status_code == 201
rid2 = resp.json()["uuid"]
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "Thus Spoke Zarathustra",
"slug": "thus-spoke-zarathustra",
@@ -422,7 +426,7 @@ async def texts(
@pytest.fixture(scope="function")
-async def entities(nucliadb_writer: AsyncClient, knowledgebox: str):
+async def entities(nucliadb_writer: AsyncClient, standalone_knowledgebox: str):
collaborators = ["Irene", "Anastasia"]
entities = [
("Anna", "person"),
@@ -449,7 +453,7 @@ async def entities(nucliadb_writer: AsyncClient, knowledgebox: str):
for entity, type in entities
]
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "People and places",
"slug": "pap",
@@ -469,7 +473,8 @@ async def entities(nucliadb_writer: AsyncClient, knowledgebox: str):
assert resp.status_code == 201
-async def test_search_kb_not_found(nucliadb_reader) -> None:
+@pytest.mark.deploy_modes("standalone")
+async def test_search_kb_not_found(nucliadb_reader: AsyncClient) -> None:
resp = await nucliadb_reader.get(
f"/kb/00000000000000/suggest?query=own+text",
)
diff --git a/nucliadb/tests/nucliadb/integration/test_summarize.py b/nucliadb/tests/nucliadb/integration/test_summarize.py
index 0594df8b86..5bb732edcc 100644
--- a/nucliadb/tests/nucliadb/integration/test_summarize.py
+++ b/nucliadb/tests/nucliadb/integration/test_summarize.py
@@ -17,17 +17,19 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
+import pytest
from httpx import AsyncClient
from nucliadb_models.search import SummarizedResponse
+@pytest.mark.deploy_modes("standalone")
async def test_summarize(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
resource_uuids = []
resource_slugs = []
@@ -61,6 +63,7 @@ async def test_summarize(
assert set(response.resources.keys()) == set(resources)
+@pytest.mark.deploy_modes("standalone")
async def test_summarize_unexisting_kb(
nucliadb_reader: AsyncClient,
):
diff --git a/nucliadb/tests/nucliadb/integration/test_synonyms.py b/nucliadb/tests/nucliadb/integration/test_synonyms.py
index 63dd90087d..a30c63522e 100644
--- a/nucliadb/tests/nucliadb/integration/test_synonyms.py
+++ b/nucliadb/tests/nucliadb/integration/test_synonyms.py
@@ -18,16 +18,18 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import pytest
+from httpx import AsyncClient
from nucliadb_models.search import SearchOptions
+@pytest.mark.deploy_modes("standalone")
async def test_custom_synonyms_api(
- nucliadb_reader,
- nucliadb_writer,
- knowledgebox,
+ nucliadb_reader: AsyncClient,
+ nucliadb_writer: AsyncClient,
+ standalone_knowledgebox: str,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
synonyms_url = f"/kb/{kbid}/custom-synonyms"
# Delete first
@@ -74,8 +76,10 @@ async def test_custom_synonyms_api(
@pytest.fixture(scope="function")
-async def knowledgebox_with_synonyms(nucliadb_writer, knowledgebox):
- kbid = knowledgebox
+async def standalone_knowledgebox_with_synonyms(
+ nucliadb_writer: AsyncClient, standalone_knowledgebox: str
+):
+ kbid = standalone_knowledgebox
synonyms_url = f"/kb/{kbid}/custom-synonyms"
kb_synonyms = {
"synonyms": {
@@ -87,12 +91,13 @@ async def knowledgebox_with_synonyms(nucliadb_writer, knowledgebox):
yield kbid
+@pytest.mark.deploy_modes("standalone")
async def test_search_with_synonyms(
- nucliadb_reader,
- nucliadb_writer,
- knowledgebox_with_synonyms,
+ nucliadb_reader: AsyncClient,
+ nucliadb_writer: AsyncClient,
+ standalone_knowledgebox_with_synonyms: str,
):
- kbid = knowledgebox_with_synonyms
+ kbid = standalone_knowledgebox_with_synonyms
# Create a resource with:
# - the term on the summary
@@ -188,11 +193,12 @@ def get_pararagraphs(body):
return paragraphs
+@pytest.mark.deploy_modes("standalone")
async def test_search_errors_if_vectors_or_relations_requested(
- nucliadb_reader,
- knowledgebox,
+ nucliadb_reader: AsyncClient,
+ standalone_knowledgebox: str,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
resp = await nucliadb_reader.post(
f"/kb/{kbid}/search",
json=dict(
diff --git a/nucliadb/tests/nucliadb/integration/test_text_field_json.py b/nucliadb/tests/nucliadb/integration/test_text_field_json.py
index 1d07baa889..f74303da03 100644
--- a/nucliadb/tests/nucliadb/integration/test_text_field_json.py
+++ b/nucliadb/tests/nucliadb/integration/test_text_field_json.py
@@ -19,17 +19,19 @@
#
import json
+import pytest
from httpx import AsyncClient
from nucliadb_models.text import TextFormat
+@pytest.mark.deploy_modes("standalone")
async def test_text_field_in_json_format(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
field_id = "json-text"
payload = {"hello": "world"}
@@ -55,12 +57,13 @@ async def test_text_field_in_json_format(
assert json.loads(body["data"]["texts"][field_id]["value"]["body"]) == payload
+@pytest.mark.deploy_modes("standalone")
async def test_text_field_with_invalid_json(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
field_id = "json-text"
invalid_json = '{hello": "world"}'
diff --git a/nucliadb/tests/nucliadb/integration/test_tokens.py b/nucliadb/tests/nucliadb/integration/test_tokens.py
index eec7eaf6ad..dc3f0e190c 100644
--- a/nucliadb/tests/nucliadb/integration/test_tokens.py
+++ b/nucliadb/tests/nucliadb/integration/test_tokens.py
@@ -18,13 +18,15 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
+import pytest
from httpx import AsyncClient
+@pytest.mark.deploy_modes("standalone")
async def test_metadata_tokens_cancelled_by_the_user_sc_3775(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
token = {
"token": "DRAG",
@@ -34,7 +36,7 @@ async def test_metadata_tokens_cancelled_by_the_user_sc_3775(
"cancelled_by_user": True,
}
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"title": "My Resource",
"summary": "My summary",
@@ -55,7 +57,7 @@ async def test_metadata_tokens_cancelled_by_the_user_sc_3775(
# Check cancelled tokens come in resource get
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/resource/{rid}",
+ f"/kb/{standalone_knowledgebox}/resource/{rid}",
)
assert resp.status_code == 200
content = resp.json()
@@ -63,14 +65,14 @@ async def test_metadata_tokens_cancelled_by_the_user_sc_3775(
# Check cancelled labels come in resource list
resp = await nucliadb_reader.get(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
)
assert resp.status_code == 200
content = resp.json()
assert content["resources"][0]["fieldmetadata"][0]["token"][0] == token
# Check cancelled labels come in search results
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/search?query=summary")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/search?query=summary")
assert resp.status_code == 200
content = resp.json()
assert content["resources"][rid]["fieldmetadata"][0]["token"][0] == token
diff --git a/nucliadb/tests/nucliadb/integration/test_upload.py b/nucliadb/tests/nucliadb/integration/test_upload.py
index d08bc40ad9..f100fb52a1 100644
--- a/nucliadb/tests/nucliadb/integration/test_upload.py
+++ b/nucliadb/tests/nucliadb/integration/test_upload.py
@@ -19,19 +19,21 @@
#
import base64
+import pytest
from httpx import AsyncClient
from nucliadb.writer.tus import UPLOAD
+@pytest.mark.deploy_modes("standalone")
async def test_upload(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
content = b"Test for /upload endpoint"
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/{UPLOAD}",
+ f"/kb/{standalone_knowledgebox}/{UPLOAD}",
headers={
"X-Filename": base64.b64encode(b"testfile").decode("utf-8"),
"Content-Type": "text/plain",
@@ -51,7 +53,7 @@ async def test_upload(
assert rid
assert field_id
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}/file/{field_id}")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}/file/{field_id}")
assert resp.status_code == 200
body = resp.json()
assert body["value"]["file"]["filename"] == "testfile"
@@ -62,17 +64,18 @@ async def test_upload(
assert resp.content == content
+@pytest.mark.deploy_modes("standalone")
async def test_upload_guesses_content_type(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
filename = "testfile.txt"
content = b"Test for /upload endpoint"
content_type = "text/plain"
# Upload the file without specifying the content type
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/{UPLOAD}",
+ f"/kb/{standalone_knowledgebox}/{UPLOAD}",
headers={
"X-Filename": base64.b64encode(filename.encode()).decode("utf-8"),
},
@@ -84,7 +87,7 @@ async def test_upload_guesses_content_type(
field_id = body["field_id"]
# Test that the content type is correctly guessed from the filename
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}/file/{field_id}")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}/file/{field_id}")
assert resp.status_code == 200
body = resp.json()
assert body["value"]["file"]["filename"] == filename
diff --git a/nucliadb/tests/nucliadb/integration/test_usermetadata.py b/nucliadb/tests/nucliadb/integration/test_usermetadata.py
index ddc9bbb745..b4236efb4e 100644
--- a/nucliadb/tests/nucliadb/integration/test_usermetadata.py
+++ b/nucliadb/tests/nucliadb/integration/test_usermetadata.py
@@ -17,20 +17,22 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see .
#
+import pytest
from httpx import AsyncClient
+@pytest.mark.deploy_modes("standalone")
async def test_labels_sc_2053(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
# PUBLIC API
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}")
assert resp.status_code == 200
resp = await nucliadb_writer.post(
- f"/kb/{knowledgebox}/resources",
+ f"/kb/{standalone_knowledgebox}/resources",
json={
"slug": "myresource",
"usermetadata": {"classifications": [{"labelset": "type", "label": "Book"}]},
@@ -39,14 +41,14 @@ async def test_labels_sc_2053(
assert resp.status_code == 201
rid = resp.json()["uuid"]
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}")
assert resp.status_code == 200
assert len(resp.json()["usermetadata"]["classifications"]) == 1
# ADD A LABEL
resp = await nucliadb_writer.patch(
- f"/kb/{knowledgebox}/resource/{rid}",
+ f"/kb/{standalone_knowledgebox}/resource/{rid}",
json={
"usermetadata": {
"classifications": [
@@ -58,16 +60,16 @@ async def test_labels_sc_2053(
)
assert resp.status_code == 200
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}")
assert resp.status_code == 200
assert len(resp.json()["usermetadata"]["classifications"]) == 2
resp = await nucliadb_writer.patch(
- f"/kb/{knowledgebox}/resource/{rid}",
+ f"/kb/{standalone_knowledgebox}/resource/{rid}",
json={"usermetadata": {"classifications": []}},
)
assert resp.status_code == 200
- resp = await nucliadb_reader.get(f"/kb/{knowledgebox}/resource/{rid}")
+ resp = await nucliadb_reader.get(f"/kb/{standalone_knowledgebox}/resource/{rid}")
assert resp.status_code == 200
assert len(resp.json()["usermetadata"]["classifications"]) == 0
diff --git a/nucliadb/tests/nucliadb/integration/test_vectorsets.py b/nucliadb/tests/nucliadb/integration/test_vectorsets.py
index 12930fd130..34d5131937 100644
--- a/nucliadb/tests/nucliadb/integration/test_vectorsets.py
+++ b/nucliadb/tests/nucliadb/integration/test_vectorsets.py
@@ -61,10 +61,11 @@
VECTORSET_DIMENSION = 12
+@pytest.mark.deploy_modes("standalone")
async def test_vectorsets_work_on_a_kb_with_a_single_vectorset(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
+ nucliadb_ingest_grpc: WriterStub,
kb_with_vectorset: KbSpecs,
):
kbid = kb_with_vectorset.kbid
@@ -113,13 +114,14 @@ async def test_vectorsets_work_on_a_kb_with_a_single_vectorset(
"vectorset,expected",
[(None, "multilingual"), ("", "multilingual"), ("myvectorset", "myvectorset")],
)
+@pytest.mark.deploy_modes("standalone")
async def test_vectorset_parameter_without_default_vectorset(
nucliadb_reader: AsyncClient,
- knowledgebox: str,
- vectorset,
- expected,
+ standalone_knowledgebox: str,
+ vectorset: Optional[str],
+ expected: str,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
calls: list[nodereader_pb2.SearchRequest] = []
@@ -172,13 +174,14 @@ def set_predict_default_vectorset(query_info: QueryInfo) -> QueryInfo:
"vectorset,expected",
[(None, "multilingual"), ("", "multilingual"), ("myvectorset", "myvectorset")],
)
+@pytest.mark.deploy_modes("standalone")
async def test_vectorset_parameter_with_default_vectorset(
nucliadb_reader: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
vectorset,
expected,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
calls: list[nodereader_pb2.SearchRequest] = []
@@ -221,6 +224,7 @@ async def mock_node_query(kbid: str, method, pb_query: nodereader_pb2.SearchRequ
assert calls[-1].vectorset == expected
+@pytest.mark.deploy_modes("standalone")
async def test_querying_kb_with_vectorsets(
mocker: MockerFixture,
storage: Storage,
@@ -228,7 +232,7 @@ async def test_querying_kb_with_vectorsets(
shard_manager,
learning_config,
indexing_utility,
- nucliadb_grpc: WriterStub,
+ nucliadb_ingest_grpc: WriterStub,
nucliadb_reader: AsyncClient,
dummy_predict: DummyPredictEngine,
):
@@ -288,7 +292,7 @@ async def inner(*args, **kwargs):
rid = uuid.uuid4().hex
field_id = "my-field"
bm = create_broker_message_with_vectorsets(kbid, rid, field_id, [("model", 768)])
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
with (
patch.dict(utils.METHODS, {utils.Method.SEARCH: query_shard_wrapper}, clear=True),
@@ -356,7 +360,7 @@ async def inner(*args, **kwargs):
bm = create_broker_message_with_vectorsets(
kbid, rid, field_id, [("model-A", 768), ("model-B", 1024)]
)
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
with (
patch.dict(utils.METHODS, {utils.Method.SEARCH: query_shard_wrapper}, clear=True),
diff --git a/nucliadb/tests/nucliadb/integration/test_vectorsets_api.py b/nucliadb/tests/nucliadb/integration/test_vectorsets_api.py
index ab70d0a038..4dcf3ad6c6 100644
--- a/nucliadb/tests/nucliadb/integration/test_vectorsets_api.py
+++ b/nucliadb/tests/nucliadb/integration/test_vectorsets_api.py
@@ -48,12 +48,13 @@
MODULE = "nucliadb.writer.api.v1.vectorsets"
+@pytest.mark.deploy_modes("standalone")
async def test_vectorsets_crud(
- nucliadb_manager: AsyncClient,
+ nucliadb_writer: AsyncClient,
nucliadb_reader: AsyncClient,
- knowledgebox,
+ standalone_knowledgebox,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
vectorset_id = "en-2024-04-24"
existing_lconfig = LearningConfiguration(
semantic_model="multilingual",
@@ -97,7 +98,7 @@ async def test_vectorsets_crud(
],
):
# Add the vectorset
- resp = await nucliadb_manager.post(f"/kb/{kbid}/vectorsets/{vectorset_id}")
+ resp = await nucliadb_writer.post(f"/kb/{kbid}/vectorsets/{vectorset_id}")
assert resp.status_code == 201, resp.text
# Check that the vectorset has been created with the correct configuration
@@ -124,7 +125,7 @@ async def test_vectorsets_crud(
],
):
# Delete the vectorset
- resp = await nucliadb_manager.delete(f"/kb/{kbid}/vectorsets/{vectorset_id}")
+ resp = await nucliadb_writer.delete(f"/kb/{kbid}/vectorsets/{vectorset_id}")
assert resp.status_code == 204, resp.text
# Check that the vectorset has been deleted
@@ -145,7 +146,7 @@ async def test_vectorsets_crud(
],
):
# Deleting your last vectorset is not allowed
- resp = await nucliadb_manager.delete(f"/kb/{kbid}/vectorsets/multilingual")
+ resp = await nucliadb_writer.delete(f"/kb/{kbid}/vectorsets/multilingual")
assert resp.status_code == 409, resp.text
assert "Deletion of your last vectorset is not allowed" in resp.json()["detail"]
@@ -156,7 +157,7 @@ async def test_vectorsets_crud(
],
):
# But deleting twice is okay
- resp = await nucliadb_manager.delete(f"/kb/{kbid}/vectorsets/{vectorset_id}")
+ resp = await nucliadb_writer.delete(f"/kb/{kbid}/vectorsets/{vectorset_id}")
# XXX: however, we get the same error as before due to our lazy
# check strategy. This shuold be a 200
assert resp.status_code == 409, resp.text
@@ -170,37 +171,39 @@ async def test_vectorsets_crud(
],
):
# Add and delete the vectorset again
- resp = await nucliadb_manager.post(f"/kb/{kbid}/vectorsets/{vectorset_id}")
+ resp = await nucliadb_writer.post(f"/kb/{kbid}/vectorsets/{vectorset_id}")
assert resp.status_code == 201, resp.text
- resp = await nucliadb_manager.delete(f"/kb/{kbid}/vectorsets/{vectorset_id}")
+ resp = await nucliadb_writer.delete(f"/kb/{kbid}/vectorsets/{vectorset_id}")
assert resp.status_code == 204, resp.text
+@pytest.mark.deploy_modes("standalone")
async def test_learning_config_errors_are_proxied_correctly(
- nucliadb_manager: AsyncClient,
- knowledgebox,
+ nucliadb_writer: AsyncClient,
+ standalone_knowledgebox,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
with patch(
f"{MODULE}.learning_proxy.get_configuration",
side_effect=ProxiedLearningConfigError(
status_code=500, content="Learning Internal Server Error"
),
):
- resp = await nucliadb_manager.post(f"/kb/{kbid}/vectorsets/foo")
+ resp = await nucliadb_writer.post(f"/kb/{kbid}/vectorsets/foo")
assert resp.status_code == 500
assert resp.json() == {"detail": "Learning Internal Server Error"}
- resp = await nucliadb_manager.delete(f"/kb/{kbid}/vectorsets/foo")
+ resp = await nucliadb_writer.delete(f"/kb/{kbid}/vectorsets/foo")
assert resp.status_code == 500
assert resp.json() == {"detail": "Learning Internal Server Error"}
@pytest.mark.parametrize("bwc_with_default_vectorset", [True, False])
+@pytest.mark.deploy_modes("standalone")
async def test_vectorset_migration(
- nucliadb_manager: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
+ nucliadb_writer_manager: AsyncClient,
+ nucliadb_ingest_grpc: WriterStub,
nucliadb_reader: AsyncClient,
bwc_with_default_vectorset: bool,
):
@@ -210,7 +213,7 @@ async def test_vectorset_migration(
"""
# Create a KB
- resp = await nucliadb_manager.post(
+ resp = await nucliadb_writer_manager.post(
"/kbs",
json={
"title": "migrationexamples",
@@ -275,7 +278,7 @@ async def test_vectorset_migration(
bmb.add_field_builder(link_field)
bm = bmb.build()
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
# Make a search and check that the document is found
await _check_search(nucliadb_reader, kbid)
@@ -283,7 +286,7 @@ async def test_vectorset_migration(
# Now add a new vectorset
vectorset_id = "en-2024-05-06"
resp = await add_vectorset(
- nucliadb_manager, kbid, vectorset_id, similarity=SimilarityFunction.COSINE, vector_dimension=1024
+ nucliadb_writer, kbid, vectorset_id, similarity=SimilarityFunction.COSINE, vector_dimension=1024
)
assert resp.status_code == 201
@@ -309,7 +312,7 @@ async def test_vectorset_migration(
ev.vectors.vectors.vectors.append(vector)
bm2.field_vectors.append(ev)
- await inject_message(nucliadb_grpc, bm2)
+ await inject_message(nucliadb_ingest_grpc, bm2)
# Make a search with the new vectorset and check that the document is found
await _check_search(nucliadb_reader, kbid, vectorset="en-2024-05-06")
diff --git a/nucliadb/tests/nucliadb/integration/test_visual_selections.py b/nucliadb/tests/nucliadb/integration/test_visual_selections.py
index 03a8a14194..aeb37813ca 100644
--- a/nucliadb/tests/nucliadb/integration/test_visual_selections.py
+++ b/nucliadb/tests/nucliadb/integration/test_visual_selections.py
@@ -36,9 +36,9 @@
@pytest.fixture(scope="function")
async def annotated_file_field(
nucliadb_writer: AsyncClient,
- knowledgebox: str,
+ standalone_knowledgebox: str,
):
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
field_id = "invoice"
with open(INVOICE_FILENAME, "rb") as f:
@@ -95,8 +95,11 @@ async def annotated_file_field(
yield (rid, field_id)
-async def test_visual_selection(nucliadb_reader: AsyncClient, knowledgebox: str, annotated_file_field):
- kbid = knowledgebox
+@pytest.mark.deploy_modes("standalone")
+async def test_visual_selection(
+ nucliadb_reader: AsyncClient, standalone_knowledgebox: str, annotated_file_field
+):
+ kbid = standalone_knowledgebox
rid, field_id = annotated_file_field
resp = await nucliadb_reader.get(
diff --git a/nucliadb/tests/nucliadb/knowledgeboxes/philosophy_books.py b/nucliadb/tests/nucliadb/knowledgeboxes/philosophy_books.py
index 0589aef878..d1f5c88fbb 100644
--- a/nucliadb/tests/nucliadb/knowledgeboxes/philosophy_books.py
+++ b/nucliadb/tests/nucliadb/knowledgeboxes/philosophy_books.py
@@ -24,7 +24,7 @@
@pytest.fixture(scope="function")
async def philosophy_books_kb(
- nucliadb_manager: AsyncClient,
+ nucliadb_writer_manager: AsyncClient,
nucliadb_writer: AsyncClient,
):
payloads = [
@@ -181,7 +181,7 @@ async def philosophy_books_kb(
},
]
- resp = await nucliadb_manager.post("/kbs", json={"slug": "philosophy-books"})
+ resp = await nucliadb_writer_manager.post("/kbs", json={"slug": "philosophy-books"})
assert resp.status_code == 201
kbid = resp.json().get("uuid")
@@ -194,5 +194,5 @@ async def philosophy_books_kb(
yield kbid
- resp = await nucliadb_manager.delete(f"/kb/{kbid}")
+ resp = await nucliadb_writer_manager.delete(f"/kb/{kbid}")
assert resp.status_code == 200
diff --git a/nucliadb/tests/nucliadb/knowledgeboxes/ten_dummy_resources.py b/nucliadb/tests/nucliadb/knowledgeboxes/ten_dummy_resources.py
index 265d0e313b..e2f647e149 100644
--- a/nucliadb/tests/nucliadb/knowledgeboxes/ten_dummy_resources.py
+++ b/nucliadb/tests/nucliadb/knowledgeboxes/ten_dummy_resources.py
@@ -25,7 +25,7 @@
@pytest.fixture(scope="function")
async def ten_dummy_resources_kb(
- nucliadb_manager: AsyncClient,
+ nucliadb_writer_manager: AsyncClient,
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
):
@@ -40,7 +40,7 @@ async def ten_dummy_resources_kb(
for i in range(N_RESOURCES)
]
- resp = await nucliadb_manager.post("/kbs", json={"slug": "ten-dummy-resources"})
+ resp = await nucliadb_writer_manager.post("/kbs", json={"slug": "ten-dummy-resources"})
assert resp.status_code == 201
kbid = resp.json().get("uuid")
@@ -62,5 +62,5 @@ async def ten_dummy_resources_kb(
yield kbid
- resp = await nucliadb_manager.delete(f"/kb/{kbid}")
+ resp = await nucliadb_writer_manager.delete(f"/kb/{kbid}")
assert resp.status_code == 200
diff --git a/nucliadb/tests/nucliadb/knowledgeboxes/vectorsets.py b/nucliadb/tests/nucliadb/knowledgeboxes/vectorsets.py
index 1577878f82..5eebcf2fdf 100644
--- a/nucliadb/tests/nucliadb/knowledgeboxes/vectorsets.py
+++ b/nucliadb/tests/nucliadb/knowledgeboxes/vectorsets.py
@@ -39,18 +39,17 @@ class KbSpecs:
@pytest.fixture(scope="function")
async def kb_with_vectorset(
- nucliadb_manager: AsyncClient,
nucliadb_writer: AsyncClient,
- nucliadb_grpc: WriterStub,
- knowledgebox: str,
+ nucliadb_ingest_grpc: WriterStub,
+ standalone_knowledgebox: str,
) -> AsyncIterable[KbSpecs]:
- # Now knowledgeboxes in standalone are already created with a single vectorset.
+    # Knowledge boxes in standalone mode are now created with a single vectorset.
# By default it's the multilingual one (see mock predict implementation).
- kbid = knowledgebox
+ kbid = standalone_knowledgebox
vectorset_id = "multilingual"
vectorset_dimension = 512
await inject_broker_message_with_vectorset_data(
- nucliadb_grpc,
+ nucliadb_ingest_grpc,
kbid,
vectorset_id,
vectorset_dimension=vectorset_dimension,
@@ -64,7 +63,7 @@ async def kb_with_vectorset(
async def inject_broker_message_with_vectorset_data(
- nucliadb_grpc: WriterStub,
+ nucliadb_ingest_grpc: WriterStub,
kbid: str,
vectorset_id: str,
*,
@@ -83,4 +82,4 @@ async def inject_broker_message_with_vectorset_data(
default_vectorset_dimension=default_vector_dimension,
vectorset_dimension=vectorset_dimension,
)
- await inject_message(nucliadb_grpc, bm)
+ await inject_message(nucliadb_ingest_grpc, bm)
diff --git a/nucliadb/tests/utils/vectorsets.py b/nucliadb/tests/utils/vectorsets.py
index eec8c492e8..693e2056dc 100644
--- a/nucliadb/tests/utils/vectorsets.py
+++ b/nucliadb/tests/utils/vectorsets.py
@@ -33,7 +33,7 @@
async def add_vectorset(
- nucliadb_manager: AsyncClient,
+ nucliadb_writer: AsyncClient,
kbid: str,
vectorset_id: str,
*,
@@ -92,5 +92,5 @@ async def add_vectorset(
"nucliadb.writer.api.v1.vectorsets.learning_proxy.update_configuration",
),
):
- resp = await nucliadb_manager.post(f"/kb/{kbid}/vectorsets/{vectorset_id}")
+ resp = await nucliadb_writer.post(f"/kb/{kbid}/vectorsets/{vectorset_id}")
return resp