-
Notifications
You must be signed in to change notification settings - Fork 198
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Refactored delete documents into its own file Ran tensor search unit tests - passed (besides a randomly failing one) * Refactored a deletion interface. At parity, in terms of delete tests * swapped delete by query with bulk delete. untested * combined components together * used existing tensor_search entrypoint function for minimal interface disruption * added label for data-layer agnostic logic * added more tests * Overwrite files from mainline * Overwrite files from mainline * added tests for config.backend * added env var for delete docs request * added tests for read_env_vars_and_defaults_ints * fixed read_env_vars_and_defaults, added mock environ test * standardised the call from api.delete_docs to tensor_search.delete_documents
- Loading branch information
Showing
13 changed files
with
728 additions
and
68 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
""" | ||
This module handles the delete documents endpoint | ||
""" | ||
import datetime | ||
import json | ||
from marqo._httprequests import HttpRequests | ||
from marqo.config import Config | ||
from marqo.tensor_search import validation, utils, enums | ||
from marqo.tensor_search.models.delete_docs_objects import MqDeleteDocsResponse, MqDeleteDocsRequest | ||
|
||
# -- Marqo delete endpoint interface: -- | ||
|
||
|
||
def format_delete_docs_response(marqo_response: MqDeleteDocsResponse) -> dict:
    """Convert an internal delete response into the dict returned to users.

    Args:
        marqo_response: the outcome of a delete operation.

    Returns:
        A dict with the index name, status, the fixed type "documentDeletion",
        a "details" sub-dict (number of IDs received and number of documents
        deleted), and the duration/start/finish values rendered through the
        `utils` formatting helpers.
    """
    started = marqo_response.deletion_start
    finished = marqo_response.deletion_end

    details = {
        # Count of IDs the caller asked to delete, not how many were removed.
        "receivedDocumentIds": len(marqo_response.document_ids),
        "deletedDocuments": marqo_response.deleted_docments_count,
    }

    return {
        "index_name": marqo_response.index_name,
        "status": marqo_response.status_string,
        "type": "documentDeletion",
        "details": details,
        "duration": utils.create_duration_string(finished - started),
        "startedAt": utils.format_timestamp(started),
        "finishedAt": utils.format_timestamp(finished),
    }
|
||
|
||
# -- Data-layer agnostic logic -- | ||
|
||
|
||
def delete_documents(config: Config, del_request: MqDeleteDocsRequest) -> dict:
    """Validate a delete request, run it on the configured backend and format the result.

    Args:
        config: configuration object; its `backend` attribute selects the
            data-layer implementation.
        del_request: the index name, document IDs and refresh flag to act on.

    Returns:
        The user-facing response dict built by `format_delete_docs_response`.

    Raises:
        RuntimeError: if `config.backend` is not `enums.SearchDb.opensearch`.
        Whatever `validation.validate_delete_docs_request` raises for a
        request it rejects (not visible from this module).
    """
    # The per-request document limit comes from the environment (or its default).
    max_docs = utils.read_env_vars_and_defaults_ints(enums.EnvVars.MARQO_MAX_DELETE_DOCS_COUNT)
    validation.validate_delete_docs_request(
        delete_request=del_request,
        max_delete_docs_count=max_docs,
    )

    # Guard clause: only the OpenSearch backend has a deletion implementation here.
    if config.backend != enums.SearchDb.opensearch:
        raise RuntimeError(f"Config set to use unknown backend `{config.backend}`. "
                           f"See tensor_search.enums.SearchDB for allowed backends")

    backend_result: MqDeleteDocsResponse = delete_documents_marqo_os(
        config=config, deletion_instruction=del_request
    )
    return format_delete_docs_response(backend_result)
|
||
|
||
# -- Marqo-OS-specific deletion implementation: -- | ||
|
||
|
||
def delete_documents_marqo_os(config: Config, deletion_instruction: MqDeleteDocsRequest) -> MqDeleteDocsResponse:
    """Delete documents from a Marqo-OS index using a single bulk request.

    One `delete` action is generated per requested document ID and all of them
    are POSTed together to the `_bulk` path. If the request asks for it, the
    index is refreshed afterwards.

    Args:
        config: configuration object handed to `HttpRequests`.
        deletion_instruction: index name, document IDs and the auto-refresh flag.

    Returns:
        An `MqDeleteDocsResponse` with status 'succeeded', the IDs that were
        requested, the number of bulk items reporting status 200, and the start
        and end timestamps of the operation.

    Raises:
        KeyError: if the bulk response has no "items" entry, or a "delete"
            item has no "status".
        Any error raised by `HttpRequests.post` propagates unchanged.
    """
    index_name = deletion_instruction.index_name

    # Bulk body is newline-delimited JSON: one action per line, every line
    # (including the last) terminated by "\n". A single join avoids growing a
    # string with `+=` once per document.
    bulk_request_body = "".join(
        json.dumps({"delete": {"_index": index_name, "_id": doc_id}}) + "\n"
        for doc_id in deletion_instruction.document_ids
    )

    # Send bulk delete request
    t0 = datetime.datetime.utcnow()
    delete_res_backend = HttpRequests(config=config).post(
        path="_bulk",
        body=bulk_request_body,
    )

    if deletion_instruction.auto_refresh:
        # The refresh response body is not used, so it is not kept.
        HttpRequests(config).post(path=f"{index_name}/_refresh")

    # Taken after the optional refresh, so the reported duration includes it.
    t1 = datetime.datetime.utcnow()

    # Only items that are delete actions with status 200 count as deleted;
    # anything else (e.g. an ID that did not exist) is not counted.
    deleted_documents_count = sum(
        1
        for item in delete_res_backend["items"]
        if "delete" in item and item["delete"]["status"] == 200
    )

    return MqDeleteDocsResponse(
        index_name=index_name,
        status_string='succeeded',
        document_ids=deletion_instruction.document_ids,
        deleted_docments_count=deleted_documents_count,
        deletion_start=t0,
        deletion_end=t1,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
""" | ||
This module holds the classes which define the interface of the delete documents | ||
endpoint. | ||
""" | ||
|
||
import datetime | ||
from typing import NamedTuple, Literal, List | ||
|
||
|
||
class MqDeleteDocsResponse(NamedTuple):
    """Immutable record describing the outcome of a delete-documents call.

    It carries everything needed to build the response that is sent back to
    the user.
    """
    # Index the deletion ran against.
    index_name: str
    # Outcome label; "succeeded" is the only value the type allows.
    status_string: Literal["succeeded"]
    # The document IDs that were requested for deletion.
    document_ids: List[str]
    # How many documents were actually deleted.
    # NOTE: the name is misspelled ("docments") but is part of the interface;
    # renaming it would break code that passes or reads it by name.
    deleted_docments_count: int
    # When the deletion started.
    deletion_start: datetime.datetime
    # When the deletion finished.
    deletion_end: datetime.datetime
|
||
|
||
class MqDeleteDocsRequest(NamedTuple):
    """Immutable record of what a user asked to delete."""
    # Index to delete the documents from.
    index_name: str
    # IDs of the documents to delete.
    document_ids: List[str]
    # Whether the index should be refreshed once the deletion has been sent.
    auto_refresh: bool
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.