Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature/mx-1670 backend api connector #263

Merged
merged 6 commits into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- added `BackendApiConnector` methods to cover all current (and near future) endpoints:
`fetch_extracted_items`, `fetch_merged_items`, `get_merged_item`,
`preview_merged_item` and `get_rule_set`
- complete the list of exported names in `models` and `types` modules

### Changes

- update cruft and loosen up pyproject dependencies

### Deprecated

- deprecated `BackendApiConnector.post_models` in favor of `post_extracted_items`

### Removed

### Fixed

- added the `rki/mex` user-agent to all requests of the HTTPConnector

### Security

## [0.34.0] - 2024-08-12
Expand Down
203 changes: 194 additions & 9 deletions mex/common/backend_api/connector.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,23 @@
from typing import cast
from urllib.parse import urljoin

from mex.common.backend_api.models import BulkInsertResponse
from requests.exceptions import HTTPError

from mex.common.backend_api.models import (
ExtractedItemsRequest,
ExtractedItemsResponse,
IdentifiersResponse,
MergedItemsResponse,
MergedModelTypeAdapter,
RuleSetResponseTypeAdapter,
)
from mex.common.connector import HTTPConnector
from mex.common.models import AnyExtractedModel
from mex.common.models import (
AnyExtractedModel,
AnyMergedModel,
AnyRuleSetRequest,
AnyRuleSetResponse,
)
from mex.common.settings import BaseSettings
from mex.common.types import AnyExtractedIdentifier

Expand All @@ -27,21 +42,191 @@ def _set_url(self) -> None:
self.url = urljoin(str(settings.backend_api_url), self.API_VERSION)

def post_models(
self, models: list[AnyExtractedModel]
self,
extracted_items: list[AnyExtractedModel],
) -> list[AnyExtractedIdentifier]:
"""Post models to Backend API in a bulk insertion request.
"""Post extracted models to the backend in bulk.

Args:
models: Extracted or merged models to post
extracted_items: Extracted models to post

Raises:
HTTPError: If insert was not accepted, crashes or times out
HTTPError: If post was not accepted, crashes or times out

Returns:
Identifiers of posted extracted models
"""
# XXX deprecated method, please use `post_extracted_items` instead
return cast(
list[AnyExtractedIdentifier],
self.post_extracted_items(extracted_items).identifiers,
)

def post_extracted_items(
    self,
    extracted_items: list[AnyExtractedModel],
) -> IdentifiersResponse:
    """Send a batch of extracted items to the backend's `ingest` endpoint.

    Args:
        extracted_items: Extracted items to post

    Raises:
        HTTPError: If post was not accepted, crashes or times out

    Returns:
        Response model from the endpoint
    """
    # wrap the items in the request model so they are sent under the `items` key
    request_body = ExtractedItemsRequest(items=extracted_items)
    raw_response = self.request(
        method="POST",
        endpoint="ingest",
        payload=request_body,
    )
    return IdentifiersResponse.model_validate(raw_response)

def fetch_extracted_items(
    self,
    query_string: str | None,
    stable_target_id: str | None,
    entity_type: list[str] | None,
    skip: int,
    limit: int,
) -> ExtractedItemsResponse:
    """Search the backend for extracted items and return one result page.

    Args:
        query_string: Full-text search query
        stable_target_id: The item's stableTargetId
        entity_type: The item's entityType
        skip: How many items to skip for pagination
        limit: How many items to return in one page

    Raises:
        HTTPError: If search was not accepted, crashes or times out

    Returns:
        One page of extracted items and the total count that was matched
    """
    # pagination values are sent as strings, filters are passed through as given
    search_params: dict[str, list[str] | str | None] = {
        "q": query_string,
        "stableTargetId": stable_target_id,
        "entityType": entity_type,
        "skip": str(skip),
        "limit": str(limit),
    }
    raw_response = self.request(
        method="GET",
        endpoint="extracted-item",
        params=search_params,
    )
    return ExtractedItemsResponse.model_validate(raw_response)

def fetch_merged_items(
    self,
    query_string: str | None,
    entity_type: list[str] | None,
    skip: int,
    limit: int,
) -> MergedItemsResponse:
    """Search the backend for merged items and return one result page.

    Args:
        query_string: Full-text search query
        entity_type: The item's entityType
        skip: How many items to skip for pagination
        limit: How many items to return in one page

    Raises:
        HTTPError: If search was not accepted, crashes or times out

    Returns:
        One page of merged items and the total count that was matched
    """
    # XXX this endpoint will only return faux merged items for now (MX-1382)
    # pagination values are sent as strings, filters are passed through as given
    search_params: dict[str, list[str] | str | None] = {
        "q": query_string,
        "entityType": entity_type,
        "skip": str(skip),
        "limit": str(limit),
    }
    raw_response = self.request(
        method="GET",
        endpoint="merged-item",
        params=search_params,
    )
    return MergedItemsResponse.model_validate(raw_response)

def get_merged_item(
    self,
    stable_target_id: str,
) -> AnyMergedModel:
    """Return one merged item for the given `stableTargetId`.

    Args:
        stable_target_id: The merged item's identifier

    Raises:
        HTTPError: If no merged item was found

    Returns:
        A single merged item
    """
    # XXX stop-gap until the backend has a proper get merged item endpoint (MX-1669)
    response = self.request(
        method="GET",
        endpoint="merged-item",
        params={
            "stableTargetId": stable_target_id,
            "limit": "1",
        },
    )
    response_model = MergedItemsResponse.model_validate(response)
    try:
        return response_model.items[0]
    except IndexError:
        # an empty result page is reported as an HTTPError; the IndexError
        # context is suppressed on purpose with `from None`
        raise HTTPError("merged item was not found") from None

def preview_merged_item(
    self,
    stable_target_id: str,
    rule_set: AnyRuleSetRequest,
) -> AnyMergedModel:
    """Request a preview of merging the given rule-set with stored extracted items.

    Args:
        stable_target_id: The extracted items' `stableTargetId`
        rule_set: A rule-set to use for previewing

    Raises:
        HTTPError: If preview produces errors, crashes or times out

    Returns:
        A single merged item
    """
    # XXX experimental method until the backend has a preview endpoint (MX-1406)
    preview_endpoint = f"preview-item/{stable_target_id}"
    raw_response = self.request(
        method="GET",
        endpoint=preview_endpoint,
        payload=rule_set,
    )
    return MergedModelTypeAdapter.validate_python(raw_response)

def get_rule_set(
self,
stable_target_id: str,
) -> AnyRuleSetResponse:
"""Return a triple of rules for the given `stableTargetId`.

Args:
stable_target_id: The merged item's identifier

Raises:
HTTPError: If no rule-set was found

Returns:
A set of three rules
"""
# XXX experimental method until the backend has a rule-set endpoint (MX-1416)
response = self.request(
method="POST", endpoint="ingest", payload={"items": models}
method="GET",
endpoint=f"rule-set/{stable_target_id}",
)
insert_response = BulkInsertResponse.model_validate(response)
return insert_response.identifiers
return RuleSetResponseTypeAdapter.validate_python(response)
48 changes: 43 additions & 5 deletions mex/common/backend_api/models.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,46 @@
from mex.common.models import BaseModel
from mex.common.types import AnyExtractedIdentifier
from typing import Annotated

from pydantic import Field, TypeAdapter

class BulkInsertResponse(BaseModel):
"""Response body for the bulk ingestion endpoint."""
from mex.common.models import (
AnyExtractedModel,
AnyMergedModel,
AnyRuleSetResponse,
BaseModel,
)
from mex.common.types import Identifier

identifiers: list[AnyExtractedIdentifier]

class ExtractedItemsRequest(BaseModel):
    """Request body that wraps a list of extracted items."""

    # the extracted items carried by this request
    items: list[AnyExtractedModel]


class ExtractedItemsResponse(BaseModel):
    """Response body holding a list of extracted items plus a total count."""

    # the extracted items contained in this response
    items: list[AnyExtractedModel]
    # the total count reported alongside the items
    total: int


class MergedItemsResponse(BaseModel):
    """Response body holding a list of merged items plus a total count."""

    # the merged items contained in this response
    items: list[AnyMergedModel]
    # the total count reported alongside the items
    total: int


class IdentifiersResponse(BaseModel):
    """Response model for a list of identifiers."""

    # the identifiers contained in this response
    identifiers: list[Identifier]


# Type adapter for validating data as `AnyMergedModel`, with the `entityType`
# field declared as the discriminator.
MergedModelTypeAdapter: TypeAdapter[AnyMergedModel] = TypeAdapter(
Annotated[AnyMergedModel, Field(discriminator="entityType")]
)
# Type adapter for validating data as `AnyRuleSetResponse`, with the
# `entityType` field declared as the discriminator.
RuleSetResponseTypeAdapter: TypeAdapter[AnyRuleSetResponse] = TypeAdapter(
Annotated[AnyRuleSetResponse, Field(discriminator="entityType")]
)
# Alias that points the name `BulkInsertResponse` at `IdentifiersResponse`.
BulkInsertResponse = IdentifiersResponse # deprecated
11 changes: 8 additions & 3 deletions mex/common/connector/http.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
from abc import abstractmethod
from collections.abc import Mapping
from typing import Any, Literal, cast

import backoff
Expand Down Expand Up @@ -48,7 +49,7 @@ def request(
method: Literal["OPTIONS", "POST", "GET", "PUT", "DELETE"],
endpoint: str | None = None,
payload: Any = None,
params: dict[str, str] | None = None,
params: Mapping[str, list[str] | str | None] | None = None,
**kwargs: Any,
) -> dict[str, Any]:
"""Prepare and send a request with error handling and payload de/serialization.
Expand Down Expand Up @@ -77,10 +78,10 @@ def request(
if not kwargs.get("headers"):
kwargs.setdefault("headers", {})
kwargs["headers"].setdefault("Accept", "application/json")
kwargs["headers"].setdefault("User-Agent", "rki/mex")

if payload:
kwargs["data"] = json.dumps(payload, cls=MExEncoder)
kwargs["headers"].setdefault("User-Agent", "rki/mex")

# Send request
response = self._send_request(method, url, params, **kwargs)
Expand Down Expand Up @@ -114,7 +115,11 @@ def request(
)
@backoff.on_exception(backoff.fibo, RequestException, max_tries=6)
def _send_request(
self, method: str, url: str, params: dict[str, str] | None, **kwargs: Any
self,
method: str,
url: str,
params: Mapping[str, list[str] | str | None] | None,
**kwargs: Any,
) -> Response:
"""Send the request with advanced retrying rules."""
return self.session.request(method, url, params, **kwargs)
Expand Down
Loading