Skip to content

Commit

Permalink
feature/mx-1670 backend api connector (#263)
Browse files Browse the repository at this point in the history
# PR Context
- as a prep for MX-1382, MX-1669, MX-1406, MX-1416 and MX-1581

# Added

- added `BackendApiConnector` methods to cover all current (and near
future) endpoints:
  `fetch_extracted_items`, `fetch_merged_items`, `get_merged_item`,
  `preview_merged_item` and `get_rule_set`
- complete the list of exported names in `models` and `types` modules

# Deprecated

- deprecated `BackendApiConnector.post_models` in favor of
`post_extracted_items`

# Fixed

- added the `rki/mex` user-agent to all requests of the HTTPConnector
  • Loading branch information
cutoffthetop authored Aug 20, 2024
1 parent 915406c commit 9b4c9f8
Show file tree
Hide file tree
Showing 12 changed files with 588 additions and 65 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- added `BackendApiConnector` methods to cover all current (and near future) endpoints:
`fetch_extracted_items`, `fetch_merged_items`, `get_merged_item`,
`preview_merged_item` and `get_rule_set`
- complete the list of exported names in `models` and `types` modules

### Changes

### Deprecated

- deprecated `BackendApiConnector.post_models` in favor of `post_extracted_items`

### Removed

- containerize section from release pipeline

### Fixed

- added the `rki/mex` user-agent to all requests of the HTTPConnector

### Security

## [0.35.0] - 2024-08-20
Expand Down
203 changes: 194 additions & 9 deletions mex/common/backend_api/connector.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,23 @@
from typing import cast
from urllib.parse import urljoin

from mex.common.backend_api.models import BulkInsertResponse
from requests.exceptions import HTTPError

from mex.common.backend_api.models import (
ExtractedItemsRequest,
ExtractedItemsResponse,
IdentifiersResponse,
MergedItemsResponse,
MergedModelTypeAdapter,
RuleSetResponseTypeAdapter,
)
from mex.common.connector import HTTPConnector
from mex.common.models import AnyExtractedModel
from mex.common.models import (
AnyExtractedModel,
AnyMergedModel,
AnyRuleSetRequest,
AnyRuleSetResponse,
)
from mex.common.settings import BaseSettings
from mex.common.types import AnyExtractedIdentifier

Expand All @@ -27,21 +42,191 @@ def _set_url(self) -> None:
self.url = urljoin(str(settings.backend_api_url), self.API_VERSION)

def post_models(
self, models: list[AnyExtractedModel]
self,
extracted_items: list[AnyExtractedModel],
) -> list[AnyExtractedIdentifier]:
"""Post models to Backend API in a bulk insertion request.
"""Post extracted models to the backend in bulk.
Args:
models: Extracted or merged models to post
extracted_items: Extracted models to post
Raises:
HTTPError: If insert was not accepted, crashes or times out
HTTPError: If post was not accepted, crashes or times out
Returns:
Identifiers of posted extracted models
"""
# XXX deprecated method, please use `post_extracted_items` instead
return cast(
list[AnyExtractedIdentifier],
self.post_extracted_items(extracted_items).identifiers,
)

def post_extracted_items(
    self,
    extracted_items: list[AnyExtractedModel],
) -> IdentifiersResponse:
    """Send a batch of extracted items to the backend's `ingest` endpoint.

    Args:
        extracted_items: Extracted items to post

    Raises:
        HTTPError: If post was not accepted, crashes or times out

    Returns:
        Response model from the endpoint
    """
    # wrap the items in the request model so they are serialized under `items`
    request_body = ExtractedItemsRequest(items=extracted_items)
    raw_response = self.request(
        method="POST",
        endpoint="ingest",
        payload=request_body,
    )
    return IdentifiersResponse.model_validate(raw_response)

def fetch_extracted_items(
    self,
    query_string: str | None,
    stable_target_id: str | None,
    entity_type: list[str] | None,
    skip: int,
    limit: int,
) -> ExtractedItemsResponse:
    """Search the backend for extracted items and return one page of results.

    Args:
        query_string: Full-text search query
        stable_target_id: The item's stableTargetId
        entity_type: The item's entityType
        skip: How many items to skip for pagination
        limit: How many items to return in one page

    Raises:
        HTTPError: If search was not accepted, crashes or times out

    Returns:
        One page of extracted items and the total count that was matched
    """
    # pagination values are sent as strings, the filters are passed through as-is
    filters: dict[str, list[str] | str | None] = {
        "q": query_string,
        "stableTargetId": stable_target_id,
        "entityType": entity_type,
        "skip": str(skip),
        "limit": str(limit),
    }
    page = self.request(
        method="GET",
        endpoint="extracted-item",
        params=filters,
    )
    return ExtractedItemsResponse.model_validate(page)

def fetch_merged_items(
    self,
    query_string: str | None,
    entity_type: list[str] | None,
    skip: int,
    limit: int,
) -> MergedItemsResponse:
    """Search the backend for merged items and return one page of results.

    Args:
        query_string: Full-text search query
        entity_type: The item's entityType
        skip: How many items to skip for pagination
        limit: How many items to return in one page

    Raises:
        HTTPError: If search was not accepted, crashes or times out

    Returns:
        One page of merged items and the total count that was matched
    """
    # XXX this endpoint will only return faux merged items for now (MX-1382)
    # pagination values are sent as strings, the filters are passed through as-is
    filters: dict[str, list[str] | str | None] = {
        "q": query_string,
        "entityType": entity_type,
        "skip": str(skip),
        "limit": str(limit),
    }
    page = self.request(
        method="GET",
        endpoint="merged-item",
        params=filters,
    )
    return MergedItemsResponse.model_validate(page)

def get_merged_item(
    self,
    stable_target_id: str,
) -> AnyMergedModel:
    """Return one merged item for the given `stableTargetId`.

    Args:
        stable_target_id: The merged item's identifier

    Raises:
        HTTPError: If no merged item was found

    Returns:
        A single merged item
    """
    # XXX stop-gap until the backend has a proper get merged item endpoint (MX-1669)
    # query the search endpoint, filtered by identifier and limited to one result
    response = self.request(
        method="GET",
        endpoint="merged-item",
        params={
            "stableTargetId": stable_target_id,
            "limit": "1",
        },
    )
    response_model = MergedItemsResponse.model_validate(response)
    try:
        return response_model.items[0]
    except IndexError:
        # an empty page means nothing matched; hide the IndexError from callers
        raise HTTPError("merged item was not found") from None

def preview_merged_item(
    self,
    stable_target_id: str,
    rule_set: AnyRuleSetRequest,
) -> AnyMergedModel:
    """Preview the merged item that results from applying the given rule-set.

    Args:
        stable_target_id: The extracted items' `stableTargetId`
        rule_set: A rule-set to use for previewing

    Raises:
        HTTPError: If preview produces errors, crashes or times out

    Returns:
        A single merged item
    """
    # XXX experimental method until the backend has a preview endpoint (MX-1406)
    # NOTE(review): the rule-set is sent as the body of a GET request; confirm
    #   that the backend endpoint really expects GET rather than POST here
    preview_endpoint = f"preview-item/{stable_target_id}"
    raw_preview = self.request(
        method="GET",
        endpoint=preview_endpoint,
        payload=rule_set,
    )
    return MergedModelTypeAdapter.validate_python(raw_preview)

def get_rule_set(
self,
stable_target_id: str,
) -> AnyRuleSetResponse:
"""Return a triple of rules for the given `stableTargetId`.
Args:
stable_target_id: The merged item's identifier
Raises:
HTTPError: If no rule-set was found
Returns:
A set of three rules
"""
# XXX experimental method until the backend has a rule-set endpoint (MX-1416)
response = self.request(
method="POST", endpoint="ingest", payload={"items": models}
method="GET",
endpoint=f"rule-set/{stable_target_id}",
)
insert_response = BulkInsertResponse.model_validate(response)
return insert_response.identifiers
return RuleSetResponseTypeAdapter.validate_python(response)
48 changes: 43 additions & 5 deletions mex/common/backend_api/models.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,46 @@
from mex.common.models import BaseModel
from mex.common.types import AnyExtractedIdentifier
from typing import Annotated

from pydantic import Field, TypeAdapter

class BulkInsertResponse(BaseModel):
"""Response body for the bulk ingestion endpoint."""
from mex.common.models import (
AnyExtractedModel,
AnyMergedModel,
AnyRuleSetResponse,
BaseModel,
)
from mex.common.types import Identifier

identifiers: list[AnyExtractedIdentifier]

class ExtractedItemsRequest(BaseModel):
    """Request body that carries a list of extracted items."""

    # the extracted items to send
    items: list[AnyExtractedModel]


class ExtractedItemsResponse(BaseModel):
    """Response body with a list of extracted items and a total count."""

    # the extracted items contained in this response
    items: list[AnyExtractedModel]
    # the total count reported alongside the items
    total: int


class MergedItemsResponse(BaseModel):
    """Response body with a list of merged items and a total count."""

    # the merged items contained in this response
    items: list[AnyMergedModel]
    # the total count reported alongside the items
    total: int


class IdentifiersResponse(BaseModel):
    """Response model for a list of identifiers."""

    # the identifiers returned by the endpoint
    identifiers: list[Identifier]


# Type adapter for validating a single merged item; the `entityType` field is
# declared as the discriminator of the `AnyMergedModel` union.
MergedModelTypeAdapter: TypeAdapter[AnyMergedModel] = TypeAdapter(
Annotated[AnyMergedModel, Field(discriminator="entityType")]
)
# Type adapter for validating a single rule-set response; the `entityType` field
# is declared as the discriminator of the `AnyRuleSetResponse` union.
RuleSetResponseTypeAdapter: TypeAdapter[AnyRuleSetResponse] = TypeAdapter(
Annotated[AnyRuleSetResponse, Field(discriminator="entityType")]
)
BulkInsertResponse = IdentifiersResponse # deprecated alias, use `IdentifiersResponse`
11 changes: 8 additions & 3 deletions mex/common/connector/http.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
from abc import abstractmethod
from collections.abc import Mapping
from typing import Any, Literal, cast

import backoff
Expand Down Expand Up @@ -48,7 +49,7 @@ def request(
method: Literal["OPTIONS", "POST", "GET", "PUT", "DELETE"],
endpoint: str | None = None,
payload: Any = None,
params: dict[str, str] | None = None,
params: Mapping[str, list[str] | str | None] | None = None,
**kwargs: Any,
) -> dict[str, Any]:
"""Prepare and send a request with error handling and payload de/serialization.
Expand Down Expand Up @@ -77,10 +78,10 @@ def request(
if not kwargs.get("headers"):
kwargs.setdefault("headers", {})
kwargs["headers"].setdefault("Accept", "application/json")
kwargs["headers"].setdefault("User-Agent", "rki/mex")

if payload:
kwargs["data"] = json.dumps(payload, cls=MExEncoder)
kwargs["headers"].setdefault("User-Agent", "rki/mex")

# Send request
response = self._send_request(method, url, params, **kwargs)
Expand Down Expand Up @@ -114,7 +115,11 @@ def request(
)
@backoff.on_exception(backoff.fibo, RequestException, max_tries=6)
def _send_request(
self, method: str, url: str, params: dict[str, str] | None, **kwargs: Any
self,
method: str,
url: str,
params: Mapping[str, list[str] | str | None] | None,
**kwargs: Any,
) -> Response:
"""Send the request with advanced retrying rules."""
return self.session.request(method, url, params, **kwargs)
Expand Down
Loading

0 comments on commit 9b4c9f8

Please sign in to comment.