Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Structured logging for DSR requests #4594

Merged
merged 12 commits into from
Feb 14, 2024
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ The types of changes are:
- Access and erasure support for Talkable [#4589](https://github.com/ethyca/fides/pull/4589)
- Support temporary credentials in AWS generate + scan features [#4607](https://github.com/ethyca/fides/pull/4603), [#4608](https://github.com/ethyca/fides/pull/4608)
- Add ability to store and read Fides cookie in Base64 format [#4556](https://github.com/ethyca/fides/pull/4556)
- Structured logging for SaaS connector requests [#4594](https://github.com/ethyca/fides/pull/4594)

### Fixed

Expand Down
6 changes: 5 additions & 1 deletion src/fides/api/graph/traversal.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@
)
from fides.api.graph.graph import DatasetGraph, Edge, Node
from fides.api.util.collection_util import Row, append
from fides.api.util.logger_context_utils import Contextualizable, LoggerContextKeys
from fides.api.util.matching_queue import MatchingQueue

Datastore = Dict[CollectionAddress, List[Row]]
"""A type expressing retrieved rows of data from a specified collection"""


class TraversalNode:
class TraversalNode(Contextualizable):
"""Base traversal traversal_node type. This type will never be used directly."""

def __init__(self, node: Node):
Expand Down Expand Up @@ -161,6 +162,9 @@ def debug(self) -> Dict[str, Any]:
"to": {k: set(v) for k, v in to.items()},
}

def get_log_context(self) -> Dict[LoggerContextKeys, Any]:
return {LoggerContextKeys.collection: self.node.collection.name}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice, i think this ends up being quite elegant since these additional pieces of context are composed when passed to a function that's decorated. i really like it! 👏



def artificial_traversal_node(address: CollectionAddress) -> TraversalNode:
"""generate an 'artificial' traversal_node pointing to the given address. This is used to
Expand Down
8 changes: 7 additions & 1 deletion src/fides/api/models/privacy_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
from fides.api.util.collection_util import Row
from fides.api.util.constants import API_DATE_FORMAT
from fides.api.util.identity_verification import IdentityVerificationMixin
from fides.api.util.logger_context_utils import Contextualizable, LoggerContextKeys
from fides.common.api.scope_registry import PRIVACY_REQUEST_CALLBACK_RESUME
from fides.config import CONFIG

Expand Down Expand Up @@ -181,7 +182,9 @@ def generate_request_callback_jwe(webhook: PolicyPreWebhook) -> str:
)


class PrivacyRequest(IdentityVerificationMixin, Base): # pylint: disable=R0904
class PrivacyRequest(
IdentityVerificationMixin, Contextualizable, Base
): # pylint: disable=R0904
"""
The DB ORM model to describe current and historic PrivacyRequests.
A privacy request is a database record representing the request's
Expand Down Expand Up @@ -950,6 +953,9 @@ def error_processing(self, db: Session) -> None:
db=db, data={"message_sent": False, "privacy_request_id": self.id}
)

def get_log_context(self) -> Dict[LoggerContextKeys, Any]:
return {LoggerContextKeys.privacy_request_id: self.id}


class PrivacyRequestError(Base):
"""The DB ORM model to track PrivacyRequests error message status."""
Expand Down
64 changes: 29 additions & 35 deletions src/fides/api/service/connectors/saas/authenticated_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@
RateLimiterPeriod,
RateLimiterRequest,
)
from fides.api.util.logger_context_utils import (
connection_exception_details,
request_details,
)
from fides.api.util.saas_util import deny_unsafe_hosts
from fides.config import CONFIG

Expand Down Expand Up @@ -100,7 +104,7 @@ def retry_send( # type: ignore
def decorator(func: Callable) -> Callable:
@wraps(func)
def result(*args: Any, **kwargs: Any) -> Response:
self = args[0]
self: AuthenticatedClient = args[0]
last_exception: Optional[Union[BaseException, Exception]] = None

for attempt in range(retry_count + 1):
Expand Down Expand Up @@ -129,6 +133,9 @@ def result(*args: Any, **kwargs: Any) -> Response:
last_exception = ConnectionException(
f"Operational Error connecting to '{self.configuration.key}'{dev_mode_log}"
)
logger.bind(
**connection_exception_details(exc, self.uri)
).error("Connector request failed.")
# requests library can raise ConnectionError, Timeout or TooManyRedirects
# we will not retry these as they don't usually point to intermittent issues
break
Expand Down Expand Up @@ -214,22 +221,28 @@ def send(

response = self.session.send(prepared_request)

log_request_and_response_for_debugging(
prepared_request, response
) # Dev mode only
ignore_error = self._should_ignore_error(
status_code=response.status_code, errors_to_ignore=ignore_errors
)
context_logger = logger.bind(
**request_details(prepared_request, response, ignore_error)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

at some point, it'd be nice if we could figure out a not-too-crazy way to add/override third party classes (e.g. PreparedRequest and Response) and "annotate" them with get_log_context methods, i.e. make them extend Contextualizable... and then some of this could be further abstracted out into those methods, rather than the standalone utility functions, which stick out a bit now.

anyway, not something to worry about in this initial iteration, just thought i'd note it!

)

if response.ok:
context_logger.info("Connector request successful.")
return response

if ignore_error:
context_logger.info(
"Connector request successful. Ignoring errors on response with status code {} as configured.",
response.status_code,
)
return response

if not response.ok:
if self._should_ignore_error(
status_code=response.status_code,
errors_to_ignore=ignore_errors,
):
logger.info(
"Ignoring errors on response with status code {} as configured.",
response.status_code,
)
return response
raise RequestFailureResponseException(response=response)
return response
context_logger.error(
"Connector request failed with status code {}.", response.status_code
)
raise RequestFailureResponseException(response=response)


class RequestFailureResponseException(FidesopsException):
Expand All @@ -242,25 +255,6 @@ def __init__(self, response: Response):
self.response = response


def log_request_and_response_for_debugging(
prepared_request: PreparedRequest, response: Response
) -> None:
"""Log SaaS request and response in dev mode only"""
if CONFIG.dev_mode:
logger.info(
"\n\n-----------SAAS REQUEST-----------"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lol, this is nice to get rid of :)

"\n{} {}"
"\nheaders: {}"
"\nbody: {}"
"\nresponse: {}",
prepared_request.method,
prepared_request.url,
prepared_request.headers,
prepared_request.body,
response._content, # pylint: disable=W0212
)


def get_retry_after(response: Response, max_retry_after: int = 300) -> Optional[float]:
"""Given a Response object, parses Retry-After header and calculates how long we should sleep for"""
retry_after = response.headers.get("Retry-After", None)
Expand Down
26 changes: 23 additions & 3 deletions src/fides/api/service/connectors/saas_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from fides.api.models.policy import Policy
from fides.api.models.privacy_request import PrivacyRequest
from fides.api.schemas.limiter.rate_limit_config import RateLimitConfig
from fides.api.schemas.policy import ActionType
from fides.api.schemas.saas.saas_config import (
ClientConfig,
ConsentRequestMap,
Expand All @@ -40,16 +41,31 @@
cache_initial_status_and_identities_for_consent_reporting,
should_opt_in_to_service,
)
from fides.api.util.logger_context_utils import (
Contextualizable,
LoggerContextKeys,
log_context,
)
from fides.api.util.saas_util import (
CUSTOM_PRIVACY_REQUEST_FIELDS,
assign_placeholders,
map_param_values,
)


class SaaSConnector(BaseConnector[AuthenticatedClient]):
class SaaSConnector(BaseConnector[AuthenticatedClient], Contextualizable):
"""A connector type to integrate with third-party SaaS APIs"""

def get_log_context(self) -> Dict[LoggerContextKeys, Any]:
return {
LoggerContextKeys.system_key: (
self.configuration.system.fides_key
if self.configuration.system
else None
),
LoggerContextKeys.connection_key: self.configuration.key,
}

def __init__(self, configuration: ConnectionConfig):
super().__init__(configuration)
required_saas_config = configuration.get_saas_config()
Expand Down Expand Up @@ -123,6 +139,7 @@ def unset_connector_state(self) -> None:
self.current_privacy_request = None
self.current_saas_request = None

@log_context
def test_connection(self) -> Optional[ConnectionTestStatus]:
"""Generates and executes a test connection based on the SaaS config"""
test_request: SaaSRequest = self.saas_config.test_request
Expand Down Expand Up @@ -150,11 +167,12 @@ def create_client(self) -> AuthenticatedClient:
client_config = self.get_client_config()
rate_limit_config = self.get_rate_limit_config()

logger.info("Creating client to {}", uri)
logger.debug("Creating client to {}", uri)
return AuthenticatedClient(
uri, self.configuration, client_config, rate_limit_config
)

@log_context(action_type=ActionType.access.value)
def retrieve_data(
self,
node: TraversalNode,
Expand Down Expand Up @@ -370,6 +388,7 @@ def process_response_data(

return rows

@log_context(action_type=ActionType.erasure.value)
def mask_data(
self,
node: TraversalNode,
Expand Down Expand Up @@ -426,7 +445,7 @@ def mask_data(
)
except ValueError as exc:
if masking_request.skip_missing_param_values:
logger.info(
logger.debug(
"Skipping optional masking request on node {}: {}",
node.address.value,
exc,
Expand Down Expand Up @@ -456,6 +475,7 @@ def relevant_consent_identities(
related_identities[identity_type] = identity_value
return related_identities

@log_context(action_type=ActionType.consent.value)
def run_consent_request(
self,
node: TraversalNode,
Expand Down
10 changes: 5 additions & 5 deletions src/fides/api/service/connectors/saas_query_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,11 @@ def get_erasure_request_by_action(
self.endpoints[collection_name].requests, action
)
if request:
logger.info(
logger.debug(
"Found matching endpoint to {} '{}' collection", action, collection_name
)
else:
logger.info(
logger.debug(
"Unable to find matching endpoint to {} '{}' collection",
action,
collection_name,
Expand Down Expand Up @@ -146,7 +146,7 @@ def get_masking_request(self) -> Optional[SaaSRequest]:
# store action name for logging purposes
self.action = action_type

logger.info(
logger.debug(
"Selecting '{}' action to perform masking request for '{}' collection.",
action_type,
self.collection_name,
Expand Down Expand Up @@ -319,7 +319,7 @@ def generate_query(
self.action, self.collection_name, self.current_request, param_values # type: ignore
)

logger.info("Populated request params for {}", self.current_request.path)
logger.debug("Populated request params for {}", self.current_request.path)

return saas_request_params

Expand Down Expand Up @@ -450,7 +450,7 @@ def generate_update_request_params(
self.action, self.collection_name, update_request, param_values # type: ignore
)

logger.info("Populated request params for {}", update_request.path)
logger.debug("Populated request params for {}", update_request.path)
return saas_request_params

def all_value_map(self, row: Row) -> Dict[str, Any]:
Expand Down
Loading
Loading