diff --git a/src/fides/api/api/v1/endpoints/generic_overrides.py b/src/fides/api/api/v1/endpoints/generic_overrides.py index 6c01610694..7ca5d8bdc9 100644 --- a/src/fides/api/api/v1/endpoints/generic_overrides.py +++ b/src/fides/api/api/v1/endpoints/generic_overrides.py @@ -86,8 +86,7 @@ async def create_dataset( ) -> Dict: """Create a new dataset""" try: - created = dataset_service.create_dataset(dataset) - return created.model_dump() + return dataset_service.create_dataset(dataset) except PydanticValidationError as e: raise HTTPException( status_code=HTTP_422_UNPROCESSABLE_ENTITY, @@ -193,8 +192,7 @@ async def get_dataset( """Get a single dataset by fides key""" service = DatasetService(db) try: - dataset = service.get_dataset(fides_key) - return dataset.model_dump() + return service.get_dataset(fides_key) except DatasetNotFoundException as e: raise HTTPException( status_code=HTTP_404_NOT_FOUND, diff --git a/src/fides/api/graph/graph.py b/src/fides/api/graph/graph.py index e99cfc550d..5d8f00c2db 100644 --- a/src/fides/api/graph/graph.py +++ b/src/fides/api/graph/graph.py @@ -196,12 +196,10 @@ def __init__(self, *datasets: GraphDataset) -> None: ) for dest_field_address, direction in ref_list: if dest_field_address.collection_address() not in self.nodes: - logger.warning( - "Referenced object {} does not exist", dest_field_address - ) + message = f"Referenced object {dest_field_address} from dataset {node_address.dataset} does not exist" + logger.warning(message) raise ValidationError( - f"Referred to object {dest_field_address} does not exist", - errors=[dest_field_address.value], + message, errors=[dest_field_address.value] ) self.edges.add( Edge.create_edge( diff --git a/src/fides/api/service/privacy_request/request_runner_service.py b/src/fides/api/service/privacy_request/request_runner_service.py index 8a3125ddc2..ae919ce7c9 100644 --- a/src/fides/api/service/privacy_request/request_runner_service.py +++ 
b/src/fides/api/service/privacy_request/request_runner_service.py @@ -3,7 +3,6 @@ import requests from loguru import logger -from pydantic import ValidationError from sqlalchemy.orm import Query, Session from fides.api import common_exceptions @@ -15,6 +14,7 @@ NoCachedManualWebhookEntry, PrivacyRequestExit, PrivacyRequestPaused, + ValidationError, ) from fides.api.db.session import get_db_session from fides.api.graph.config import CollectionAddress @@ -355,6 +355,17 @@ def run_privacy_request( if not dataset_config.connection_config.disabled ] dataset_graph = DatasetGraph(*dataset_graphs) + + # Add success log for dataset reference validation + privacy_request.add_success_execution_log( + session, + connection_key=None, + dataset_name="Dataset reference validation", + collection_name=None, + message=f"Dataset reference validation successful for privacy request: {privacy_request.id}", + action_type=privacy_request.policy.get_action_type(), # type: ignore + ) + identity_data = { key: value["value"] if isinstance(value, dict) else value for key, value in privacy_request.get_cached_identity_data().items() } @@ -482,7 +493,22 @@ # the appropriate checkpoint when all the Request Tasks have run. 
return + except ValidationError as exc: + # Handle validation errors from dataset graph creation + logger.error(f"Error validating dataset references: {str(exc)}") + privacy_request.add_error_execution_log( + session, + connection_key=None, + dataset_name="Dataset reference validation", + collection_name=None, + message=str(exc), + action_type=privacy_request.policy.get_action_type(), # type: ignore + ) + privacy_request.error_processing(db=session) + return + except BaseException as exc: # pylint: disable=broad-except + logger.error(f"Error running privacy request: {str(exc)}") privacy_request.error_processing(db=session) # If dev mode, log traceback _log_exception(exc, CONFIG.dev_mode) diff --git a/tests/ops/service/privacy_request/test_request_runner_service.py b/tests/ops/service/privacy_request/test_request_runner_service.py index cda747792a..d49a198623 100644 --- a/tests/ops/service/privacy_request/test_request_runner_service.py +++ b/tests/ops/service/privacy_request/test_request_runner_service.py @@ -8,6 +8,7 @@ import pytest from pydantic import ValidationError from sqlalchemy.orm import Session +from sqlalchemy.orm.attributes import flag_modified from fides.api.common_exceptions import ( ClientUnsuccessfulException, @@ -15,6 +16,7 @@ ) from fides.api.graph.graph import DatasetGraph from fides.api.models.application_config import ApplicationConfig +from fides.api.models.datasetconfig import DatasetConfig from fides.api.models.policy import CurrentStep, PolicyPostWebhook from fides.api.models.privacy_request import ( ActionType, @@ -1372,3 +1374,104 @@ def test_async_callback_erasure_request( # node cannot be paused db.refresh(pr) assert pr.status == PrivacyRequestStatus.complete + + +class TestDatasetReferenceValidation: + @pytest.mark.usefixtures("dataset_config") + @mock.patch( + "fides.api.service.privacy_request.request_runner_service.access_runner" + ) + @pytest.mark.parametrize( + "dsr_version", + ["use_dsr_3_0", "use_dsr_2_0"], + ) + def 
test_dataset_reference_validation_success( + self, + run_access, + db: Session, + privacy_request: PrivacyRequest, + run_privacy_request_task, + request, + dsr_version, + ): + """Test that successful dataset reference validation is logged""" + + request.getfixturevalue(dsr_version) # REQUIRED to test both DSR 3.0 and 2.0 + + # Run privacy request + run_privacy_request_task.delay(privacy_request.id).get( + timeout=PRIVACY_REQUEST_TASK_TIMEOUT + ) + + # Verify success log was created + success_logs = privacy_request.execution_logs.filter_by(status="complete").all() + + validation_logs = [ + log + for log in success_logs + if log.dataset_name == "Dataset reference validation" + ] + + assert len(validation_logs) == 1 + log = validation_logs[0] + assert log.connection_key is None + assert log.collection_name is None + assert ( + log.message + == f"Dataset reference validation successful for privacy request: {privacy_request.id}" + ) + assert log.action_type == privacy_request.policy.get_action_type() + + @mock.patch( + "fides.api.service.privacy_request.request_runner_service.access_runner" + ) + @pytest.mark.parametrize( + "dsr_version", + ["use_dsr_3_0", "use_dsr_2_0"], + ) + def test_dataset_reference_validation_error( + self, + run_access, + db: Session, + privacy_request: PrivacyRequest, + dataset_config: DatasetConfig, + run_privacy_request_task, + request, + dsr_version, + ): + """Test that dataset reference validation errors are logged""" + + request.getfixturevalue(dsr_version) # REQUIRED to test both DSR 3.0 and 2.0 + + # Add invalid dataset reference that will cause validation error + dataset_config.ctl_dataset.collections[0]["fields"][0]["fides_meta"] = { + "references": [ + {"dataset": "invalid_dataset", "field": "invalid_collection.field"} + ] + } + flag_modified(dataset_config.ctl_dataset, "collections") + dataset_config.save(db) + + # Run privacy request + run_privacy_request_task.delay(privacy_request.id).get( + timeout=PRIVACY_REQUEST_TASK_TIMEOUT + ) 
+ # Verify error log was created + error_logs = privacy_request.execution_logs.filter_by(status="error").all() + + validation_logs = [ + log + for log in error_logs + if log.dataset_name == "Dataset reference validation" + ] + + assert len(validation_logs) == 1 + log = validation_logs[0] + assert log.connection_key is None + assert log.collection_name is None + assert ( + "Referenced object invalid_dataset:invalid_collection:field from dataset postgres_example_subscriptions_dataset does not exist" + in log.message + ) + assert log.action_type == privacy_request.policy.get_action_type()