diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index 4a1ec14ca1d4e..9f011790990ec 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -313,12 +313,22 @@ class TableauConfig( # Tableau project pattern project_pattern: AllowDenyPattern = Field( default=AllowDenyPattern.allow_all(), - description="Filter for specific Tableau projects. For example, use 'My Project' to ingest a root-level Project with name 'My Project', or 'My Project/Nested Project' to ingest a nested Project with name 'Nested Project'. " + description="[deprecated] Use project_path_pattern instead. Filter for specific Tableau projects. For example, use 'My Project' to ingest a root-level Project with name 'My Project', or 'My Project/Nested Project' to ingest a nested Project with name 'Nested Project'. " "By default, all Projects nested inside a matching Project will be included in ingestion. " "You can both allow and deny projects based on their name using their name, or a Regex pattern. " "Deny patterns always take precedence over allow patterns. " "By default, all projects will be ingested.", ) + _deprecate_projects_pattern = pydantic_field_deprecated("project_pattern") + + project_path_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description="Filters Tableau projects by their full path. For instance, 'My Project/Nested Project' targets a specific nested project named 'Nested Project'." + " This is also useful when you need to exclude all nested projects under a particular project." + " You can allow or deny projects by specifying their path or a regular expression pattern." + " Deny patterns always override allow patterns." + " By default, all projects are ingested.", + ) project_path_separator: str = Field( default="/", @@ -454,17 +464,23 @@ class TableauConfig( def projects_backward_compatibility(cls, values: Dict) -> Dict: projects = values.get("projects") project_pattern = values.get("project_pattern") - if project_pattern is None and projects: + project_path_pattern = values.get("project_path_pattern") + if project_pattern is None and project_path_pattern is None and projects: logger.warning( - "project_pattern is not set but projects is set. projects is deprecated, please use " - "project_pattern instead." + "projects is deprecated, please use " "project_path_pattern instead." ) logger.info("Initializing project_pattern from projects") values["project_pattern"] = AllowDenyPattern( allow=[f"^{prj}$" for prj in projects] ) - elif project_pattern != AllowDenyPattern.allow_all() and projects: - raise ValueError("projects is deprecated. Please use project_pattern only.") + elif (project_pattern or project_path_pattern) and projects: + raise ValueError( + "projects is deprecated. Please use project_path_pattern only." + ) + elif project_path_pattern and project_pattern: + raise ValueError( + "project_pattern is deprecated. Please use project_path_pattern only." + ) return values @@ -850,12 +866,13 @@ def form_path(project_id: str) -> List[str]: def _is_allowed_project(self, project: TableauProject) -> bool: # Either project name or project path should exist in allow - is_allowed: bool = self.config.project_pattern.allowed( - project.name - ) or self.config.project_pattern.allowed(self._get_project_path(project)) + is_allowed: bool = ( + self.config.project_pattern.allowed(project.name) + or self.config.project_pattern.allowed(self._get_project_path(project)) + ) and self.config.project_path_pattern.allowed(self._get_project_path(project)) if is_allowed is False: logger.info( - f"project({project.name}) is not allowed as per project_pattern" + f"Project ({project.name}) is not allowed as per project_pattern or project_path_pattern" ) return is_allowed @@ -887,28 +904,29 @@ def _init_tableau_project_registry(self, all_project_map: dict) -> None: logger.debug(f"Project {project.name} is added in project registry") projects_to_ingest[project.id] = project - # We rely on automatic browse paths (v2) when creating containers. That's why we need to sort the projects here. - # Otherwise, nested projects will not have the correct browse paths if not created in correct order / hierarchy. - self.tableau_project_registry = OrderedDict( - sorted(projects_to_ingest.items(), key=lambda item: len(item[1].path)) - ) - if self.config.extract_project_hierarchy is False: logger.debug( "Skipping project hierarchy processing as configuration extract_project_hierarchy is " "disabled" ) - return + else: + logger.debug( + "Reevaluating projects as extract_project_hierarchy is enabled" + ) - logger.debug("Reevaluating projects as extract_project_hierarchy is enabled") + for project in list_of_skip_projects: + if ( + project.parent_id in projects_to_ingest + and self._is_denied_project(project) is False + ): + logger.debug(f"Project {project.name} is added in project registry") + projects_to_ingest[project.id] = project - for project in list_of_skip_projects: - if ( - project.parent_id in self.tableau_project_registry - and self._is_denied_project(project) is False - ): - logger.debug(f"Project {project.name} is added in project registry") - self.tableau_project_registry[project.id] = project + # We rely on automatic browse paths (v2) when creating containers. That's why we need to sort the projects here. + # Otherwise, nested projects will not have the correct browse paths if not created in correct order / hierarchy. + self.tableau_project_registry = OrderedDict( + sorted(projects_to_ingest.items(), key=lambda item: len(item[1].path)) + ) def _init_datasource_registry(self) -> None: if self.server is None: diff --git a/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_allow_mces_golden.json b/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_allow_mces_golden.json new file mode 100644 index 0000000000000..8798ca291422c --- /dev/null +++ b/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_allow_mces_golden.json @@ -0,0 +1,352 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "tableau", + "project_id": "190a6a5c-63ed-4de1-8045-faeae5df5b01" + }, + "name": "default" + } + }, + "systemMetadata": { + "lastObserved": 1727349368101, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1727349368102, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + "systemMetadata": { + "lastObserved": 1727349368103, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368104, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1727349368105, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "tableau", + "project_id": "79d02655-88e5-45a6-9f9b-eeaf5fe54903" + }, + "name": "DenyProject" + } + }, + "systemMetadata": { + "lastObserved": 1727349368108, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1727349368109, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + "systemMetadata": { + "lastObserved": 1727349368109, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368110, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + } + }, + "systemMetadata": { + "lastObserved": 1727349368111, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368112, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "tableau", + "workbook_id": "ee012e36-d916-4c21-94ab-f0d66736af4e" + }, + "externalUrl": "https://do-not-connect/#/site/acryl/workbooks/17904", + "name": "Deny Pattern WorkBook", + "description": "" + } + }, + "systemMetadata": { + "lastObserved": 1727349368113, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1727349368114, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + "systemMetadata": { + "lastObserved": 1727349368115, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Workbook" + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368116, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:jawadqu@gmail.com", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1727349368117, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce" + } + }, + "systemMetadata": { + "lastObserved": 1727349368118, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "urn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368118, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_deny_mces_golden.json b/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_deny_mces_golden.json new file mode 100644 index 0000000000000..96dcfeb246c91 --- /dev/null +++ b/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_deny_mces_golden.json @@ -0,0 +1,184 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:252a054d4dd93cd657735aa46dd71370", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "tableau", + "project_id": "c30aafe5-44f4-4f28-80d3-d181010a263c" + }, + "name": "Project 2" + } + }, + "systemMetadata": { + "lastObserved": 1727349368232, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:252a054d4dd93cd657735aa46dd71370", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1727349368233, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:252a054d4dd93cd657735aa46dd71370", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + "systemMetadata": { + "lastObserved": 1727349368233, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:252a054d4dd93cd657735aa46dd71370", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368234, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:252a054d4dd93cd657735aa46dd71370", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1727349368235, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d2dcd6bd1bb954d62f1cfc68332ee873", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "tableau", + "project_id": "910733aa-2e95-4ac3-a2e8-71570751099d" + }, + "name": "Samples" + } + }, + "systemMetadata": { + "lastObserved": 1727349368238, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d2dcd6bd1bb954d62f1cfc68332ee873", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1727349368239, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d2dcd6bd1bb954d62f1cfc68332ee873", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + "systemMetadata": { + "lastObserved": 1727349368239, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d2dcd6bd1bb954d62f1cfc68332ee873", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368240, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d2dcd6bd1bb954d62f1cfc68332ee873", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1727349368241, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index 4be39f02757ba..5a5552a78c56f 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -545,7 +545,72 @@ def test_value_error_projects_and_project_pattern( pipeline_config=new_config, ) except Exception as e: - assert "projects is deprecated. Please use project_pattern only" in str(e) + assert "projects is deprecated. Please use project_path_pattern only" in str(e) + + +def test_project_pattern_deprecation(pytestconfig, tmp_path, mock_datahub_graph): + # Ingestion should raise ValueError + output_file_name: str = "tableau_project_pattern_deprecation_mces.json" + golden_file_name: str = "tableau_project_pattern_deprecation_mces_golden.json" + + new_config = config_source_default.copy() + del new_config["projects"] + new_config["project_pattern"] = {"allow": ["^Samples$"]} + new_config["project_path_pattern"] = {"allow": ["^Samples$"]} + + try: + tableau_ingest_common( + pytestconfig, + tmp_path, + mock_data(), + golden_file_name, + output_file_name, + mock_datahub_graph, + pipeline_config=new_config, + ) + except Exception as e: + assert ( + "project_pattern is deprecated. Please use project_path_pattern only" + in str(e) + ) + + +def test_project_path_pattern_allow(pytestconfig, tmp_path, mock_datahub_graph): + output_file_name: str = "tableau_project_path_pattern_allow_mces.json" + golden_file_name: str = "tableau_project_path_pattern_allow_mces_golden.json" + + new_config = config_source_default.copy() + del new_config["projects"] + new_config["project_path_pattern"] = {"allow": ["default/DenyProject"]} + + tableau_ingest_common( + pytestconfig, + tmp_path, + mock_data(), + golden_file_name, + output_file_name, + mock_datahub_graph, + pipeline_config=new_config, + ) + + +def test_project_path_pattern_deny(pytestconfig, tmp_path, mock_datahub_graph): + output_file_name: str = "tableau_project_path_pattern_deny_mces.json" + golden_file_name: str = "tableau_project_path_pattern_deny_mces_golden.json" + + new_config = config_source_default.copy() + del new_config["projects"] + new_config["project_path_pattern"] = {"deny": ["^default.*"]} + + tableau_ingest_common( + pytestconfig, + tmp_path, + mock_data(), + golden_file_name, + output_file_name, + mock_datahub_graph, + pipeline_config=new_config, + ) @freeze_time(FROZEN_TIME)