From 1b5cfb9223b73f68aa4e6327c3c61c0e50dc388b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yanik=20H=C3=A4ni?= Date: Fri, 5 Jul 2024 10:35:08 +0200 Subject: [PATCH 1/7] introduce project_path_pattern --- .../src/datahub/ingestion/source/tableau.py | 51 ++++++------------- 1 file changed, 15 insertions(+), 36 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index 1d44fb6122a362..e7ecc97eb4be39 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -293,6 +293,16 @@ class TableauConfig( "By default, all projects will be ingested.", ) + project_path_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description="Filter for specific Tableau projects by checking their full path. For example, use 'My Project/Nested Project' to ingest a nested project with name 'Nested Project'. " + "The difference to project_pattern is that project_path_pattern exclusively checks the projects path and not both the path and the name." + "This is needed if you for example want to exclude all nested projects of a specific project." + "You can both allow and deny projects based on their path using a path, or a Regex pattern. " + "Deny patterns always take precedence over allow patterns. " + "By default, all projects will be ingested.", + ) + project_path_separator: str = Field( default="/", description="The separator used for the project_pattern field between project names. By default, we use a slash. 
" @@ -681,30 +691,16 @@ def form_path(project_id: str) -> List[str]: def _is_allowed_project(self, project: TableauProject) -> bool: # Either project name or project path should exist in allow - is_allowed: bool = self.config.project_pattern.allowed( - project.name - ) or self.config.project_pattern.allowed(self._get_project_path(project)) + is_allowed: bool = ( + self.config.project_pattern.allowed(project.name) + or self.config.project_pattern.allowed(self._get_project_path(project)) + ) and self.config.project_path_pattern.allowed(self._get_project_path(project)) if is_allowed is False: logger.info( - f"project({project.name}) is not allowed as per project_pattern" + f"Project ({project.name}) is not allowed as per project_pattern or project_path_pattern" ) return is_allowed - def _is_denied_project(self, project: TableauProject) -> bool: - # Either project name or project path should exist in deny - for deny_pattern in self.config.project_pattern.deny: - # Either name or project path is denied - if re.match( - deny_pattern, project.name, self.config.project_pattern.regex_flags - ) or re.match( - deny_pattern, - self._get_project_path(project), - self.config.project_pattern.regex_flags, - ): - return True - logger.info(f"project({project.name}) is not denied as per project_pattern") - return False - def _init_tableau_project_registry(self, all_project_map: dict) -> None: list_of_skip_projects: List[TableauProject] = [] @@ -718,23 +714,6 @@ def _init_tableau_project_registry(self, all_project_map: dict) -> None: logger.debug(f"Project {project.name} is added in project registry") self.tableau_project_registry[project.id] = project - if self.config.extract_project_hierarchy is False: - logger.debug( - "Skipping project hierarchy processing as configuration extract_project_hierarchy is " - "disabled" - ) - return - - logger.debug("Reevaluating projects as extract_project_hierarchy is enabled") - - for project in list_of_skip_projects: - if ( - project.parent_id in 
self.tableau_project_registry - and self._is_denied_project(project) is False - ): - logger.debug(f"Project {project.name} is added in project registry") - self.tableau_project_registry[project.id] = project - def _init_datasource_registry(self) -> None: if self.server is None: return From cd9b9cb5fa6fed2074f116024ec2298921fb8da5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yanik=20H=C3=A4ni?= Date: Mon, 9 Sep 2024 13:19:32 +0200 Subject: [PATCH 2/7] revert removal of extract_project_hierarchy config property --- .../src/datahub/ingestion/source/tableau.py | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index f17018895d42cd..aef8b4f572068f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -857,6 +857,21 @@ def _is_allowed_project(self, project: TableauProject) -> bool: ) return is_allowed + def _is_denied_project(self, project: TableauProject) -> bool: + # Either project name or project path should exist in deny + for deny_pattern in self.config.project_pattern.deny: + # Either name or project path is denied + if re.match( + deny_pattern, project.name, self.config.project_pattern.regex_flags + ) or re.match( + deny_pattern, + self._get_project_path(project), + self.config.project_pattern.regex_flags, + ): + return True + logger.info(f"project({project.name}) is not denied as per project_pattern") + return False + def _init_tableau_project_registry(self, all_project_map: dict) -> None: list_of_skip_projects: List[TableauProject] = [] projects_to_ingest = {} @@ -870,6 +885,22 @@ def _init_tableau_project_registry(self, all_project_map: dict) -> None: logger.debug(f"Project {project.name} is added in project registry") projects_to_ingest[project.id] = project + if self.config.extract_project_hierarchy is False: + logger.debug( + "Skipping 
project hierarchy processing as configuration extract_project_hierarchy is " + "disabled" + ) + else: + logger.debug("Reevaluating projects as extract_project_hierarchy is enabled") + + for project in list_of_skip_projects: + if ( + project.parent_id in self.tableau_project_registry + and self._is_denied_project(project) is False + ): + logger.debug(f"Project {project.name} is added in project registry") + self.tableau_project_registry[project.id] = project + # We rely on automatic browse paths (v2) when creating containers. That's why we need to sort the projects here. # Otherwise, nested projects will not have the correct browse paths if not created in correct order / hierarchy. self.tableau_project_registry = OrderedDict( From 67249699cae2061028a472bd3a75fdc6788229a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yanik=20H=C3=A4ni?= Date: Mon, 9 Sep 2024 13:23:12 +0200 Subject: [PATCH 3/7] fix linting issues --- .../src/datahub/ingestion/source/tableau.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index aef8b4f572068f..2c75dea9938e30 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -862,7 +862,7 @@ def _is_denied_project(self, project: TableauProject) -> bool: for deny_pattern in self.config.project_pattern.deny: # Either name or project path is denied if re.match( - deny_pattern, project.name, self.config.project_pattern.regex_flags + deny_pattern, project.name, self.config.project_pattern.regex_flags ) or re.match( deny_pattern, self._get_project_path(project), @@ -891,12 +891,14 @@ def _init_tableau_project_registry(self, all_project_map: dict) -> None: "disabled" ) else: - logger.debug("Reevaluating projects as extract_project_hierarchy is enabled") + logger.debug( + "Reevaluating projects as extract_project_hierarchy is 
enabled" + ) for project in list_of_skip_projects: if ( - project.parent_id in self.tableau_project_registry - and self._is_denied_project(project) is False + project.parent_id in self.tableau_project_registry + and self._is_denied_project(project) is False ): logger.debug(f"Project {project.name} is added in project registry") self.tableau_project_registry[project.id] = project From 135428e8de9174d295bf06e6ee518467d1ae8e65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yanik=20H=C3=A4ni?= Date: Wed, 11 Sep 2024 09:10:33 +0200 Subject: [PATCH 4/7] change description and fix project hierarchy bug --- .../datahub/ingestion/source/tableau/tableau.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index 5e17099cddd091..83f85fcb23d591 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -322,12 +322,12 @@ class TableauConfig( project_path_pattern: AllowDenyPattern = Field( default=AllowDenyPattern.allow_all(), - description="Filter for specific Tableau projects by checking their full path. For example, use 'My Project/Nested Project' to ingest a nested project with name 'Nested Project'. " - "The difference to project_pattern is that project_path_pattern exclusively checks the projects path and not both the path and the name." - "This is needed if you for example want to exclude all nested projects of a specific project." - "You can both allow and deny projects based on their path using a path, or a Regex pattern. " - "Deny patterns always take precedence over allow patterns. " - "By default, all projects will be ingested.", + description="Filters Tableau projects by their full path. For instance, 'My Project/Nested Project' targets a specific nested project named 'Nested Project'." 
+ " Unlike project_pattern, this field only checks the project path, not both the path and project name." + " This is useful when you need to exclude all nested projects under a particular project." + " You can allow or deny projects by specifying their path or a regular expression pattern." + " Deny patterns always override allow patterns." + " By default, all projects are ingested.", ) project_path_separator: str = Field( @@ -910,11 +910,11 @@ def _init_tableau_project_registry(self, all_project_map: dict) -> None: for project in list_of_skip_projects: if ( - project.parent_id in self.tableau_project_registry + project.parent_id in projects_to_ingest and self._is_denied_project(project) is False ): logger.debug(f"Project {project.name} is added in project registry") - self.tableau_project_registry[project.id] = project + projects_to_ingest[project.id] = project # We rely on automatic browse paths (v2) when creating containers. That's why we need to sort the projects here. # Otherwise, nested projects will not have the correct browse paths if not created in correct order / hierarchy. 
From e26aec918c7f8b9f38dd3f4ca179a7a1903ce97b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yanik=20H=C3=A4ni?= Date: Wed, 25 Sep 2024 14:54:54 +0200 Subject: [PATCH 5/7] deprecate project_pattern --- .../ingestion/source/tableau/tableau.py | 22 ++++++++----- .../tableau/test_tableau_ingest.py | 31 ++++++++++++++++++- 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index 83f85fcb23d591..9f011790990ec2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -313,18 +313,18 @@ class TableauConfig( # Tableau project pattern project_pattern: AllowDenyPattern = Field( default=AllowDenyPattern.allow_all(), - description="Filter for specific Tableau projects. For example, use 'My Project' to ingest a root-level Project with name 'My Project', or 'My Project/Nested Project' to ingest a nested Project with name 'Nested Project'. " + description="[deprecated] Use project_path_pattern instead. Filter for specific Tableau projects. For example, use 'My Project' to ingest a root-level Project with name 'My Project', or 'My Project/Nested Project' to ingest a nested Project with name 'Nested Project'. " "By default, all Projects nested inside a matching Project will be included in ingestion. " "You can both allow and deny projects based on their name using their name, or a Regex pattern. " "Deny patterns always take precedence over allow patterns. " "By default, all projects will be ingested.", ) + _deprecate_projects_pattern = pydantic_field_deprecated("project_pattern") project_path_pattern: AllowDenyPattern = Field( default=AllowDenyPattern.allow_all(), description="Filters Tableau projects by their full path. For instance, 'My Project/Nested Project' targets a specific nested project named 'Nested Project'." 
- " Unlike project_pattern, this field only checks the project path, not both the path and project name." - " This is useful when you need to exclude all nested projects under a particular project." + " This is also useful when you need to exclude all nested projects under a particular project." " You can allow or deny projects by specifying their path or a regular expression pattern." " Deny patterns always override allow patterns." " By default, all projects are ingested.", @@ -464,17 +464,23 @@ class TableauConfig( def projects_backward_compatibility(cls, values: Dict) -> Dict: projects = values.get("projects") project_pattern = values.get("project_pattern") - if project_pattern is None and projects: + project_path_pattern = values.get("project_path_pattern") + if project_pattern is None and project_path_pattern is None and projects: logger.warning( - "project_pattern is not set but projects is set. projects is deprecated, please use " - "project_pattern instead." + "projects is deprecated, please use " "project_path_pattern instead." ) logger.info("Initializing project_pattern from projects") values["project_pattern"] = AllowDenyPattern( allow=[f"^{prj}$" for prj in projects] ) - elif project_pattern != AllowDenyPattern.allow_all() and projects: - raise ValueError("projects is deprecated. Please use project_pattern only.") + elif (project_pattern or project_path_pattern) and projects: + raise ValueError( + "projects is deprecated. Please use project_path_pattern only." + ) + elif project_path_pattern and project_pattern: + raise ValueError( + "project_pattern is deprecated. Please use project_path_pattern only." 
+ ) return values diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index 4be39f02757bad..de68804de0ee5c 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -545,7 +545,36 @@ def test_value_error_projects_and_project_pattern( pipeline_config=new_config, ) except Exception as e: - assert "projects is deprecated. Please use project_pattern only" in str(e) + assert "projects is deprecated. Please use project_path_pattern only" in str(e) + + +def test_project_pattern_deprecation( + pytestconfig, tmp_path, mock_datahub_graph +): + # Ingestion should raise ValueError + output_file_name: str = "tableau_project_pattern_deprecation_mces.json" + golden_file_name: str = "tableau_project_pattern_deprecation_mces_golden.json" + + new_config = config_source_default.copy() + del new_config["projects"] + new_config["project_pattern"] = {"allow": ["^Samples$"]} + new_config["project_path_pattern"] = {"allow": ["^Samples$"]} + + try: + tableau_ingest_common( + pytestconfig, + tmp_path, + mock_data(), + golden_file_name, + output_file_name, + mock_datahub_graph, + pipeline_config=new_config, + ) + except Exception as e: + assert ( + "project_pattern is deprecated. 
Please use project_path_pattern only" + in str(e) + ) @freeze_time(FROZEN_TIME) From 721f3ab2cdaecde22edae1d3b4aedaaa86e7deae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yanik=20H=C3=A4ni?= Date: Wed, 25 Sep 2024 16:21:14 +0200 Subject: [PATCH 6/7] fix linting issues --- .../tests/integration/tableau/test_tableau_ingest.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index de68804de0ee5c..54b805d022d604 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -548,9 +548,7 @@ def test_value_error_projects_and_project_pattern( assert "projects is deprecated. Please use project_path_pattern only" in str(e) -def test_project_pattern_deprecation( - pytestconfig, tmp_path, mock_datahub_graph -): +def test_project_pattern_deprecation(pytestconfig, tmp_path, mock_datahub_graph): # Ingestion should raise ValueError output_file_name: str = "tableau_project_pattern_deprecation_mces.json" golden_file_name: str = "tableau_project_pattern_deprecation_mces_golden.json" From d73b058bcbe03a1609bdc55ca045bc47c7bdf0ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yanik=20H=C3=A4ni?= Date: Thu, 26 Sep 2024 13:21:20 +0200 Subject: [PATCH 7/7] add new tests for project_path_pattern --- ...roject_path_pattern_allow_mces_golden.json | 352 ++++++++++++++++++ ...project_path_pattern_deny_mces_golden.json | 184 +++++++++ .../tableau/test_tableau_ingest.py | 38 ++ 3 files changed, 574 insertions(+) create mode 100644 metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_allow_mces_golden.json create mode 100644 metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_deny_mces_golden.json diff --git a/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_allow_mces_golden.json 
b/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_allow_mces_golden.json new file mode 100644 index 00000000000000..8798ca291422cb --- /dev/null +++ b/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_allow_mces_golden.json @@ -0,0 +1,352 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "tableau", + "project_id": "190a6a5c-63ed-4de1-8045-faeae5df5b01" + }, + "name": "default" + } + }, + "systemMetadata": { + "lastObserved": 1727349368101, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1727349368102, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + "systemMetadata": { + "lastObserved": 1727349368103, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368104, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": 
"container", + "entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1727349368105, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "tableau", + "project_id": "79d02655-88e5-45a6-9f9b-eeaf5fe54903" + }, + "name": "DenyProject" + } + }, + "systemMetadata": { + "lastObserved": 1727349368108, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1727349368109, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + "systemMetadata": { + "lastObserved": 1727349368109, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368110, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + 
"pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + } + }, + "systemMetadata": { + "lastObserved": 1727349368111, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368112, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "tableau", + "workbook_id": "ee012e36-d916-4c21-94ab-f0d66736af4e" + }, + "externalUrl": "https://do-not-connect/#/site/acryl/workbooks/17904", + "name": "Deny Pattern WorkBook", + "description": "" + } + }, + "systemMetadata": { + "lastObserved": 1727349368113, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1727349368114, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + 
"entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + "systemMetadata": { + "lastObserved": 1727349368115, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Workbook" + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368116, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:jawadqu@gmail.com", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1727349368117, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce" + } + }, + "systemMetadata": { + "lastObserved": 1727349368118, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": 
"urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "urn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368118, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_deny_mces_golden.json b/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_deny_mces_golden.json new file mode 100644 index 00000000000000..96dcfeb246c91b --- /dev/null +++ b/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_deny_mces_golden.json @@ -0,0 +1,184 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:252a054d4dd93cd657735aa46dd71370", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "tableau", + "project_id": "c30aafe5-44f4-4f28-80d3-d181010a263c" + }, + "name": "Project 2" + } + }, + "systemMetadata": { + "lastObserved": 1727349368232, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:252a054d4dd93cd657735aa46dd71370", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1727349368233, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:252a054d4dd93cd657735aa46dd71370", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + 
"systemMetadata": { + "lastObserved": 1727349368233, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:252a054d4dd93cd657735aa46dd71370", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368234, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:252a054d4dd93cd657735aa46dd71370", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1727349368235, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d2dcd6bd1bb954d62f1cfc68332ee873", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "tableau", + "project_id": "910733aa-2e95-4ac3-a2e8-71570751099d" + }, + "name": "Samples" + } + }, + "systemMetadata": { + "lastObserved": 1727349368238, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d2dcd6bd1bb954d62f1cfc68332ee873", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1727349368239, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d2dcd6bd1bb954d62f1cfc68332ee873", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { 
+ "platform": "urn:li:dataPlatform:tableau" + } + }, + "systemMetadata": { + "lastObserved": 1727349368239, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d2dcd6bd1bb954d62f1cfc68332ee873", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368240, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d2dcd6bd1bb954d62f1cfc68332ee873", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1727349368241, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index 54b805d022d604..5a5552a78c56fa 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -575,6 +575,44 @@ def test_project_pattern_deprecation(pytestconfig, tmp_path, mock_datahub_graph) ) +def test_project_path_pattern_allow(pytestconfig, tmp_path, mock_datahub_graph): + output_file_name: str = "tableau_project_path_pattern_allow_mces.json" + golden_file_name: str = "tableau_project_path_pattern_allow_mces_golden.json" + + new_config = config_source_default.copy() + del new_config["projects"] + new_config["project_path_pattern"] = {"allow": ["default/DenyProject"]} + + tableau_ingest_common( + pytestconfig, + tmp_path, + mock_data(), + golden_file_name, + output_file_name, + mock_datahub_graph, + 
pipeline_config=new_config, + ) + + +def test_project_path_pattern_deny(pytestconfig, tmp_path, mock_datahub_graph): + output_file_name: str = "tableau_project_path_pattern_deny_mces.json" + golden_file_name: str = "tableau_project_path_pattern_deny_mces_golden.json" + + new_config = config_source_default.copy() + del new_config["projects"] + new_config["project_path_pattern"] = {"deny": ["^default.*"]} + + tableau_ingest_common( + pytestconfig, + tmp_path, + mock_data(), + golden_file_name, + output_file_name, + mock_datahub_graph, + pipeline_config=new_config, + ) + + @freeze_time(FROZEN_TIME) @pytest.mark.integration def test_tableau_ingest_with_platform_instance(