From 0de046dfbef53cfe754c87ec58a638c7a8962dc6 Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Mon, 8 May 2023 13:41:37 -0700 Subject: [PATCH] Allow duplicate refable node names across packages (#7374) --- .../unreleased/Features-20230429-155057.yaml | 7 ++ core/dbt/context/providers.py | 1 + core/dbt/contracts/graph/manifest.py | 45 ++++++++-- core/dbt/exceptions.py | 17 ++++ core/dbt/parser/manifest.py | 10 +-- core/dbt/parser/schemas.py | 5 +- test/unit/test_manifest.py | 89 +++++++++++++++++++ .../duplicates/test_duplicate_model.py | 10 +-- .../multi_project/test_publication.py | 4 +- 9 files changed, 165 insertions(+), 23 deletions(-) create mode 100644 .changes/unreleased/Features-20230429-155057.yaml diff --git a/.changes/unreleased/Features-20230429-155057.yaml b/.changes/unreleased/Features-20230429-155057.yaml new file mode 100644 index 00000000000..49ee5ffd74c --- /dev/null +++ b/.changes/unreleased/Features-20230429-155057.yaml @@ -0,0 +1,7 @@ +kind: Features +body: Allow duplicate manifest node (models, seeds, analyses, snapshots) names across + packages +time: 2023-04-29T15:50:57.757832-04:00 +custom: + Author: MichelleArk + Issue: "7446" diff --git a/core/dbt/context/providers.py b/core/dbt/context/providers.py index 7015c031f4b..8a74cc08d1e 100644 --- a/core/dbt/context/providers.py +++ b/core/dbt/context/providers.py @@ -482,6 +482,7 @@ def resolve( target_version: Optional[NodeVersion] = None, ) -> RelationProxy: target_model = self.manifest.resolve_ref( + self.model, target_name, target_package, target_version, diff --git a/core/dbt/contracts/graph/manifest.py b/core/dbt/contracts/graph/manifest.py index 5ff08e00131..f4caefbf6e4 100644 --- a/core/dbt/contracts/graph/manifest.py +++ b/core/dbt/contracts/graph/manifest.py @@ -49,6 +49,7 @@ DuplicateResourceNameError, DuplicateMacroInPackageError, DuplicateMaterializationNameError, + AmbiguousResourceNameRefError, ) from dbt.helper_types import PathSet from dbt.events.functions import fire_event @@ -152,27 +153,36 @@ class RefableLookup(dbtClassMixin): _lookup_types: ClassVar[set] = set(NodeType.refable()) _versioned_types: ClassVar[set] = set(NodeType.versioned()) - # refables are actually unique, so the Dict[PackageName, UniqueID] will - # only ever have exactly one value, but doing 3 dict lookups instead of 1 - # is not a big deal at all and retains consistency def __init__(self, manifest: "Manifest"): self.storage: Dict[str, Dict[PackageName, UniqueID]] = {} self.populate(manifest) self.populate_public_nodes(manifest) - def get_unique_id(self, key, package: Optional[PackageName], version: Optional[NodeVersion]): + def get_unique_id( + self, + key: str, + package: Optional[PackageName], + version: Optional[NodeVersion], + node: Optional[GraphMemberNode] = None, + ): if version: key = f"{key}.v{version}" - return find_unique_id_for_package(self.storage, key, package) + + unique_ids = self._find_unique_ids_for_package(key, package) + if len(unique_ids) > 1: + raise AmbiguousResourceNameRefError(key, unique_ids, node) + else: + return unique_ids[0] if unique_ids else None def find( self, - key, + key: str, package: Optional[PackageName], version: Optional[NodeVersion], manifest: "Manifest", + source_node: Optional[GraphMemberNode] = None, ): - unique_id = self.get_unique_id(key, package, version) + unique_id = self.get_unique_id(key, package, version, source_node) if unique_id is not None: node = self.perform_lookup(unique_id, manifest) # If this is an unpinned ref (no 'version' arg was passed), @@ -235,6 +245,22 @@ def perform_lookup(self, unique_id: UniqueID, manifest) -> ManifestOrPublicNode: ) return node + def _find_unique_ids_for_package(self, key, package: Optional[PackageName]) -> List[str]: + if key not in self.storage: + return [] + + pkg_dct: Mapping[PackageName, UniqueID] = self.storage[key] + + if package is None: + if not pkg_dct: + return [] + else: + return list(pkg_dct.values()) + elif package in pkg_dct: + return [pkg_dct[package]] + else: + return [] + class MetricLookup(dbtClassMixin): def __init__(self, manifest: "Manifest"): @@ -936,6 +962,7 @@ def analysis_lookup(self) -> AnalysisLookup: # and dbt.parser.manifest._process_refs_for_node def resolve_ref( self, + source_node: GraphMemberNode, target_model_name: str, target_model_package: Optional[str], target_model_version: Optional[NodeVersion], @@ -948,7 +975,9 @@ def resolve_ref( candidates = _packages_to_search(current_project, node_package, target_model_package) for pkg in candidates: - node = self.ref_lookup.find(target_model_name, pkg, target_model_version, self) + node = self.ref_lookup.find( + target_model_name, pkg, target_model_version, self, source_node + ) if node is not None and ( (hasattr(node, "config") and node.config.enabled) or node.is_public_node diff --git a/core/dbt/exceptions.py b/core/dbt/exceptions.py index 35bf10e344d..e92948d9ad0 100644 --- a/core/dbt/exceptions.py +++ b/core/dbt/exceptions.py @@ -1975,6 +1975,23 @@ def get_message(self) -> str: return msg +class AmbiguousResourceNameRefError(CompilationError): + def __init__(self, duped_name, unique_ids, node=None): + self.duped_name = duped_name + self.unique_ids = unique_ids + self.packages = [unique_id.split(".")[1] for unique_id in unique_ids] + super().__init__(msg=self.get_message(), node=node) + + def get_message(self) -> str: + formatted_unique_ids = "'{0}'".format("', '".join(self.unique_ids)) + formatted_packages = "'{0}'".format("' or '".join(self.packages)) + msg = ( + f"When referencing '{self.duped_name}', dbt found nodes in multiple packages: {formatted_unique_ids}" + f"\nTo fix this, use two-argument 'ref', with the package name first: {formatted_packages}" + ) + return msg + + class AmbiguousCatalogMatchError(CompilationError): def __init__(self, unique_id: str, match_1, match_2): self.unique_id = unique_id diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index 4a802ba7a13..a0b1f34de37 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -1261,30 +1261,23 @@ def _check_resource_uniqueness( manifest: Manifest, config: RuntimeConfig, ) -> None: - names_resources: Dict[str, ManifestNode] = {} alias_resources: Dict[str, ManifestNode] = {} for resource, node in manifest.nodes.items(): if not node.is_relational: continue - name = node.name # the full node name is really defined by the adapter's relation relation_cls = get_relation_class_by_name(config.credentials.type) relation = relation_cls.create_from(config=config, node=node) full_node_name = str(relation) - existing_node = names_resources.get(name) - if existing_node is not None and not existing_node.is_versioned: - raise dbt.exceptions.DuplicateResourceNameError(existing_node, node) - existing_alias = alias_resources.get(full_node_name) if existing_alias is not None: raise AmbiguousAliasError( node_1=existing_alias, node_2=node, duped_name=full_node_name ) - names_resources[name] = node alias_resources[full_node_name] = node @@ -1362,6 +1355,7 @@ def _process_refs_for_exposure(manifest: Manifest, current_project: str, exposur ) target_model = manifest.resolve_ref( + exposure, target_model_name, target_model_package, target_model_version, @@ -1414,6 +1408,7 @@ def _process_refs_for_metric(manifest: Manifest, current_project: str, metric: M ) target_model = manifest.resolve_ref( + metric, target_model_name, target_model_package, target_model_version, @@ -1518,6 +1513,7 @@ def _process_refs_for_node(manifest: Manifest, current_project: str, node: Manif ) target_model = manifest.resolve_ref( + node, target_model_name, target_model_package, target_model_version, diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index 333f82f156e..727824f323e 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -918,7 +918,10 @@ def parse_patch(self, block: TargetBlock[NodeTarget], refs: ParserRef) -> None: assert isinstance(self.yaml.file, SchemaSourceFile) source_file: SchemaSourceFile = self.yaml.file if patch.yaml_key in ["models", "seeds", "snapshots"]: - unique_id = self.manifest.ref_lookup.get_unique_id(patch.name, None, None) + unique_id = self.manifest.ref_lookup.get_unique_id( + patch.name, self.project.project_name, None + ) or self.manifest.ref_lookup.get_unique_id(patch.name, None, None) + if unique_id: resource_type = NodeType(unique_id.split(".")[0]) if resource_type.pluralize() != patch.yaml_key: diff --git a/test/unit/test_manifest.py b/test/unit/test_manifest.py index 3be2b6dceac..ccca812810e 100644 --- a/test/unit/test_manifest.py +++ b/test/unit/test_manifest.py @@ -37,6 +37,7 @@ ) from dbt.events.functions import reset_metadata_vars +from dbt.exceptions import AmbiguousResourceNameRefError from dbt.flags import set_from_args from dbt.node_types import NodeType @@ -1449,11 +1450,75 @@ def _refable_parameter_sets(): version=None, expected=None, ), + # duplicate node name across package + FindNodeSpec( + nodes=[MockNode("project_a", "my_model"), MockNode("project_b", "my_model")], + sources=[], + package="project_a", + version=None, + expected=("project_a", "my_model"), + ), + # duplicate node name across package: root node preferred to package node + FindNodeSpec( + nodes=[MockNode("root", "my_model"), MockNode("project_a", "my_model")], + sources=[], + package=None, + version=None, + expected=("root", "my_model"), + ), + FindNodeSpec( + nodes=[MockNode("root", "my_model"), MockNode("project_a", "my_model")], + sources=[], + package="root", + version=None, + expected=("root", "my_model"), + ), + FindNodeSpec( + nodes=[MockNode("root", "my_model"), MockNode("project_a", "my_model")], + sources=[], + package="project_a", + version=None, + expected=("project_a", "my_model"), + ), + # duplicate node name across package: resolved by version + FindNodeSpec( + nodes=[ + MockNode("project_a", "my_model", version="1"), + MockNode("project_b", "my_model", version="2"), + ], + sources=[], + package=None, + version="1", + expected=("project_a", "my_model", "1"), + ), ] ) return sets +def _ambiguous_ref_parameter_sets(): + sets = [ + FindNodeSpec( + nodes=[MockNode("project_a", "my_model"), MockNode("project_b", "my_model")], + sources=[], + package=None, + version=None, + expected=None, + ), + FindNodeSpec( + nodes=[ + MockNode("project_a", "my_model", version="2", is_latest_version=True), + MockNode("project_b", "my_model", version="1", is_latest_version=True), + ], + sources=[], + package=None, + version=None, + expected=None, + ), + ] + return sets + + def id_nodes(arg): if isinstance(arg, list): node_names = "__".join(f"{n.package_name}_{n.search_name}" for n in arg) @@ -1470,6 +1535,7 @@ def id_nodes(arg): def test_resolve_ref(nodes, sources, package, version, expected): manifest = make_manifest(nodes=nodes, sources=sources) result = manifest.resolve_ref( + source_node=None, target_model_name="my_model", target_model_package=package, target_model_version=version, @@ -1491,6 +1557,29 @@ def test_resolve_ref(nodes, sources, package, version, expected): assert result.package_name == expected_package +@pytest.mark.parametrize( + "nodes,sources,package,version,expected", + _ambiguous_ref_parameter_sets(), + ids=id_nodes, +) +def test_resolve_ref_ambiguous_resource_name_across_packages( + nodes, sources, package, version, expected +): + manifest = make_manifest( + nodes=nodes, + sources=sources, + ) + with pytest.raises(AmbiguousResourceNameRefError): + manifest.resolve_ref( + source_node=None, + target_model_name="my_model", + target_model_package=None, + target_model_version=version, + current_project="root", + node_package="root", + ) + + def _source_parameter_sets(): sets = [ # empties diff --git a/tests/functional/duplicates/test_duplicate_model.py b/tests/functional/duplicates/test_duplicate_model.py index 7a53fd6de63..17be1ff20b9 100644 --- a/tests/functional/duplicates/test_duplicate_model.py +++ b/tests/functional/duplicates/test_duplicate_model.py @@ -1,6 +1,6 @@ import pytest -from dbt.exceptions import CompilationError, DuplicateResourceNameError +from dbt.exceptions import CompilationError, AmbiguousAliasError from dbt.tests.fixtures.project import write_project_files from dbt.tests.util import run_dbt, get_manifest @@ -88,7 +88,7 @@ def test_duplicate_model_disabled_partial_parsing(self, project): assert len(results) == 1 -class TestDuplicateModelEnabledAcrossPackages: +class TestDuplicateModelAliasEnabledAcrossPackages: @pytest.fixture(scope="class") def models(self): return {"table_model.sql": enabled_model_sql} @@ -105,10 +105,10 @@ def setUp(self, project_root): def packages(self): return {"packages": [{"local": "local_dependency"}]} - def test_duplicate_model_enabled_across_packages(self, project): + def test_duplicate_model_alias_enabled_across_packages(self, project): run_dbt(["deps"]) - message = "dbt found two models with the name" - with pytest.raises(DuplicateResourceNameError) as exc: + message = "dbt found two resources with the database representation" + with pytest.raises(AmbiguousAliasError) as exc: run_dbt(["run"]) assert message in str(exc.value) diff --git a/tests/functional/multi_project/test_publication.py b/tests/functional/multi_project/test_publication.py index ac3a86a8cd8..2725cb4c8bc 100644 --- a/tests/functional/multi_project/test_publication.py +++ b/tests/functional/multi_project/test_publication.py @@ -142,8 +142,8 @@ def test_pub_artifacts(self, project): publication = PublicationArtifact.from_dict(publication_dict) assert publication.dependencies == ["marketing"] - # target_model_name, target_model_package, target_model_version, current_project, node_package - resolved_node = manifest.resolve_ref("fct_one", "marketing", None, "test", "test") + # source_node, target_model_name, target_model_package, target_model_version, current_project, node_package + resolved_node = manifest.resolve_ref(None, "fct_one", "marketing", None, "test", "test") assert resolved_node assert isinstance(resolved_node, PublicModel) assert resolved_node.unique_id == "model.marketing.fct_one"