From 3dab058c73de55726be36ddabbae606daf3bcf74 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Fri, 5 Nov 2021 16:55:58 +0100 Subject: [PATCH] `incorporate_indirect_nodes` should pass if not needed (#4214) * Pass incorporate_indirect_nodes if not needed * Fix flake8 * Add changelog entry --- CHANGELOG.md | 2 ++ core/dbt/graph/selector.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b11b67a9601..6a318ada61b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,12 +3,14 @@ ### Features - Allow nullable `error_after` in source freshness ([#3874](https://github.com/dbt-labs/dbt-core/issues/3874), [#3955](https://github.com/dbt-labs/dbt-core/pull/3955)) - Increase performance of graph subset selection ([#4135](https://github.com/dbt-labs/dbt-core/issues/4135),[#4155](https://github.com/dbt-labs/dbt-core/pull/4155)) +- Speed up node selection by skipping indirect node incorporation when not needed ([#4213](https://github.com/dbt-labs/dbt-core/issues/4213),[#4214](https://github.com/dbt-labs/dbt-core/pull/4214)) ### Fixes - Changes unit tests using `assertRaisesRegexp` to `assertRaisesRegex` ### Under the hood - Bump artifact schema versions for 1.0.0: manifest v4, run results v4, sources v3. 
Notable changes: schema test + data test nodes are renamed to generic test + singular test nodes; freshness threshold default values ([#4191](https://github.com/dbt-labs/dbt-core/pull/4191)) +- Speed up node selection by skipping `incorporate_indirect_nodes` if not needed ([#4213](https://github.com/dbt-labs/dbt-core/issues/4213), [#4214](https://github.com/dbt-labs/dbt-core/pull/4214)) Contributors: - [@kadero](https://github.com/kadero) ([3955](https://github.com/dbt-labs/dbt-core/pull/3955)) diff --git a/core/dbt/graph/selector.py b/core/dbt/graph/selector.py index f2026621eb9..7854f844cc0 100644 --- a/core/dbt/graph/selector.py +++ b/core/dbt/graph/selector.py @@ -255,6 +255,10 @@ def incorporate_indirect_nodes( # Check tests previously selected indirectly to see if ALL their # parents are now present. + # performance: if identical, skip the processing below + if set(direct_nodes) == set(indirect_nodes): + return direct_nodes + selected = set(direct_nodes) for unique_id in indirect_nodes: