Skip to content

Commit

Permalink
[#3885] Skip partial parsing if project env vars change
Browse files Browse the repository at this point in the history
  • Loading branch information
gshank committed Nov 5, 2021
1 parent c57fb1d commit 32d9d51
Show file tree
Hide file tree
Showing 14 changed files with 127 additions and 75 deletions.
25 changes: 5 additions & 20 deletions core/dbt/config/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ def get_rendered(
selectors_dict=rendered_selectors,
)

# Called by 'collect_parts' in RuntimeConfig
def render(self, renderer: DbtProjectYamlRenderer) -> 'Project':
try:
rendered = self.get_rendered(renderer)
Expand Down Expand Up @@ -397,6 +398,8 @@ def create_project(self, rendered: RenderComponents) -> 'Project':
vars_dict = cfg.vars

vars_value = VarProvider(vars_dict)
# There will never be any project_env_vars when it's first created
project_env_vars: Dict[str, Any] = {}
on_run_start: List[str] = value_or(cfg.on_run_start, [])
on_run_end: List[str] = value_or(cfg.on_run_end, [])

Expand Down Expand Up @@ -444,6 +447,7 @@ def create_project(self, rendered: RenderComponents) -> 'Project':
vars=vars_value,
config_version=cfg.config_version,
unrendered=unrendered,
project_env_vars=project_env_vars,
)
# sanity check - this means an internal issue
project.validate()
Expand Down Expand Up @@ -556,6 +560,7 @@ class Project:
query_comment: QueryComment
config_version: int
unrendered: RenderComponents
project_env_vars: Dict[str, Any]

@property
def all_source_paths(self) -> List[str]:
Expand Down Expand Up @@ -645,26 +650,6 @@ def partial_load(
verify_version=verify_version,
)

@classmethod
def render_from_dict(
    cls,
    project_root: str,
    project_dict: Dict[str, Any],
    packages_dict: Dict[str, Any],
    selectors_dict: Dict[str, Any],
    renderer: DbtProjectYamlRenderer,
    *,
    verify_version: bool = False
) -> 'Project':
    """Build a partial project from already-loaded dicts and render it.

    The project root, the three dictionaries and ``verify_version`` are
    forwarded by keyword to ``PartialProject.from_dicts``; the partial
    project it returns is then rendered with ``renderer`` and that result
    is returned.
    """
    unrendered_parts = dict(
        project_root=project_root,
        project_dict=project_dict,
        packages_dict=packages_dict,
        selectors_dict=selectors_dict,
        verify_version=verify_version,
    )
    return PartialProject.from_dicts(**unrendered_parts).render(renderer)

@classmethod
def from_project_root(
cls,
Expand Down
21 changes: 20 additions & 1 deletion core/dbt/config/renderer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from typing import Dict, Any, Tuple, Optional, Union, Callable

from dbt.clients.jinja import get_rendered, catch_jinja

from dbt.context.target import TargetContext
from dbt.context.base import BaseContext
from dbt.contracts.connection import HasCredentials
from dbt.exceptions import (
DbtProjectError, CompilationException, RecursionException
)
Expand Down Expand Up @@ -98,6 +100,23 @@ def postprocess(self, value: Any, key: Keypath) -> Any:
class DbtProjectYamlRenderer(BaseRenderer):
_KEYPATH_HANDLERS = ProjectPostprocessor()

def __init__(
    self, profile: Optional[HasCredentials] = None,
    cli_vars: Optional[Dict[str, Any]] = None
) -> None:
    """Create the renderer and keep hold of the context object it renders with.

    The context object is stored on ``self.ctx_obj`` (rather than only its
    dict form) so that the env_vars it holds can be retrieved afterwards.

    Args:
        profile: when truthy, a ``TargetContext`` is built from it;
            otherwise a ``BaseContext`` is used. The latter covers the
            debug task, which wants a project even without a profile.
        cli_vars: CLI variables handed to the context; ``None`` is
            treated as an empty dict.
    """
    effective_vars = {} if cli_vars is None else cli_vars
    if not profile:
        # No profile available: fall back to the plain base context.
        self.ctx_obj = BaseContext(effective_vars)  # type:ignore
    else:
        self.ctx_obj = TargetContext(profile, effective_vars)
    super().__init__(self.ctx_obj.to_dict())

@property
def name(self):
'Project config'
Expand Down
19 changes: 12 additions & 7 deletions core/dbt/config/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from dbt.adapters.factory import get_relation_class_by_name, get_include_paths
from dbt.helper_types import FQNPath, PathSet
from dbt.context.base import generate_base_context
from dbt.context.target import generate_target_context
from dbt.contracts.connection import AdapterRequiredConfig, Credentials
from dbt.contracts.graph.manifest import ManifestMetadata
from dbt.contracts.relation import ComponentName
Expand Down Expand Up @@ -60,6 +59,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
# Post-init hook: validates the newly built config by calling self.validate().
# NOTE(review): presumably invoked by the dataclass machinery -- the class
# decorator is not visible in this view; confirm.
def __post_init__(self):
self.validate()

# Called by 'new_project' and 'from_args'
@classmethod
def from_parts(
cls,
Expand Down Expand Up @@ -116,6 +116,7 @@ def from_parts(
vars=project.vars,
config_version=project.config_version,
unrendered=project.unrendered,
project_env_vars=project.project_env_vars,
profile_name=profile.profile_name,
target_name=profile.target_name,
user_config=profile.user_config,
Expand All @@ -126,6 +127,7 @@ def from_parts(
dependencies=dependencies,
)

# Called by 'load_projects' in this class
def new_project(self, project_root: str) -> 'RuntimeConfig':
"""Given a new project root, read in its project dictionary, supply the
existing project's profile info, and create a new project file.
Expand All @@ -140,22 +142,22 @@ def new_project(self, project_root: str) -> 'RuntimeConfig':
profile.validate()

# load the new project and its packages. Don't pass cli variables.
renderer = DbtProjectYamlRenderer(generate_target_context(profile, {}))
renderer = DbtProjectYamlRenderer(profile)

project = Project.from_project_root(
project_root,
renderer,
verify_version=bool(flags.VERSION_CHECK),
)

cfg = self.from_parts(
runtime_config = self.from_parts(
project=project,
profile=profile,
args=deepcopy(self.args),
)
# force our quoting back onto the new project.
cfg.quoting = deepcopy(self.quoting)
return cfg
runtime_config.quoting = deepcopy(self.quoting)
return runtime_config

def serialize(self) -> Dict[str, Any]:
"""Serialize the full configuration to a single dictionary. For any
Expand Down Expand Up @@ -215,11 +217,12 @@ def collect_parts(

# get a new renderer using our target information and render the
# project
ctx = generate_target_context(profile, cli_vars)
project_renderer = DbtProjectYamlRenderer(ctx)
project_renderer = DbtProjectYamlRenderer(profile, cli_vars)
project = partial.render(project_renderer)
project.project_env_vars = project_renderer.ctx_obj.env_vars
return (project, profile)

# Called in main.py, lib.py, task/base.py
@classmethod
def from_args(cls, args: Any) -> 'RuntimeConfig':
"""Given arguments, read in dbt_project.yml from the current directory,
Expand Down Expand Up @@ -360,6 +363,7 @@ def load_dependencies(self) -> Mapping[str, 'RuntimeConfig']:
# Forget any stored dependency mapping by resetting self.dependencies to None.
# NOTE(review): presumably this makes load_dependencies rebuild the mapping on
# its next call -- that method's body is not visible here; confirm.
def clear_dependencies(self):
self.dependencies = None

# Called by 'load_dependencies' in this class
def load_projects(
self, paths: Iterable[Path]
) -> Iterator[Tuple[str, 'RuntimeConfig']]:
Expand Down Expand Up @@ -512,6 +516,7 @@ def from_parts(
vars=project.vars,
config_version=project.config_version,
unrendered=project.unrendered,
project_env_vars=project.project_env_vars,
profile_name='',
target_name='',
user_config=UnsetConfig(),
Expand Down
2 changes: 2 additions & 0 deletions core/dbt/context/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ class BaseContext(metaclass=ContextMeta):
def __init__(self, cli_vars):
self._ctx = {}
self.cli_vars = cli_vars
self.env_vars = {}

def generate_builtins(self):
builtins: Dict[str, Any] = {}
Expand Down Expand Up @@ -317,6 +318,7 @@ def env_var(self, var: str, default: Optional[str] = None) -> str:
return_value = default

if return_value is not None:
self.env_vars[var] = return_value
return return_value
else:
msg = f"Env var required but not provided: '{var}'"
Expand Down
1 change: 1 addition & 0 deletions core/dbt/contracts/graph/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,7 @@ class ParsingInfo:
@dataclass
class ManifestStateCheck(dbtClassMixin):
vars_hash: FileHash = field(default_factory=FileHash.empty)
env_vars_hash: FileHash = field(default_factory=FileHash.empty)
profile_hash: FileHash = field(default_factory=FileHash.empty)
project_hashes: MutableMapping[str, FileHash] = field(default_factory=dict)

Expand Down
4 changes: 1 addition & 3 deletions core/dbt/deps/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

from dbt.exceptions import raise_dependency_error, InternalException

from dbt.context.target import generate_target_context
from dbt.config import Project, RuntimeConfig
from dbt.config.renderer import DbtProjectYamlRenderer
from dbt.deps.base import BasePackage, PinnedPackage, UnpinnedPackage
Expand Down Expand Up @@ -126,8 +125,7 @@ def resolve_packages(
pending = PackageListing.from_contracts(packages)
final = PackageListing()

ctx = generate_target_context(config, config.cli_vars)
renderer = DbtProjectYamlRenderer(ctx)
renderer = DbtProjectYamlRenderer(config, config.cli_vars)

while pending:
next_pending = PackageListing()
Expand Down
32 changes: 16 additions & 16 deletions core/dbt/parser/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ class ReparseReason(StrEnum):
project_config_changed = '06_project_config_changed'
load_file_failure = '07_load_file_failure'
exception = '08_exception'
env_vars_changed = '09_env_vars_changed'


# Part of saved performance info
Expand Down Expand Up @@ -553,6 +554,10 @@ def is_partial_parsable(self, manifest: Manifest) -> Tuple[bool, Optional[str]]:
logger.info("Unable to do partial parsing because profile has changed")
valid = False
reparse_reason = ReparseReason.profile_changed
if self.manifest.state_check.env_vars_hash != manifest.state_check.env_vars_hash:
logger.info("Unable to do partial parsing because env vars have changed")
valid = False
reparse_reason = ReparseReason.env_vars_changed

missing_keys = {
k for k in self.manifest.state_check.project_hashes
Expand Down Expand Up @@ -605,8 +610,8 @@ def read_manifest_for_partial_parse(self) -> Optional[Manifest]:
# keep this check inside the try/except in case something about
# the file has changed in weird ways, perhaps due to being a
# different version of dbt
is_partial_parseable, reparse_reason = self.is_partial_parsable(manifest)
if is_partial_parseable:
is_partial_parsable, reparse_reason = self.is_partial_parsable(manifest)
if is_partial_parsable:
# We don't want to have stale generated_at dates
manifest.metadata.generated_at = datetime.utcnow()
# or invocation_ids
Expand Down Expand Up @@ -664,6 +669,14 @@ def build_manifest_state_check(self):
])
)

# Create a hash of the env_vars in the project
key_list = list(config.project_env_vars.keys())
key_list.sort()
env_var_str = ''
for key in key_list:
env_var_str = env_var_str + f'{key}:{config.project_env_vars[key]}|'
env_vars_hash = FileHash.from_contents(env_var_str)

profile_path = os.path.join(flags.PROFILES_DIR, 'profiles.yml')
with open(profile_path) as fp:
profile_hash = FileHash.from_contents(fp.read())
Expand All @@ -675,6 +688,7 @@ def build_manifest_state_check(self):
project_hashes[name] = FileHash.from_contents(fp.read())

state_check = ManifestStateCheck(
env_vars_hash=env_vars_hash,
vars_hash=vars_hash,
profile_hash=profile_hash,
project_hashes=project_hashes,
Expand Down Expand Up @@ -923,20 +937,6 @@ def _check_manifest(manifest: Manifest, config: RuntimeConfig) -> None:
_warn_for_unused_resource_config_paths(manifest, config)


# This is just used in test cases
def _load_projects(config, paths):
    """Lazily yield ``(project_name, project)`` for every path in ``paths``.

    Each project is built with ``config.new_project(path)``. This is a
    generator, so no project is loaded until the result is iterated.

    Args:
        config: object exposing ``new_project(path)``.
        paths: iterable of project roots to load.

    Yields:
        Tuples of the loaded project's ``project_name`` and the project.

    Raises:
        dbt.exceptions.DbtProjectError: if ``config.new_project`` raises
            one. The new error names the offending path and is explicitly
            chained to the original so the root cause stays attached.
    """
    for path in paths:
        try:
            project = config.new_project(path)
        except dbt.exceptions.DbtProjectError as e:
            # Chain with ``from e`` so the underlying failure is recorded
            # as the direct cause of the path-annotated error.
            raise dbt.exceptions.DbtProjectError(
                'Failed to read package at {}: {}'
                .format(path, e)
            ) from e
        else:
            yield project.project_name, project


def _get_node_column(node, column_name):
"""Given a ParsedNode, add some fields that might be missing. Return a
reference to the dict that refers to the given column, creating it if
Expand Down
2 changes: 2 additions & 0 deletions core/dbt/task/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ def set_log_format(cls):
@classmethod
def from_args(cls, args):
try:
# This is usually RuntimeConfig but will be UnsetProfileConfig
# for the clean or deps tasks
config = cls.ConfigType.from_args(args)
except dbt.exceptions.DbtProjectError as exc:
logger.error("Encountered an error while reading the project:")
Expand Down
12 changes: 2 additions & 10 deletions core/dbt/task/debug.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from dbt.config.renderer import DbtProjectYamlRenderer, ProfileRenderer
from dbt.config.utils import parse_cli_vars
from dbt.context.base import generate_base_context
from dbt.context.target import generate_target_context
from dbt.clients.yaml_helper import load_yaml_text
from dbt.links import ProfileConfigDocs
from dbt.ui import green, red
Expand Down Expand Up @@ -146,12 +145,7 @@ def _load_project(self):
self.project_fail_details = FILE_NOT_FOUND
return red('ERROR not found')

if self.profile is None:
ctx = generate_base_context(self.cli_vars)
else:
ctx = generate_target_context(self.profile, self.cli_vars)

renderer = DbtProjectYamlRenderer(ctx)
renderer = DbtProjectYamlRenderer(self.profile, self.cli_vars)

try:
self.project = Project.from_project_root(
Expand Down Expand Up @@ -198,9 +192,7 @@ def _choose_profile_names(self) -> Optional[List[str]]:
os.path.dirname(self.project_path),
verify_version=bool(flags.VERSION_CHECK),
)
renderer = DbtProjectYamlRenderer(
generate_base_context(self.cli_vars)
)
renderer = DbtProjectYamlRenderer(None, self.cli_vars)
project_profile = partial.render_profile_name(renderer)
except dbt.exceptions.DbtProjectError:
pass
Expand Down
5 changes: 1 addition & 4 deletions core/dbt/task/deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from dbt.config import UnsetProfileConfig
from dbt.config.renderer import DbtProjectYamlRenderer
from dbt.context.target import generate_target_context
from dbt.deps.base import downloads_directory
from dbt.deps.resolver import resolve_packages

Expand Down Expand Up @@ -52,9 +51,7 @@ def run(self):
with downloads_directory():
final_deps = resolve_packages(packages, self.config)

renderer = DbtProjectYamlRenderer(generate_target_context(
self.config, self.config.cli_vars
))
renderer = DbtProjectYamlRenderer(self.config, self.config.cli_vars)

packages_to_upgrade = []
for package in final_deps:
Expand Down
Loading

0 comments on commit 32d9d51

Please sign in to comment.