Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#3885] Partially parse when environment variables in schema files change #4162

Merged
merged 3 commits into from
Nov 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ Contributors:
- Prefer macros defined in the project over the ones in a package by default ([#4106](https://github.com/dbt-labs/dbt-core/issues/4106), [#4114](https://github.com/dbt-labs/dbt-core/pull/4114))
- Dependency updates ([#4079](https://github.com/dbt-labs/dbt-core/pull/4079)), ([#3532](https://github.com/dbt-labs/dbt-core/pull/3532)
- Schedule partial parsing for SQL files with env_var changes ([#3885](https://github.com/dbt-labs/dbt-core/issues/3885), [#4101](https://github.com/dbt-labs/dbt-core/pull/4101))
- Schedule partial parsing for schema files with env_var changes ([#3885](https://github.com/dbt-labs/dbt-core/issues/3885), [#4162](https://github.com/dbt-labs/dbt-core/pull/4162))

Contributors:
- [@sungchun12](https://github.com/sungchun12) ([#4017](https://github.com/dbt-labs/dbt/pull/4017))
Expand Down
6 changes: 4 additions & 2 deletions core/dbt/clients/jinja.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from dbt.utils import (
get_dbt_macro_name, get_docs_macro_name, get_materialization_macro_name,
get_test_macro_name, deep_map
get_test_macro_name, deep_map_render
)

from dbt.clients._jinja_blocks import BlockIterator, BlockData, BlockTag
Expand Down Expand Up @@ -661,5 +661,7 @@ def _convert_function(

return value

kwargs = deep_map(_convert_function, node.test_metadata.kwargs)
# The test_metadata.kwargs come from the test builder, and were set
# when the test node was created in _parse_generic_test.
kwargs = deep_map_render(_convert_function, node.test_metadata.kwargs)
context[GENERIC_TEST_KWARGS_NAME] = kwargs
68 changes: 2 additions & 66 deletions core/dbt/config/renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
from dbt.exceptions import (
DbtProjectError, CompilationException, RecursionException
)
from dbt.node_types import NodeType
from dbt.utils import deep_map
from dbt.utils import deep_map_render


Keypath = Tuple[Union[str, int], ...]
Expand Down Expand Up @@ -47,7 +46,7 @@ def render_data(
self, data: Dict[str, Any]
) -> Dict[str, Any]:
try:
return deep_map(self.render_entry, data)
return deep_map_render(self.render_entry, data)
except RecursionException:
raise DbtProjectError(
f'Cycle detected: {self.name} input has a reference to itself',
Expand Down Expand Up @@ -163,69 +162,6 @@ def name(self):
'Profile'


class SchemaYamlRenderer(BaseRenderer):
DOCUMENTABLE_NODES = frozenset(
n.pluralize() for n in NodeType.documentable()
)

@property
def name(self):
return 'Rendering yaml'

def _is_norender_key(self, keypath: Keypath) -> bool:
"""
models:
- name: blah
- description: blah
tests: ...
- columns:
- name:
- description: blah
tests: ...

Return True if it's tests or description - those aren't rendered
"""
if len(keypath) >= 2 and keypath[1] in ('tests', 'description'):
return True

if (
len(keypath) >= 4 and
keypath[1] == 'columns' and
keypath[3] in ('tests', 'description')
):
return True

return False

# don't render descriptions or test keyword arguments
def should_render_keypath(self, keypath: Keypath) -> bool:
if len(keypath) < 2:
return True

if keypath[0] not in self.DOCUMENTABLE_NODES:
return True

if len(keypath) < 3:
return True

if keypath[0] == NodeType.Source.pluralize():
if keypath[2] == 'description':
return False
if keypath[2] == 'tables':
if self._is_norender_key(keypath[3:]):
return False
elif keypath[0] == NodeType.Macro.pluralize():
if keypath[2] == 'arguments':
if self._is_norender_key(keypath[3:]):
return False
elif self._is_norender_key(keypath[1:]):
return False
else: # keypath[0] in self.DOCUMENTABLE_NODES:
if self._is_norender_key(keypath[1:]):
return False
return True


class PackageRenderer(BaseRenderer):
@property
def name(self):
Expand Down
41 changes: 35 additions & 6 deletions core/dbt/context/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@

from dbt import flags
from dbt import tracking
from dbt.clients.jinja import undefined_error, get_rendered
from dbt.clients.jinja import get_rendered
from dbt.clients.yaml_helper import ( # noqa: F401
yaml, safe_load, SafeLoader, Loader, Dumper
)
from dbt.contracts.graph.compiled import CompiledResource
from dbt.exceptions import raise_compiler_error, MacroReturn
from dbt.exceptions import raise_compiler_error, MacroReturn, raise_parsing_error
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.version import __version__ as dbt_version

Expand All @@ -21,6 +21,38 @@
import datetime
import re

# Contexts in dbt Core
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is simply outstandingly helpful. Thank you x1000

# Contexts are used for Jinja rendering. They include context methods,
# executable macros, and various settings that are available in Jinja.
#
# Different contexts are used in different places because we allow access
# to different methods and data in different places. Executable SQL, for
# example, includes the available macros and the model, while Jinja in
# yaml files is more limited.
#
# The context that is passed to Jinja is always in a dictionary format,
# not an actual class, so a 'to_dict()' is executed on a context class
# before it is used for rendering.
#
# Each context has a generate_<name>_context function to create the context.
# ProviderContext subclasses have different generate functions for
# parsing and for execution.
#
# Context class hierarchy
#
# BaseContext -- core/dbt/context/base.py
# TargetContext -- core/dbt/context/target.py
# ConfiguredContext -- core/dbt/context/configured.py
# SchemaYamlContext -- core/dbt/context/configured.py
# DocsRuntimeContext -- core/dbt/context/configured.py
# MacroResolvingContext -- core/dbt/context/configured.py
# ManifestContext -- core/dbt/context/manifest.py
# QueryHeaderContext -- core/dbt/context/manifest.py
# ProviderContext -- core/dbt/context/provider.py
# MacroContext -- core/dbt/context/provider.py
# ModelContext -- core/dbt/context/provider.py
# TestContext -- core/dbt/context/provider.py


def get_pytz_module_context() -> Dict[str, Any]:
context_exports = pytz.__all__ # type: ignore
Expand Down Expand Up @@ -164,8 +196,6 @@ class BaseContext(metaclass=ContextMeta):
def __init__(self, cli_vars):
self._ctx = {}
self.cli_vars = cli_vars
# Save the env_vars encountered using this
self.env_vars = {}

def generate_builtins(self):
builtins: Dict[str, Any] = {}
Expand Down Expand Up @@ -287,11 +317,10 @@ def env_var(self, var: str, default: Optional[str] = None) -> str:
return_value = default

if return_value is not None:
self.env_vars[var] = return_value
return return_value
else:
msg = f"Env var required but not provided: '{var}'"
undefined_error(msg)
raise_parsing_error(msg)

if os.environ.get('DBT_MACRO_DEBUGGING'):
@contextmember
Expand Down
36 changes: 31 additions & 5 deletions core/dbt/context/configured.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from typing import Any, Dict
import os
from typing import Any, Dict, Optional

from dbt.contracts.connection import AdapterRequiredConfig
from dbt.logger import SECRET_ENV_PREFIX
from dbt.node_types import NodeType
from dbt.utils import MultiDict

from dbt.context.base import contextproperty, Var
from dbt.context.base import contextproperty, contextmember, Var
from dbt.context.target import TargetContext
from dbt.exceptions import raise_parsing_error


class ConfiguredContext(TargetContext):
Expand Down Expand Up @@ -64,18 +67,41 @@ def __call__(self, var_name, default=Var._VAR_NOTSET):
return self.get_missing_var(var_name)


class SchemaYamlVars():
def __init__(self):
self.env_vars = {}
self.vars = {}


class SchemaYamlContext(ConfiguredContext):
# subclass is DocsRuntimeContext
def __init__(self, config, project_name: str):
def __init__(self, config, project_name: str, schema_yaml_vars: Optional[SchemaYamlVars]):
super().__init__(config)
self._project_name = project_name
self.schema_yaml_vars = schema_yaml_vars

@contextproperty
def var(self) -> ConfiguredVar:
return ConfiguredVar(
self._ctx, self.config, self._project_name
)

@contextmember
def env_var(self, var: str, default: Optional[str] = None) -> str:
return_value = None
if var in os.environ:
return_value = os.environ[var]
elif default is not None:
return_value = default

if return_value is not None:
if not var.startswith(SECRET_ENV_PREFIX) and self.schema_yaml_vars:
self.schema_yaml_vars.env_vars[var] = return_value
return return_value
else:
msg = f"Env var required but not provided: '{var}'"
raise_parsing_error(msg)


class MacroResolvingContext(ConfiguredContext):
def __init__(self, config):
Expand All @@ -89,9 +115,9 @@ def var(self) -> ConfiguredVar:


def generate_schema_yml_context(
config: AdapterRequiredConfig, project_name: str
config: AdapterRequiredConfig, project_name: str, schema_yaml_vars: SchemaYamlVars = None
) -> Dict[str, Any]:
ctx = SchemaYamlContext(config, project_name)
ctx = SchemaYamlContext(config, project_name, schema_yaml_vars)
return ctx.to_dict()


Expand Down
2 changes: 1 addition & 1 deletion core/dbt/context/docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def __init__(
manifest: Manifest,
current_project: str,
) -> None:
super().__init__(config, current_project)
super().__init__(config, current_project, None)
self.node = node
self.manifest = manifest

Expand Down
38 changes: 33 additions & 5 deletions core/dbt/context/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@
get_adapter, get_adapter_package_names, get_adapter_type_names
)
from dbt.clients import agate_helper
from dbt.clients.jinja import get_rendered, MacroGenerator, MacroStack, undefined_error
from dbt.clients.jinja import get_rendered, MacroGenerator, MacroStack
from dbt.config import RuntimeConfig, Project
from .base import contextmember, contextproperty, Var
from .configured import FQNLookup
from .context_config import ContextConfig
from dbt.logger import SECRET_ENV_PREFIX
from dbt.context.macro_resolver import MacroResolver, TestMacroNamespace
from .macros import MacroNamespaceBuilder, MacroNamespace
from .manifest import ManifestContext
Expand Down Expand Up @@ -47,6 +48,7 @@
ref_bad_context,
source_target_not_found,
wrapped_exports,
raise_parsing_error,
)
from dbt.config import IsFQNResource
from dbt.logger import GLOBAL_LOGGER as logger, SECRET_ENV_PREFIX # noqa
Expand Down Expand Up @@ -1176,15 +1178,19 @@ def env_var(self, var: str, default: Optional[str] = None) -> str:
return_value = default

if return_value is not None:
# Save the env_var value in the manifest and the var name in the source_file
if not var.startswith(SECRET_ENV_PREFIX) and self.model:
# Save the env_var value in the manifest and the var name in the source_file.
# If this is compiling, do not save because it's irrelevant to parsing.
if (not var.startswith(SECRET_ENV_PREFIX) and self.model and
not hasattr(self.model, 'compiled')):
self.manifest.env_vars[var] = return_value
source_file = self.manifest.files[self.model.file_id]
source_file.env_vars.append(var)
# Schema files should never get here
if source_file.parse_file_type != 'schema':
source_file.env_vars.append(var)
return return_value
else:
msg = f"Env var required but not provided: '{var}'"
undefined_error(msg)
raise_parsing_error(msg)


class MacroContext(ProviderContext):
Expand Down Expand Up @@ -1428,6 +1434,28 @@ def _build_test_namespace(self):
)
self.namespace = macro_namespace

@contextmember
def env_var(self, var: str, default: Optional[str] = None) -> str:
return_value = None
if var in os.environ:
return_value = os.environ[var]
elif default is not None:
return_value = default

if return_value is not None:
# Save the env_var value in the manifest and the var name in the source_file
if not var.startswith(SECRET_ENV_PREFIX) and self.model:
self.manifest.env_vars[var] = return_value
# the "model" should only be test nodes, but just in case, check
if self.model.resource_type == NodeType.Test and self.model.file_key_name:
source_file = self.manifest.files[self.model.file_id]
(yaml_key, name) = self.model.file_key_name.split('.')
source_file.add_env_var(var, yaml_key, name)
return return_value
else:
msg = f"Env var required but not provided: '{var}'"
raise_parsing_error(msg)


def generate_test_context(
model: ManifestNode,
Expand Down
16 changes: 16 additions & 0 deletions core/dbt/contracts/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,5 +301,21 @@ def get_all_test_ids(self):
test_ids.extend(self.tests[key][name])
return test_ids

def add_env_var(self, var, yaml_key, name):
if yaml_key not in self.env_vars:
self.env_vars[yaml_key] = {}
if name not in self.env_vars[yaml_key]:
self.env_vars[yaml_key][name] = []
if var not in self.env_vars[yaml_key][name]:
self.env_vars[yaml_key][name].append(var)

def delete_from_env_vars(self, yaml_key, name):
# We delete all vars for this yaml_key/name because the
# entry has been scheduled for reparsing.
if yaml_key in self.env_vars and name in self.env_vars[yaml_key]:
del self.env_vars[yaml_key][name]
if not self.env_vars[yaml_key]:
del self.env_vars[yaml_key]


AnySourceFile = Union[SchemaSourceFile, SourceFile]
1 change: 1 addition & 0 deletions core/dbt/contracts/graph/compiled.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ class CompiledGenericTestNode(CompiledNode, HasTestMetadata):
# keep this in sync with ParsedGenericTestNode!
resource_type: NodeType = field(metadata={'restrict': [NodeType.Test]})
column_name: Optional[str] = None
file_key_name: Optional[str] = None
# Was not able to make mypy happy and keep the code working. We need to
# refactor the various configs.
config: TestConfig = field(default_factory=TestConfig) # type:ignore
Expand Down
4 changes: 4 additions & 0 deletions core/dbt/contracts/graph/parsed.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,9 @@ def same_body(self: T, other: T) -> bool:
@dataclass
class TestMetadata(dbtClassMixin, Replaceable):
name: str
# kwargs are the args that are left in the test builder after
# removing configs. They are set from the test builder when
# the test node is created.
kwargs: Dict[str, Any] = field(default_factory=dict)
namespace: Optional[str] = None

Expand All @@ -432,6 +435,7 @@ class ParsedGenericTestNode(ParsedNode, HasTestMetadata):
# keep this in sync with CompiledGenericTestNode!
resource_type: NodeType = field(metadata={'restrict': [NodeType.Test]})
column_name: Optional[str] = None
file_key_name: Optional[str] = None
# Was not able to make mypy happy and keep the code working. We need to
# refactor the various configs.
config: TestConfig = field(default_factory=TestConfig) # type: ignore
Expand Down
Loading