Skip to content

Commit

Permalink
[#3885] Partially parse when environment variables in schema files ch…
Browse files Browse the repository at this point in the history
…ange (#4162)

* [#3885] Partially parse when environment variables in schema files
change

* Add documentation for test kwargs

* Add test and fix for schema configs with env_var
  • Loading branch information
gshank authored Nov 8, 2021
1 parent 229e897 commit bda70c9
Show file tree
Hide file tree
Showing 35 changed files with 818 additions and 200 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ Contributors:
- Prefer macros defined in the project over the ones in a package by default ([#4106](https://github.com/dbt-labs/dbt-core/issues/4106), [#4114](https://github.com/dbt-labs/dbt-core/pull/4114))
- Dependency updates ([#4079](https://github.com/dbt-labs/dbt-core/pull/4079)), ([#3532](https://github.com/dbt-labs/dbt-core/pull/3532)
- Schedule partial parsing for SQL files with env_var changes ([#3885](https://github.com/dbt-labs/dbt-core/issues/3885), [#4101](https://github.com/dbt-labs/dbt-core/pull/4101))
- Schedule partial parsing for schema files with env_var changes ([#3885](https://github.com/dbt-labs/dbt-core/issues/3885), [#4162](https://github.com/dbt-labs/dbt-core/pull/4162))

Contributors:
- [@sungchun12](https://github.com/sungchun12) ([#4017](https://github.com/dbt-labs/dbt/pull/4017))
Expand Down
6 changes: 4 additions & 2 deletions core/dbt/clients/jinja.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from dbt.utils import (
get_dbt_macro_name, get_docs_macro_name, get_materialization_macro_name,
get_test_macro_name, deep_map
get_test_macro_name, deep_map_render
)

from dbt.clients._jinja_blocks import BlockIterator, BlockData, BlockTag
Expand Down Expand Up @@ -661,5 +661,7 @@ def _convert_function(

return value

kwargs = deep_map(_convert_function, node.test_metadata.kwargs)
# The test_metadata.kwargs come from the test builder, and were set
# when the test node was created in _parse_generic_test.
kwargs = deep_map_render(_convert_function, node.test_metadata.kwargs)
context[GENERIC_TEST_KWARGS_NAME] = kwargs
68 changes: 2 additions & 66 deletions core/dbt/config/renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
from dbt.exceptions import (
DbtProjectError, CompilationException, RecursionException
)
from dbt.node_types import NodeType
from dbt.utils import deep_map
from dbt.utils import deep_map_render


Keypath = Tuple[Union[str, int], ...]
Expand Down Expand Up @@ -47,7 +46,7 @@ def render_data(
self, data: Dict[str, Any]
) -> Dict[str, Any]:
try:
return deep_map(self.render_entry, data)
return deep_map_render(self.render_entry, data)
except RecursionException:
raise DbtProjectError(
f'Cycle detected: {self.name} input has a reference to itself',
Expand Down Expand Up @@ -163,69 +162,6 @@ def name(self):
'Profile'


class SchemaYamlRenderer(BaseRenderer):
DOCUMENTABLE_NODES = frozenset(
n.pluralize() for n in NodeType.documentable()
)

@property
def name(self):
return 'Rendering yaml'

def _is_norender_key(self, keypath: Keypath) -> bool:
"""
models:
- name: blah
- description: blah
tests: ...
- columns:
- name:
- description: blah
tests: ...
Return True if it's tests or description - those aren't rendered
"""
if len(keypath) >= 2 and keypath[1] in ('tests', 'description'):
return True

if (
len(keypath) >= 4 and
keypath[1] == 'columns' and
keypath[3] in ('tests', 'description')
):
return True

return False

# don't render descriptions or test keyword arguments
def should_render_keypath(self, keypath: Keypath) -> bool:
if len(keypath) < 2:
return True

if keypath[0] not in self.DOCUMENTABLE_NODES:
return True

if len(keypath) < 3:
return True

if keypath[0] == NodeType.Source.pluralize():
if keypath[2] == 'description':
return False
if keypath[2] == 'tables':
if self._is_norender_key(keypath[3:]):
return False
elif keypath[0] == NodeType.Macro.pluralize():
if keypath[2] == 'arguments':
if self._is_norender_key(keypath[3:]):
return False
elif self._is_norender_key(keypath[1:]):
return False
else: # keypath[0] in self.DOCUMENTABLE_NODES:
if self._is_norender_key(keypath[1:]):
return False
return True


class PackageRenderer(BaseRenderer):
@property
def name(self):
Expand Down
41 changes: 35 additions & 6 deletions core/dbt/context/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@

from dbt import flags
from dbt import tracking
from dbt.clients.jinja import undefined_error, get_rendered
from dbt.clients.jinja import get_rendered
from dbt.clients.yaml_helper import ( # noqa: F401
yaml, safe_load, SafeLoader, Loader, Dumper
)
from dbt.contracts.graph.compiled import CompiledResource
from dbt.exceptions import raise_compiler_error, MacroReturn
from dbt.exceptions import raise_compiler_error, MacroReturn, raise_parsing_error
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.version import __version__ as dbt_version

Expand All @@ -21,6 +21,38 @@
import datetime
import re

# Contexts in dbt Core
# Contexts are used for Jinja rendering. They include context methods,
# executable macros, and various settings that are available in Jinja.
#
# Different contexts are used in different places because we allow access
# to different methods and data in different places. Executable SQL, for
# example, includes the available macros and the model, while Jinja in
# yaml files is more limited.
#
# The context that is passed to Jinja is always in a dictionary format,
# not an actual class, so a 'to_dict()' is executed on a context class
# before it is used for rendering.
#
# Each context has a generate_<name>_context function to create the context.
# ProviderContext subclasses have different generate functions for
# parsing and for execution.
#
# Context class hierarchy
#
# BaseContext -- core/dbt/context/base.py
# TargetContext -- core/dbt/context/target.py
# ConfiguredContext -- core/dbt/context/configured.py
# SchemaYamlContext -- core/dbt/context/configured.py
# DocsRuntimeContext -- core/dbt/context/configured.py
# MacroResolvingContext -- core/dbt/context/configured.py
# ManifestContext -- core/dbt/context/manifest.py
# QueryHeaderContext -- core/dbt/context/manifest.py
# ProviderContext -- core/dbt/context/provider.py
# MacroContext -- core/dbt/context/provider.py
# ModelContext -- core/dbt/context/provider.py
# TestContext -- core/dbt/context/provider.py


def get_pytz_module_context() -> Dict[str, Any]:
context_exports = pytz.__all__ # type: ignore
Expand Down Expand Up @@ -164,8 +196,6 @@ class BaseContext(metaclass=ContextMeta):
def __init__(self, cli_vars):
self._ctx = {}
self.cli_vars = cli_vars
# Save the env_vars encountered using this
self.env_vars = {}

def generate_builtins(self):
builtins: Dict[str, Any] = {}
Expand Down Expand Up @@ -287,11 +317,10 @@ def env_var(self, var: str, default: Optional[str] = None) -> str:
return_value = default

if return_value is not None:
self.env_vars[var] = return_value
return return_value
else:
msg = f"Env var required but not provided: '{var}'"
undefined_error(msg)
raise_parsing_error(msg)

if os.environ.get('DBT_MACRO_DEBUGGING'):
@contextmember
Expand Down
36 changes: 31 additions & 5 deletions core/dbt/context/configured.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from typing import Any, Dict
import os
from typing import Any, Dict, Optional

from dbt.contracts.connection import AdapterRequiredConfig
from dbt.logger import SECRET_ENV_PREFIX
from dbt.node_types import NodeType
from dbt.utils import MultiDict

from dbt.context.base import contextproperty, Var
from dbt.context.base import contextproperty, contextmember, Var
from dbt.context.target import TargetContext
from dbt.exceptions import raise_parsing_error


class ConfiguredContext(TargetContext):
Expand Down Expand Up @@ -64,18 +67,41 @@ def __call__(self, var_name, default=Var._VAR_NOTSET):
return self.get_missing_var(var_name)


class SchemaYamlVars():
def __init__(self):
self.env_vars = {}
self.vars = {}


class SchemaYamlContext(ConfiguredContext):
# subclass is DocsRuntimeContext
def __init__(self, config, project_name: str):
def __init__(self, config, project_name: str, schema_yaml_vars: Optional[SchemaYamlVars]):
super().__init__(config)
self._project_name = project_name
self.schema_yaml_vars = schema_yaml_vars

@contextproperty
def var(self) -> ConfiguredVar:
return ConfiguredVar(
self._ctx, self.config, self._project_name
)

@contextmember
def env_var(self, var: str, default: Optional[str] = None) -> str:
return_value = None
if var in os.environ:
return_value = os.environ[var]
elif default is not None:
return_value = default

if return_value is not None:
if not var.startswith(SECRET_ENV_PREFIX) and self.schema_yaml_vars:
self.schema_yaml_vars.env_vars[var] = return_value
return return_value
else:
msg = f"Env var required but not provided: '{var}'"
raise_parsing_error(msg)


class MacroResolvingContext(ConfiguredContext):
def __init__(self, config):
Expand All @@ -89,9 +115,9 @@ def var(self) -> ConfiguredVar:


def generate_schema_yml_context(
config: AdapterRequiredConfig, project_name: str
config: AdapterRequiredConfig, project_name: str, schema_yaml_vars: SchemaYamlVars = None
) -> Dict[str, Any]:
ctx = SchemaYamlContext(config, project_name)
ctx = SchemaYamlContext(config, project_name, schema_yaml_vars)
return ctx.to_dict()


Expand Down
2 changes: 1 addition & 1 deletion core/dbt/context/docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def __init__(
manifest: Manifest,
current_project: str,
) -> None:
super().__init__(config, current_project)
super().__init__(config, current_project, None)
self.node = node
self.manifest = manifest

Expand Down
38 changes: 33 additions & 5 deletions core/dbt/context/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@
get_adapter, get_adapter_package_names, get_adapter_type_names
)
from dbt.clients import agate_helper
from dbt.clients.jinja import get_rendered, MacroGenerator, MacroStack, undefined_error
from dbt.clients.jinja import get_rendered, MacroGenerator, MacroStack
from dbt.config import RuntimeConfig, Project
from .base import contextmember, contextproperty, Var
from .configured import FQNLookup
from .context_config import ContextConfig
from dbt.logger import SECRET_ENV_PREFIX
from dbt.context.macro_resolver import MacroResolver, TestMacroNamespace
from .macros import MacroNamespaceBuilder, MacroNamespace
from .manifest import ManifestContext
Expand Down Expand Up @@ -47,6 +48,7 @@
ref_bad_context,
source_target_not_found,
wrapped_exports,
raise_parsing_error,
)
from dbt.config import IsFQNResource
from dbt.logger import GLOBAL_LOGGER as logger, SECRET_ENV_PREFIX # noqa
Expand Down Expand Up @@ -1176,15 +1178,19 @@ def env_var(self, var: str, default: Optional[str] = None) -> str:
return_value = default

if return_value is not None:
# Save the env_var value in the manifest and the var name in the source_file
if not var.startswith(SECRET_ENV_PREFIX) and self.model:
# Save the env_var value in the manifest and the var name in the source_file.
# If this is compiling, do not save because it's irrelevant to parsing.
if (not var.startswith(SECRET_ENV_PREFIX) and self.model and
not hasattr(self.model, 'compiled')):
self.manifest.env_vars[var] = return_value
source_file = self.manifest.files[self.model.file_id]
source_file.env_vars.append(var)
# Schema files should never get here
if source_file.parse_file_type != 'schema':
source_file.env_vars.append(var)
return return_value
else:
msg = f"Env var required but not provided: '{var}'"
undefined_error(msg)
raise_parsing_error(msg)


class MacroContext(ProviderContext):
Expand Down Expand Up @@ -1433,6 +1439,28 @@ def _build_test_namespace(self):
)
self.namespace = macro_namespace

@contextmember
def env_var(self, var: str, default: Optional[str] = None) -> str:
return_value = None
if var in os.environ:
return_value = os.environ[var]
elif default is not None:
return_value = default

if return_value is not None:
# Save the env_var value in the manifest and the var name in the source_file
if not var.startswith(SECRET_ENV_PREFIX) and self.model:
self.manifest.env_vars[var] = return_value
# the "model" should only be test nodes, but just in case, check
if self.model.resource_type == NodeType.Test and self.model.file_key_name:
source_file = self.manifest.files[self.model.file_id]
(yaml_key, name) = self.model.file_key_name.split('.')
source_file.add_env_var(var, yaml_key, name)
return return_value
else:
msg = f"Env var required but not provided: '{var}'"
raise_parsing_error(msg)


def generate_test_context(
model: ManifestNode,
Expand Down
16 changes: 16 additions & 0 deletions core/dbt/contracts/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,5 +301,21 @@ def get_all_test_ids(self):
test_ids.extend(self.tests[key][name])
return test_ids

def add_env_var(self, var, yaml_key, name):
if yaml_key not in self.env_vars:
self.env_vars[yaml_key] = {}
if name not in self.env_vars[yaml_key]:
self.env_vars[yaml_key][name] = []
if var not in self.env_vars[yaml_key][name]:
self.env_vars[yaml_key][name].append(var)

def delete_from_env_vars(self, yaml_key, name):
# We delete all vars for this yaml_key/name because the
# entry has been scheduled for reparsing.
if yaml_key in self.env_vars and name in self.env_vars[yaml_key]:
del self.env_vars[yaml_key][name]
if not self.env_vars[yaml_key]:
del self.env_vars[yaml_key]


AnySourceFile = Union[SchemaSourceFile, SourceFile]
1 change: 1 addition & 0 deletions core/dbt/contracts/graph/compiled.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ class CompiledGenericTestNode(CompiledNode, HasTestMetadata):
# keep this in sync with ParsedGenericTestNode!
resource_type: NodeType = field(metadata={'restrict': [NodeType.Test]})
column_name: Optional[str] = None
file_key_name: Optional[str] = None
# Was not able to make mypy happy and keep the code working. We need to
# refactor the various configs.
config: TestConfig = field(default_factory=TestConfig) # type:ignore
Expand Down
4 changes: 4 additions & 0 deletions core/dbt/contracts/graph/parsed.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,9 @@ def same_body(self: T, other: T) -> bool:
@dataclass
class TestMetadata(dbtClassMixin, Replaceable):
name: str
# kwargs are the args that are left in the test builder after
# removing configs. They are set from the test builder when
# the test node is created.
kwargs: Dict[str, Any] = field(default_factory=dict)
namespace: Optional[str] = None

Expand All @@ -432,6 +435,7 @@ class ParsedGenericTestNode(ParsedNode, HasTestMetadata):
# keep this in sync with CompiledGenericTestNode!
resource_type: NodeType = field(metadata={'restrict': [NodeType.Test]})
column_name: Optional[str] = None
file_key_name: Optional[str] = None
# Was not able to make mypy happy and keep the code working. We need to
# refactor the various configs.
config: TestConfig = field(default_factory=TestConfig) # type: ignore
Expand Down
Loading

0 comments on commit bda70c9

Please sign in to comment.