Skip to content

Commit

Permalink
Performance fixes, including supporting libyaml, caching
Browse files Browse the repository at this point in the history
mapped_fields in the classes for 'from_dict', removing deepcopy
on fqn_search, separating validation from 'from_dict',
and special handling for dbt internal not_null and unique tests.
Use TestMacroNamespace instead of the original namespace in order to limit
the number of macros in the context. Integrate mashumaro into
dbt to improve performance of 'from_dict' and 'to_dict'.
  • Loading branch information
gshank committed Feb 5, 2021
1 parent 2b48152 commit 6c6649f
Show file tree
Hide file tree
Showing 109 changed files with 1,707 additions and 875 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Contributors:

### Under the hood
- Bump werkzeug upper bound dependency to `<v2.0` ([#3011](https://github.com/fishtown-analytics/dbt/pull/3011))
- Performance fixes for many different things ([#2862](https://github.com/fishtown-analytics/dbt/issues/2862), [#3034](https://github.com/fishtown-analytics/dbt/pull/3034))

Contributors:
- [@Bl3f](https://github.com/Bl3f) ([#3011](https://github.com/fishtown-analytics/dbt/pull/3011))
Expand Down
6 changes: 2 additions & 4 deletions core/dbt/adapters/base/column.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
from dataclasses import dataclass
import re
from typing import Dict, ClassVar, Any, Optional

from hologram import JsonSchemaMixin
from dbt.exceptions import RuntimeException

from typing import Dict, ClassVar, Any, Optional


@dataclass
class Column(JsonSchemaMixin):
class Column:
TYPE_LABELS: ClassVar[Dict[str, str]] = {
'STRING': 'TEXT',
'TIMESTAMP': 'TIMESTAMP',
Expand Down
10 changes: 5 additions & 5 deletions core/dbt/adapters/base/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from dbt.contracts.graph.compiled import (
CompileResultNode, CompiledSeedNode
)
from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.graph.manifest import Manifest, MacroManifest
from dbt.contracts.graph.parsed import ParsedSeedNode
from dbt.exceptions import warn_or_error
from dbt.node_types import NodeType
Expand Down Expand Up @@ -160,7 +160,7 @@ def __init__(self, config):
self.config = config
self.cache = RelationsCache()
self.connections = self.ConnectionManager(config)
self._macro_manifest_lazy: Optional[Manifest] = None
self._macro_manifest_lazy: Optional[MacroManifest] = None

###
# Methods that pass through to the connection manager
Expand Down Expand Up @@ -259,18 +259,18 @@ def type(cls) -> str:
return cls.ConnectionManager.TYPE

@property
def _macro_manifest(self) -> Manifest:
def _macro_manifest(self) -> MacroManifest:
if self._macro_manifest_lazy is None:
return self.load_macro_manifest()
return self._macro_manifest_lazy

def check_macro_manifest(self) -> Optional[Manifest]:
def check_macro_manifest(self) -> Optional[MacroManifest]:
"""Return the internal manifest (used for executing macros) if it's
been initialized, otherwise return None.
"""
return self._macro_manifest_lazy

def load_macro_manifest(self) -> Manifest:
def load_macro_manifest(self) -> MacroManifest:
if self._macro_manifest_lazy is None:
# avoid a circular import
from dbt.parser.manifest import load_macro_manifest
Expand Down
2 changes: 1 addition & 1 deletion core/dbt/adapters/base/relation.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@

@dataclass(frozen=True, eq=False, repr=False)
class BaseRelation(FakeAPIObject, Hashable):
type: Optional[RelationType]
path: Path
type: Optional[RelationType] = None
quote_character: str = '"'
include_policy: Policy = Policy()
quote_policy: Policy = Policy()
Expand Down
5 changes: 5 additions & 0 deletions core/dbt/clients/jinja.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ def get_macro(self):
template = self.get_template()
# make the module. previously we set both vars and local, but that's
# redundant: They both end up in the same place
# make_module is in jinja2.environment. It returns a TemplateModule
module = template.make_module(vars=self.context, shared=False)
macro = module.__dict__[get_dbt_macro_name(name)]
module.__dict__.update(self.context)
Expand All @@ -244,6 +245,7 @@ def exception_handler(self) -> Iterator[None]:
raise_compiler_error(str(e))

def call_macro(self, *args, **kwargs):
# called from __call__ methods
if self.context is None:
raise InternalException(
'Context is still None in call_macro!'
Expand Down Expand Up @@ -306,8 +308,10 @@ def exception_handler(self) -> Iterator[None]:
e.stack.append(self.macro)
raise e

# This adds the macro's unique id to the node's 'depends_on'
@contextmanager
def track_call(self):
# This is only called from __call__
if self.stack is None or self.node is None:
yield
else:
Expand All @@ -322,6 +326,7 @@ def track_call(self):
finally:
self.stack.pop(unique_id)

# this makes MacroGenerator objects callable like functions
def __call__(self, *args, **kwargs):
with self.track_call():
return self.call_macro(*args, **kwargs)
Expand Down
4 changes: 3 additions & 1 deletion core/dbt/clients/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,9 @@ def run_cmd(
return out, err


def download(url: str, path: str, timeout: Union[float, tuple] = None) -> None:
def download(
url: str, path: str, timeout: Optional[Union[float, tuple]] = None
) -> None:
path = convert_path(path)
connection_timeout = timeout or float(os.getenv('DBT_HTTP_TIMEOUT', 10))
response = requests.get(url, timeout=connection_timeout)
Expand Down
15 changes: 9 additions & 6 deletions core/dbt/clients/yaml_helper.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
from typing import Any

import dbt.exceptions

import yaml
import yaml.scanner

# the C version is faster, but it doesn't always exist
YamlLoader: Any
try:
from yaml import CSafeLoader as YamlLoader
from yaml import (
CLoader as Loader,
CSafeLoader as SafeLoader,
CDumper as Dumper
)
except ImportError:
from yaml import SafeLoader as YamlLoader
from yaml import ( # type: ignore # noqa: F401
Loader, SafeLoader, Dumper
)


YAML_ERROR_MESSAGE = """
Expand Down Expand Up @@ -54,7 +57,7 @@ def contextualized_yaml_error(raw_contents, error):


def safe_load(contents):
return yaml.load(contents, Loader=YamlLoader)
return yaml.load(contents, Loader=SafeLoader)


def load_yaml_text(contents):
Expand Down
16 changes: 10 additions & 6 deletions core/dbt/config/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Any, Dict, Optional, Tuple
import os

from hologram import ValidationError
from dbt.dataclass_schema import ValidationError

from dbt.clients.system import load_file_contents
from dbt.clients.yaml_helper import load_yaml_text
Expand Down Expand Up @@ -75,6 +75,7 @@ def read_user_config(directory: str) -> UserConfig:
if profile:
user_cfg = coerce_dict_str(profile.get('config', {}))
if user_cfg is not None:
UserConfig.validate(user_cfg)
return UserConfig.from_dict(user_cfg)
except (RuntimeException, ValidationError):
pass
Expand Down Expand Up @@ -137,10 +138,10 @@ def __eq__(self, other: object) -> bool:
def validate(self):
try:
if self.credentials:
self.credentials.to_dict(validate=True)
ProfileConfig.from_dict(
self.to_profile_info(serialize_credentials=True)
)
dct = self.credentials.to_dict()
self.credentials.validate(dct)
dct = self.to_profile_info(serialize_credentials=True)
ProfileConfig.validate(dct)
except ValidationError as exc:
raise DbtProfileError(validator_error_message(exc)) from exc

Expand All @@ -160,7 +161,9 @@ def _credentials_from_profile(
typename = profile.pop('type')
try:
cls = load_plugin(typename)
credentials = cls.from_dict(profile)
data = cls.translate_aliases(profile)
cls.validate(data)
credentials = cls.from_dict(data)
except (RuntimeException, ValidationError) as e:
msg = str(e) if isinstance(e, RuntimeException) else e.message
raise DbtProfileError(
Expand Down Expand Up @@ -233,6 +236,7 @@ def from_credentials(
"""
if user_cfg is None:
user_cfg = {}
UserConfig.validate(user_cfg)
config = UserConfig.from_dict(user_cfg)

profile = cls(
Expand Down
13 changes: 7 additions & 6 deletions core/dbt/config/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,12 @@
from dbt.utils import MultiDict
from dbt.node_types import NodeType
from dbt.config.selectors import SelectorDict

from dbt.contracts.project import (
Project as ProjectContract,
SemverString,
)
from dbt.contracts.project import PackageConfig

from hologram import ValidationError

from dbt.dataclass_schema import ValidationError
from .renderer import DbtProjectYamlRenderer
from .selectors import (
selector_config_from_data,
Expand Down Expand Up @@ -101,6 +98,7 @@ def package_config_from_data(packages_data: Dict[str, Any]):
packages_data = {'packages': []}

try:
PackageConfig.validate(packages_data)
packages = PackageConfig.from_dict(packages_data)
except ValidationError as e:
raise DbtProjectError(
Expand Down Expand Up @@ -306,7 +304,10 @@ def create_project(self, rendered: RenderComponents) -> 'Project':
)

try:
cfg = ProjectContract.from_dict(rendered.project_dict)
ProjectContract.validate(rendered.project_dict)
cfg = ProjectContract.from_dict(
rendered.project_dict
)
except ValidationError as e:
raise DbtProjectError(validator_error_message(e)) from e
# name/version are required in the Project definition, so we can assume
Expand Down Expand Up @@ -586,7 +587,7 @@ def to_project_config(self, with_packages=False):

def validate(self):
try:
ProjectContract.from_dict(self.to_project_config())
ProjectContract.validate(self.to_project_config())
except ValidationError as e:
raise DbtProjectError(validator_error_message(e)) from e

Expand Down
6 changes: 3 additions & 3 deletions core/dbt/config/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
raise_compiler_error
)

from hologram import ValidationError
from dbt.dataclass_schema import ValidationError


def _project_quoting_dict(
Expand Down Expand Up @@ -174,7 +174,7 @@ def validate(self):
:raises DbtProjectError: If the configuration fails validation.
"""
try:
Configuration.from_dict(self.serialize())
Configuration.validate(self.serialize())
except ValidationError as e:
raise DbtProjectError(validator_error_message(e)) from e

Expand Down Expand Up @@ -391,7 +391,7 @@ def __getattribute__(self, name):
f"'UnsetConfig' object has no attribute {name}"
)

def to_dict(self):
def __post_serialize__(self, dct, options=None):
return {}


Expand Down
16 changes: 9 additions & 7 deletions core/dbt/config/selectors.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from pathlib import Path
from typing import Dict, Any
import yaml

from hologram import ValidationError
from dbt.clients.yaml_helper import ( # noqa: F401
yaml, Loader, Dumper, load_yaml_text
)
from dbt.dataclass_schema import ValidationError

from .renderer import SelectorRenderer

Expand All @@ -11,7 +12,6 @@
path_exists,
resolve_path_from_base,
)
from dbt.clients.yaml_helper import load_yaml_text
from dbt.contracts.selection import SelectorFile
from dbt.exceptions import DbtSelectorsError, RuntimeException
from dbt.graph import parse_from_selectors_definition, SelectionSpec
Expand All @@ -30,9 +30,11 @@


class SelectorConfig(Dict[str, SelectionSpec]):

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> 'SelectorConfig':
def selectors_from_dict(cls, data: Dict[str, Any]) -> 'SelectorConfig':
try:
SelectorFile.validate(data)
selector_file = SelectorFile.from_dict(data)
selectors = parse_from_selectors_definition(selector_file)
except ValidationError as exc:
Expand Down Expand Up @@ -66,7 +68,7 @@ def render_from_dict(
f'Could not render selector data: {exc}',
result_type='invalid_selector',
) from exc
return cls.from_dict(rendered)
return cls.selectors_from_dict(rendered)

@classmethod
def from_path(
Expand Down Expand Up @@ -107,7 +109,7 @@ def selector_config_from_data(
selectors_data = {'selectors': []}

try:
selectors = SelectorConfig.from_dict(selectors_data)
selectors = SelectorConfig.selectors_from_dict(selectors_data)
except ValidationError as e:
raise DbtSelectorsError(
MALFORMED_SELECTOR_ERROR.format(error=str(e.message)),
Expand Down
8 changes: 5 additions & 3 deletions core/dbt/context/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@
from dbt import flags
from dbt import tracking
from dbt.clients.jinja import undefined_error, get_rendered
from dbt.clients import yaml_helper
from dbt.clients.yaml_helper import ( # noqa: F401
yaml, safe_load, SafeLoader, Loader, Dumper
)
from dbt.contracts.graph.compiled import CompiledResource
from dbt.exceptions import raise_compiler_error, MacroReturn
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.version import __version__ as dbt_version

import yaml
# These modules are added to the context. Consider alternative
# approaches which will extend well to potentially many modules
import pytz
Expand Down Expand Up @@ -172,6 +173,7 @@ def generate_builtins(self):
builtins[key] = value
return builtins

# no dbtClassMixin so this is not an actual override
def to_dict(self):
self._ctx['context'] = self._ctx
builtins = self.generate_builtins()
Expand Down Expand Up @@ -394,7 +396,7 @@ def fromyaml(value: str, default: Any = None) -> Any:
-- ["good"]
"""
try:
return yaml_helper.safe_load(value)
return safe_load(value)
except (AttributeError, ValueError, yaml.YAMLError):
return default

Expand Down
2 changes: 1 addition & 1 deletion core/dbt/context/context_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def initial_result(self, resource_type: NodeType, base: bool) -> C:
# Calculate the defaults. We don't want to validate the defaults,
# because it might be invalid in the case of required config members
# (such as on snapshots!)
result = config_cls.from_dict({}, validate=False)
result = config_cls.from_dict({})
return result

def _update_from_config(
Expand Down
Loading

0 comments on commit 6c6649f

Please sign in to comment.