From a1aa8639fec8186654bb0c57276433adb09a01a8 Mon Sep 17 00:00:00 2001
From: Drew Banin
Date: Fri, 9 Feb 2018 20:33:47 -0500
Subject: [PATCH 1/4] Fix for redshift varchar bug (#647)

* Fix for redshift varchar bug

* pep8 on a sql string, smh
---
 dbt/adapters/redshift.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbt/adapters/redshift.py b/dbt/adapters/redshift.py
index 278851d546a..07aba16518a 100644
--- a/dbt/adapters/redshift.py
+++ b/dbt/adapters/redshift.py
@@ -45,7 +45,7 @@ def _get_columns_in_table_sql(cls, schema_name, table_name):
             col_type,
             case
                 when col_type like 'character%'
-                then REGEXP_SUBSTR(col_type, '[0-9]+')::int
+                then nullif(REGEXP_SUBSTR(col_type, '[0-9]+'), '')::int
                 else null
             end as character_maximum_length

From 76098ea883f5a8a873104ab371e34172d4e65e18 Mon Sep 17 00:00:00 2001
From: Drew Banin
Date: Fri, 9 Feb 2018 20:34:06 -0500
Subject: [PATCH 2/4] Set global variable overrides on the command line with
 --vars (#640)

* Set global variable overrides on the command line with --vars

* pep8

* integration tests for cli vars
---
 dbt/main.py                                   | 10 ++++
 dbt/project.py                                |  6 +++
 dbt/utils.py                                  | 18 +++++++
 .../models_complex/complex_model.sql          |  6 +++
 .../028_cli_vars/models_complex/schema.yml    |  7 +++
 .../028_cli_vars/models_simple/schema.yml     |  5 ++
 .../models_simple/simple_model.sql            |  4 ++
 .../integration/028_cli_vars/test_cli_vars.py | 54 +++++++++++++++++++
 8 files changed, 110 insertions(+)
 create mode 100644 test/integration/028_cli_vars/models_complex/complex_model.sql
 create mode 100644 test/integration/028_cli_vars/models_complex/schema.yml
 create mode 100644 test/integration/028_cli_vars/models_simple/schema.yml
 create mode 100644 test/integration/028_cli_vars/models_simple/simple_model.sql
 create mode 100644 test/integration/028_cli_vars/test_cli_vars.py

diff --git a/dbt/main.py b/dbt/main.py
index 8f7c7c892c1..0890e64a61c 100644
--- a/dbt/main.py
+++ b/dbt/main.py
@@ -295,6 +295,16 @@ def parse_args(args):
         help='Which target to load for the given profile'
     )

+    base_subparser.add_argument(
+        '--vars',
+        type=str,
+        default='{}',
+        help="""
+        Supply variables to the project. This argument overrides
+        variables defined in your dbt_project.yml file. This argument
+        should be a YAML string, eg. '{my_variable: my_value}'"""
+    )
+
     sub = subs.add_parser('init', parents=[base_subparser])
     sub.add_argument('project_name', type=str, help='Name of the new project')
     sub.set_defaults(cls=init_task.InitTask, which='init')

diff --git a/dbt/project.py b/dbt/project.py
index 1f97ddd689a..bdb1f811076 100644
--- a/dbt/project.py
+++ b/dbt/project.py
@@ -87,6 +87,12 @@ def __init__(self, cfg, profiles, profiles_dir, profile_to_load=None,
                 "Could not find profile named '{}'"
                 .format(self.profile_to_load), self)

+        global_vars = dbt.utils.parse_cli_vars(getattr(args, 'vars', '{}'))
+        if 'vars' not in self.cfg['models']:
+            self.cfg['models']['vars'] = {}
+
+        self.cfg['models']['vars'].update(global_vars)
+
     def __str__(self):
         return pprint.pformat({'project': self.cfg,
                                'profiles': self.profiles})

diff --git a/dbt/utils.py b/dbt/utils.py
index 7830b6b337b..c0008b95e5a 100644
--- a/dbt/utils.py
+++ b/dbt/utils.py
@@ -9,6 +9,7 @@
 from dbt.compat import basestring
 from dbt.logger import GLOBAL_LOGGER as logger
 from dbt.node_types import NodeType
+from dbt.clients import yaml_helper


 DBTConfigKeys = [
@@ -375,3 +376,20 @@ def invalid_ref_fail_unless_test(node, target_model_name,
         node,
         target_model_name,
         target_model_package)
+
+
+def parse_cli_vars(var_string):
+    try:
+        cli_vars = yaml_helper.load_yaml_text(var_string)
+        var_type = type(cli_vars)
+        if var_type == dict:
+            return cli_vars
+        else:
+            type_name = var_type.__name__
+            dbt.exceptions.raise_compiler_error(
+                "The --vars argument must be a YAML dictionary, but was "
+                "of type '{}'".format(type_name))
+    except dbt.exceptions.ValidationException as e:
+        logger.error(
+            "The YAML provided in the --vars argument is not valid.\n")
+        raise
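Note: the parse_cli_vars helper above accepts any YAML document but only keeps dictionaries. A minimal standalone sketch of that behavior, using PyYAML's safe_load in place of dbt's yaml_helper wrapper (an assumption for illustration only):

    import yaml

    def parse_cli_vars_sketch(var_string):
        # '{my_variable: my_value}' and 'my_variable: my_value'
        # both parse to a dict
        parsed = yaml.safe_load(var_string)
        if isinstance(parsed, dict):
            return parsed
        # anything else (a list, a bare scalar) is rejected, mirroring
        # the raise_compiler_error branch above
        raise ValueError(
            "The --vars argument must be a YAML dictionary, but was "
            "of type '{}'".format(type(parsed).__name__))

    assert parse_cli_vars_sketch('{my_variable: my_value}') == \
        {'my_variable': 'my_value'}
    assert parse_cli_vars_sketch('simple: abc') == {'simple': 'abc'}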
diff --git a/test/integration/028_cli_vars/models_complex/complex_model.sql b/test/integration/028_cli_vars/models_complex/complex_model.sql
new file mode 100644
index 00000000000..1022c648100
--- /dev/null
+++ b/test/integration/028_cli_vars/models_complex/complex_model.sql
@@ -0,0 +1,6 @@
+
+select
+    '{{ var("variable_1") }}'::varchar as var_1,
+    '{{ var("variable_2")[0] }}'::varchar as var_2,
+    '{{ var("variable_3")["value"] }}'::varchar as var_3
+

diff --git a/test/integration/028_cli_vars/models_complex/schema.yml b/test/integration/028_cli_vars/models_complex/schema.yml
new file mode 100644
index 00000000000..1b9f3156167
--- /dev/null
+++ b/test/integration/028_cli_vars/models_complex/schema.yml
@@ -0,0 +1,7 @@
+
+complex_model:
+  constraints:
+    accepted_values:
+      - {field: var_1, values: ["abc"]}
+      - {field: var_2, values: ["def"]}
+      - {field: var_3, values: ["jkl"]}

diff --git a/test/integration/028_cli_vars/models_simple/schema.yml b/test/integration/028_cli_vars/models_simple/schema.yml
new file mode 100644
index 00000000000..98e80979a55
--- /dev/null
+++ b/test/integration/028_cli_vars/models_simple/schema.yml
@@ -0,0 +1,5 @@
+
+simple_model:
+  constraints:
+    accepted_values:
+      - {field: simple, values: ["abc"]}

diff --git a/test/integration/028_cli_vars/models_simple/simple_model.sql b/test/integration/028_cli_vars/models_simple/simple_model.sql
new file mode 100644
index 00000000000..084bd57012e
--- /dev/null
+++ b/test/integration/028_cli_vars/models_simple/simple_model.sql
@@ -0,0 +1,4 @@
+
+select
+    '{{ var("simple") }}'::varchar as simple
+

diff --git a/test/integration/028_cli_vars/test_cli_vars.py b/test/integration/028_cli_vars/test_cli_vars.py
new file mode 100644
index 00000000000..347a20e4dac
--- /dev/null
+++ b/test/integration/028_cli_vars/test_cli_vars.py
@@ -0,0 +1,54 @@
+from nose.plugins.attrib import attr
+from test.integration.base import DBTIntegrationTest
+import yaml
+
+
+class TestCLIVars(DBTIntegrationTest):
+    @property
+    def schema(self):
+        return "cli_vars_028"
+
+    @property
+    def models(self):
+        return "test/integration/028_cli_vars/models_complex"
+
+    @attr(type='postgres')
+    def test__cli_vars_longform(self):
+        self.use_default_project()
+        self.use_profile('postgres')
+
+        cli_vars = {
+            "variable_1": "abc",
+            "variable_2": ["def", "ghi"],
+            "variable_3": {
+                "value": "jkl"
+            }
+        }
+        self.run_dbt(["run", "--vars", yaml.dump(cli_vars)])
+        self.run_dbt(["test"])
+
+
+class TestCLIVarsSimple(DBTIntegrationTest):
+    @property
+    def schema(self):
+        return "cli_vars_028"
+
+    @property
+    def models(self):
+        return "test/integration/028_cli_vars/models_simple"
+
+    @attr(type='postgres')
+    def test__cli_vars_shorthand(self):
+        self.use_default_project()
+        self.use_profile('postgres')
+
+        self.run_dbt(["run", "--vars", "simple: abc"])
+        self.run_dbt(["test"])
+
+    @attr(type='postgres')
+    def test__cli_vars_longer(self):
+        self.use_default_project()
+        self.use_profile('postgres')
+
+        self.run_dbt(["run", "--vars", "{simple: abc, unused: def}"])
+        self.run_dbt(["test"])
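Note: end to end, Project.__init__ merges the parsed CLI vars over whatever vars block the project configuration already declares under models, so command-line values win on key collisions. Sketched with plain dicts (the cfg fragment here is hypothetical):

    # hypothetical config fragment standing in for self.cfg
    cfg = {'models': {'vars': {'simple': 'from_project', 'other': 1}}}
    global_vars = {'simple': 'abc'}  # parsed from --vars

    cfg['models'].setdefault('vars', {})
    cfg['models']['vars'].update(global_vars)

    # the CLI value replaced the project value; unrelated keys survive
    assert cfg['models']['vars'] == {'simple': 'abc', 'other': 1}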
From 0372fefae0e194dfe357d952e1e7cb2f336fe231 Mon Sep 17 00:00:00 2001
From: Buck Ryan
Date: Sat, 10 Feb 2018 11:28:24 -0500
Subject: [PATCH 3/4] Seed rewrite (#618)

* loader for seed data files

* Functioning rework of seed task

* Make CompilerRunner fns private and impl. SeedRunner.compile

Trying to distinguish between the public/private interface for this
class. And the SeedRunner doesn't need the functionality in the compile
function, it just needs a compile function to exist for use in the
compilation process.

* Test changes and fixes
* make the DB setup script usable locally
* convert simple copy test to use seed
* Fixes to get Snowflake working
* New seed flag and make it non-destructive by default
* Convert update SQL script to another seed
* cleanup
* implement bigquery csv load
* context handling of StringIO
* Better typing
* strip seeder and csvkit dependency
* update bigquery to use new data typing and to fix unicode issue
* update seed test
* fix abstract functions in base adapter
* support time type
* try pinning crypto, pyopenssl versions
* remove unnecessary version pins
* insert all at once, rather than one query per row
* do not quote field names on creation
* bad
* quiet down parsedatetime logger
* pep8
* UI updates + node conformity for seed nodes
* add seed to list of resource types, cleanup
* show option for CSVs
* typo
* pep8
* move agate import to avoid strange warnings
* deprecation warning for --drop-existing
* quote column names in seed files
* revert quoting change (breaks Snowflake). Hush warnings
---
 dbt/adapters/bigquery.py                      |  56 ++++-
 dbt/adapters/default.py                       |  85 +++++++-
 dbt/adapters/postgres.py                      |  71 +++++++
 dbt/adapters/redshift.py                      |  11 +
 dbt/adapters/snowflake.py                     |  10 +-
 dbt/compilation.py                            |  11 +-
 dbt/contracts/graph/parsed.py                 |   4 +
 dbt/contracts/graph/unparsed.py               |   3 +-
 dbt/deprecations.py                           |   8 +
 dbt/loader.py                                 |  15 ++
 dbt/logger.py                                 |   1 +
 dbt/main.py                                   |  29 ++-
 dbt/node_runners.py                           |  64 +++++-
 dbt/node_types.py                             |   4 +-
 dbt/parser.py                                 |  56 ++++-
 dbt/runner.py                                 |   2 +-
 dbt/seeder.py                                 | 139 ------------
 dbt/task/seed.py                              |  51 ++++-
 dbt/ui/printer.py                             |  16 ++
 dbt/utils.py                                  |  13 ++
 requirements.txt                              |   2 +-
 setup.py                                      |   2 +-
 .../seed-initial/seed.csv                     | 101 +++++++++
 .../001_simple_copy_test/seed-update/seed.csv | 201 ++++++++++++++++++
 .../integration/001_simple_copy_test/seed.sql | 111 ----------
 .../001_simple_copy_test/test_simple_copy.py  |  42 ++--
 .../001_simple_copy_test/update.sql           | 101 ---------
 .../integration/005_simple_seed_test/seed.sql |   6 +-
 .../023_exit_codes_test/data-bad/data.csv     |   4 +-
 .../023_exit_codes_test/test_exit_codes.py    |   7 +-
 test/integration/base.py                      |   9 +-
 test/setup_db.sh                              |  19 +-
 32 files changed, 825 insertions(+), 429 deletions(-)
 delete mode 100644 dbt/seeder.py
 create mode 100644 test/integration/001_simple_copy_test/seed-initial/seed.csv
 create mode 100644 test/integration/001_simple_copy_test/seed-update/seed.csv
 delete mode 100644 test/integration/001_simple_copy_test/seed.sql
 delete mode 100644 test/integration/001_simple_copy_test/update.sql

diff --git a/dbt/adapters/bigquery.py b/dbt/adapters/bigquery.py
index 21d3111b394..4e3664986ea 100644
--- a/dbt/adapters/bigquery.py
+++ b/dbt/adapters/bigquery.py
@@ -40,7 +40,9 @@ class BigQueryAdapter(PostgresAdapter):
     def handle_error(cls, error, message, sql):
         logger.debug(message.format(sql=sql))
         logger.debug(error)
-        error_msg = "\n".join([error['message'] for error in error.errors])
+        error_msg = "\n".join(
+            [item['message'] for item in error.errors])
+
         raise dbt.exceptions.DatabaseException(error_msg)

@@ -372,7 +374,8 @@ def warning_on_hooks(cls, hook_type):
             dbt.ui.printer.COLOR_FG_YELLOW)

     @classmethod
-    def add_query(cls, profile, sql, model_name=None, auto_begin=True):
+    def add_query(cls, profile, sql, model_name=None, auto_begin=True,
+                  bindings=None):
         if model_name in ['on-run-start', 'on-run-end']:
             cls.warning_on_hooks(model_name)
         else:
@@ -395,3 +398,52 @@ def quote_schema_and_table(cls, profile, schema, table, model_name=None):
         return '{}.{}.{}'.format(cls.quote(project),
                                  cls.quote(schema),
                                  cls.quote(table))
+
+    @classmethod
+    def convert_text_type(cls, agate_table, col_idx):
+        return "string"
+
+    @classmethod
+    def convert_number_type(cls, agate_table, col_idx):
+        import agate
+        decimals = agate_table.aggregate(agate.MaxPrecision(col_idx))
+        return "float64" if decimals else "int64"
+
+    @classmethod
+    def convert_boolean_type(cls, agate_table, col_idx):
+        return "bool"
+
+    @classmethod
+    def convert_datetime_type(cls, agate_table, col_idx):
+        return "datetime"
+
+    @classmethod
+    def create_csv_table(cls, profile, schema, table_name, agate_table):
+        pass
+
+    @classmethod
+    def reset_csv_table(cls, profile, schema, table_name, agate_table,
+                        full_refresh=False):
+        cls.drop(profile, schema, table_name, "table")
+
+    @classmethod
+    def _agate_to_schema(cls, agate_table):
+        bq_schema = []
+        for idx, col_name in enumerate(agate_table.column_names):
+            type_ = cls.convert_agate_type(agate_table, idx)
+            bq_schema.append(
+                google.cloud.bigquery.SchemaField(col_name, type_))
+        return bq_schema
+
+    @classmethod
+    def load_csv_rows(cls, profile, schema, table_name, agate_table):
+        bq_schema = cls._agate_to_schema(agate_table)
+        dataset = cls.get_dataset(profile, schema, None)
+        table = dataset.table(table_name, schema=bq_schema)
+        conn = cls.get_connection(profile, None)
+        client = conn.get('handle')
+        with open(agate_table.original_abspath, "rb") as f:
+            job = table.upload_from_file(f, "CSV", rewind=True,
+                                         client=client, skip_leading_rows=1)
+        with cls.exception_handler(profile, "LOAD TABLE"):
+            cls.poll_until_job_completes(job, cls.get_timeout(conn))
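Note: the BigQuery seed path above derives column types from agate's inference. A condensed sketch of how a CSV column becomes one of the BigQuery type names used above, written against agate only (the CSV path is hypothetical and error cases are elided):

    import agate

    def bq_type_for(table, idx):
        col_type = table.column_types[idx]
        if isinstance(col_type, agate.Text):
            return "string"
        if isinstance(col_type, agate.Number):
            # any digits after the decimal point -> float64, else int64
            decimals = table.aggregate(agate.MaxPrecision(idx))
            return "float64" if decimals else "int64"
        if isinstance(col_type, agate.Boolean):
            return "bool"
        if isinstance(col_type, agate.DateTime):
            return "datetime"
        raise NotImplementedError(col_type)

    table = agate.Table.from_csv("data/users.csv")  # hypothetical path
    schema = [(name, bq_type_for(table, i))
              for i, name in enumerate(table.column_names)]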
diff --git a/dbt/adapters/default.py b/dbt/adapters/default.py
index a7f99eac726..a2046e64c17 100644
--- a/dbt/adapters/default.py
+++ b/dbt/adapters/default.py
@@ -94,6 +94,22 @@ def cancel_connection(cls, project, connection):
         raise dbt.exceptions.NotImplementedException(
             '`cancel_connection` is not implemented for this adapter!')

+    @classmethod
+    def create_csv_table(cls, profile, schema, table_name, agate_table):
+        raise dbt.exceptions.NotImplementedException(
+            '`create_csv_table` is not implemented for this adapter!')
+
+    @classmethod
+    def reset_csv_table(cls, profile, schema, table_name, agate_table,
+                        full_refresh=False):
+        raise dbt.exceptions.NotImplementedException(
+            '`reset_csv_table` is not implemented for this adapter!')
+
+    @classmethod
+    def load_csv_rows(cls, profile, schema, table_name, agate_table):
+        raise dbt.exceptions.NotImplementedException(
+            '`load_csv_rows` is not implemented for this adapter!')
+
     ###
     # FUNCTIONS THAT SHOULD BE ABSTRACT
     ###
@@ -507,7 +523,8 @@ def close(cls, connection):
         return connection

     @classmethod
-    def add_query(cls, profile, sql, model_name=None, auto_begin=True):
+    def add_query(cls, profile, sql, model_name=None, auto_begin=True,
+                  bindings=None):
         connection = cls.get_connection(profile, model_name)
         connection_name = connection.get('name')

@@ -522,7 +539,7 @@ def add_query(cls, profile, sql, model_name=None, auto_begin=True):
             pre = time.time()

             cursor = connection.get('handle').cursor()
-            cursor.execute(sql)
+            cursor.execute(sql, bindings)

             logger.debug("SQL status: %s in %0.2f seconds",
                          cls.get_status(cursor), (time.time() - pre))
@@ -603,9 +620,71 @@ def already_exists(cls, profile, schema, table, model_name=None):

     @classmethod
     def quote(cls, identifier):
-        return '"{}"'.format(identifier)
+        return '"{}"'.format(identifier.replace('"', '""'))

     @classmethod
     def quote_schema_and_table(cls, profile, schema, table, model_name=None):
         return '{}.{}'.format(cls.quote(schema),
                               cls.quote(table))
+
+    @classmethod
+    def handle_csv_table(cls, profile, schema, table_name, agate_table,
+                         full_refresh=False):
+        existing = cls.query_for_existing(profile, schema)
+        existing_type = existing.get(table_name)
+        if existing_type and existing_type != "table":
+            raise dbt.exceptions.RuntimeException(
+                "Cannot seed to '{}', it is a view".format(table_name))
+        if existing_type:
+            cls.reset_csv_table(profile, schema, table_name, agate_table,
+                                full_refresh=full_refresh)
+        else:
+            cls.create_csv_table(profile, schema, table_name, agate_table)
+        cls.load_csv_rows(profile, schema, table_name, agate_table)
+        cls.commit_if_has_connection(profile, None)
+
+    @classmethod
+    def convert_text_type(cls, agate_table, col_idx):
+        raise dbt.exceptions.NotImplementedException(
+            '`convert_text_type` is not implemented for this adapter!')
+
+    @classmethod
+    def convert_number_type(cls, agate_table, col_idx):
+        raise dbt.exceptions.NotImplementedException(
+            '`convert_number_type` is not implemented for this adapter!')
+
+    @classmethod
+    def convert_boolean_type(cls, agate_table, col_idx):
+        raise dbt.exceptions.NotImplementedException(
+            '`convert_boolean_type` is not implemented for this adapter!')
+
+    @classmethod
+    def convert_datetime_type(cls, agate_table, col_idx):
+        raise dbt.exceptions.NotImplementedException(
+            '`convert_datetime_type` is not implemented for this adapter!')
+
+    @classmethod
+    def convert_date_type(cls, agate_table, col_idx):
+        raise dbt.exceptions.NotImplementedException(
+            '`convert_date_type` is not implemented for this adapter!')
+
+    @classmethod
+    def convert_time_type(cls, agate_table, col_idx):
+        raise dbt.exceptions.NotImplementedException(
+            '`convert_time_type` is not implemented for this adapter!')
+
+    @classmethod
+    def convert_agate_type(cls, agate_table, col_idx):
+        import agate
+        agate_type = agate_table.column_types[col_idx]
+        conversions = [
+            (agate.Text, cls.convert_text_type),
+            (agate.Number, cls.convert_number_type),
+            (agate.Boolean, cls.convert_boolean_type),
+            (agate.DateTime, cls.convert_datetime_type),
+            (agate.Date, cls.convert_date_type),
+            (agate.TimeDelta, cls.convert_time_type),
+        ]
+        for agate_cls, func in conversions:
+            if isinstance(agate_type, agate_cls):
+                return func(agate_table, col_idx)
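Note: handle_csv_table above encodes the non-destructive default: an existing seed table is reset (truncated, or dropped and recreated under full_refresh), a missing one is created, and a view occupying the seed's name is an error. The same decision table as a runnable sketch, with the adapter calls reduced to labels:

    def handle_csv_table_sketch(existing, table_name, full_refresh=False):
        # existing maps relation name -> relation type, as
        # query_for_existing does above
        existing_type = existing.get(table_name)
        if existing_type and existing_type != "table":
            raise RuntimeError(
                "Cannot seed to '{}', it is a view".format(table_name))
        if existing_type:
            return "drop+create" if full_refresh else "truncate"
        return "create"

    assert handle_csv_table_sketch({}, "seed") == "create"
    assert handle_csv_table_sketch({"seed": "table"}, "seed") == "truncate"
    assert handle_csv_table_sketch({"seed": "table"}, "seed",
                                   full_refresh=True) == "drop+create"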
diff --git a/dbt/adapters/postgres.py b/dbt/adapters/postgres.py
index 8836f0bca92..5a0b489af6b 100644
--- a/dbt/adapters/postgres.py
+++ b/dbt/adapters/postgres.py
@@ -5,6 +5,7 @@
 import dbt.adapters.default
 import dbt.compat
 import dbt.exceptions
+from dbt.utils import max_digits

 from dbt.logger import GLOBAL_LOGGER as logger

@@ -165,3 +166,73 @@ def cancel_connection(cls, profile, connection):
         res = cursor.fetchone()

         logger.debug("Cancel query '{}': {}".format(connection_name, res))
+
+    @classmethod
+    def convert_text_type(cls, agate_table, col_idx):
+        return "text"
+
+    @classmethod
+    def convert_number_type(cls, agate_table, col_idx):
+        import agate
+        column = agate_table.columns[col_idx]
+        precision = max_digits(column.values_without_nulls())
+        # agate uses the term Precision but in this context, it is really
+        # the scale - ie. the number of decimal places
+        scale = agate_table.aggregate(agate.MaxPrecision(col_idx))
+        if not scale:
+            return "integer"
+        return "numeric({}, {})".format(precision, scale)
+
+    @classmethod
+    def convert_boolean_type(cls, agate_table, col_idx):
+        return "boolean"
+
+    @classmethod
+    def convert_datetime_type(cls, agate_table, col_idx):
+        return "timestamp without time zone"
+
+    @classmethod
+    def convert_date_type(cls, agate_table, col_idx):
+        return "date"
+
+    @classmethod
+    def convert_time_type(cls, agate_table, col_idx):
+        return "time"
+
+    @classmethod
+    def create_csv_table(cls, profile, schema, table_name, agate_table):
+        col_sqls = []
+        for idx, col_name in enumerate(agate_table.column_names):
+            type_ = cls.convert_agate_type(agate_table, idx)
+            col_sqls.append('{} {}'.format(col_name, type_))
+        sql = 'create table "{}"."{}" ({})'.format(schema, table_name,
+                                                   ", ".join(col_sqls))
+        return cls.add_query(profile, sql)
+
+    @classmethod
+    def reset_csv_table(cls, profile, schema, table_name, agate_table,
+                        full_refresh=False):
+        if full_refresh:
+            cls.drop_table(profile, schema, table_name, None)
+            cls.create_csv_table(profile, schema, table_name, agate_table)
+        else:
+            cls.truncate(profile, schema, table_name)
+
+    @classmethod
+    def load_csv_rows(cls, profile, schema, table_name, agate_table):
+        bindings = []
+        placeholders = []
+        cols_sql = ", ".join(c for c in agate_table.column_names)
+
+        for row in agate_table.rows:
+            bindings += row
+            placeholders.append("({})".format(
+                ", ".join("%s" for _ in agate_table.column_names)))
+
+        sql = ('insert into {}.{} ({}) values {}'
+               .format(cls.quote(schema),
+                       cls.quote(table_name),
+                       cols_sql,
+                       ",\n".join(placeholders)))
+
+        cls.add_query(profile, sql, bindings=bindings)
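Note: convert_number_type above combines two measurements: the total digit count (via dbt.utils.max_digits, added later in this patch) for the precision, and agate's MaxPrecision, which as the comment notes is really the scale. The same arithmetic sketched directly with decimal.Decimal (the scale computation here stands in for agate's aggregation):

    from decimal import Decimal

    def max_digits(values):
        # mirrors dbt.utils.max_digits: digits on both sides of the point
        return max((len(v.normalize().as_tuple().digits)
                    for v in values if v is not None), default=0)

    values = [Decimal("1234.56"), Decimal("7.891"), None]
    precision = max_digits(values)                     # 6
    scale = max(-v.normalize().as_tuple().exponent
                for v in values if v is not None)      # 3 decimal places
    column_type = ("numeric({}, {})".format(precision, scale)
                   if scale else "integer")
    assert column_type == "numeric(6, 3)"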
diff --git a/dbt/adapters/redshift.py b/dbt/adapters/redshift.py
index 07aba16518a..d3009e3e5ce 100644
--- a/dbt/adapters/redshift.py
+++ b/dbt/adapters/redshift.py
@@ -94,3 +94,14 @@ def drop(cls, profile, schema, relation, relation_type, model_name=None):

         finally:
             drop_lock.release()
+
+    @classmethod
+    def convert_text_type(cls, agate_table, col_idx):
+        column = agate_table.columns[col_idx]
+        lens = [len(d.encode("utf-8")) for d in column.values_without_nulls()]
+        max_len = max(lens) if lens else 64
+        return "varchar({})".format(max_len)
+
+    @classmethod
+    def convert_time_type(cls, agate_table, col_idx):
+        return "varchar(24)"

diff --git a/dbt/adapters/snowflake.py b/dbt/adapters/snowflake.py
index 431f1b1c33f..08385a19c91 100644
--- a/dbt/adapters/snowflake.py
+++ b/dbt/adapters/snowflake.py
@@ -181,7 +181,7 @@ def check_schema_exists(cls, profile, schema, model_name=None):

     @classmethod
     def add_query(cls, profile, sql, model_name=None, auto_begin=True,
-                  select_schema=True):
+                  select_schema=True, bindings=None):
         # snowflake only allows one query per api call.
         queries = sql.strip().split(";")
         cursor = None
@@ -193,6 +193,11 @@ def add_query(cls, profile, sql, model_name=None, auto_begin=True,
                 model_name,
                 auto_begin)

+        if bindings:
+            # The snowflake connector is more strict than, eg., psycopg2 -
+            # which allows any iterable thing to be passed as a binding.
+            bindings = tuple(bindings)
+
         for individual_query in queries:
             # hack -- after the last ';', remove comments and don't run
             # empty queries. this avoids using exceptions as flow control,
@@ -205,7 +210,8 @@ def add_query(cls, profile, sql, model_name=None, auto_begin=True,
                 continue

             connection, cursor = super(PostgresAdapter, cls).add_query(
-                profile, individual_query, model_name, auto_begin)
+                profile, individual_query, model_name, auto_begin,
+                bindings=bindings)

         return connection, cursor
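Note: Redshift's convert_text_type above sizes varchar columns by the longest UTF-8 encoding because Redshift measures varchar length in bytes, not characters. A quick illustration:

    values = ["hello", "naïve", "日本語"]
    width = max(len(v.encode("utf-8")) for v in values)
    assert width == 9  # "日本語" is 3 characters but 9 UTF-8 bytes
    column_type = "varchar({})".format(width)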
diff --git a/dbt/compilation.py b/dbt/compilation.py
index 706009a77ca..39bc4af302c 100644
--- a/dbt/compilation.py
+++ b/dbt/compilation.py
@@ -34,17 +34,10 @@ def print_compile_stats(stats):
         NodeType.Analysis: 'analyses',
         NodeType.Macro: 'macros',
         NodeType.Operation: 'operations',
+        NodeType.Seed: 'seed files',
     }

-    results = {
-        NodeType.Model: 0,
-        NodeType.Test: 0,
-        NodeType.Archive: 0,
-        NodeType.Analysis: 0,
-        NodeType.Macro: 0,
-        NodeType.Operation: 0,
-    }
-
+    results = {k: 0 for k in names.keys()}
     results.update(stats)

     stat_line = ", ".join(

diff --git a/dbt/contracts/graph/parsed.py b/dbt/contracts/graph/parsed.py
index 570b73f25ed..e5dd0cba9ab 100644
--- a/dbt/contracts/graph/parsed.py
+++ b/dbt/contracts/graph/parsed.py
@@ -1,4 +1,5 @@
 from voluptuous import Schema, Required, All, Any, Length, ALLOW_EXTRA
+from voluptuous import Optional

 import dbt.exceptions

@@ -43,6 +44,9 @@
     Required('empty'): bool,
     Required('config'): config_contract,
     Required('tags'): All(set),
+
+    # For csv files
+    Optional('agate_table'): object,
 })

 parsed_nodes_contract = Schema({

diff --git a/dbt/contracts/graph/unparsed.py b/dbt/contracts/graph/unparsed.py
index 4dfcf8e0db1..fc4daae7103 100644
--- a/dbt/contracts/graph/unparsed.py
+++ b/dbt/contracts/graph/unparsed.py
@@ -22,7 +22,8 @@
     Required('resource_type'): Any(NodeType.Model,
                                    NodeType.Test,
                                    NodeType.Analysis,
-                                   NodeType.Operation)
+                                   NodeType.Operation,
+                                   NodeType.Seed)
 })

 unparsed_nodes_contract = Schema([unparsed_node_contract])

diff --git a/dbt/deprecations.py b/dbt/deprecations.py
index a4e1a222934..23566b080af 100644
--- a/dbt/deprecations.py
+++ b/dbt/deprecations.py
@@ -21,6 +21,13 @@ def show(self, *args, **kwargs):
 # removed (in favor of 'target') in DBT version 0.7.0"""


+class SeedDropExistingDeprecation(DBTDeprecation):
+    name = 'drop-existing'
+    description = """The --drop-existing argument has been deprecated. Please
+    use --full-refresh instead. The --drop-existing option will be removed in a
+    future version of dbt."""
+
+
 def warn(name, *args, **kwargs):
     if name not in deprecations:
         # this should (hopefully) never happen
@@ -37,6 +44,7 @@ def warn(name, *args, **kwargs):
 active_deprecations = set()

 deprecations_list = [
+    SeedDropExistingDeprecation()
 ]

 deprecations = {d.name: d for d in deprecations_list}
diff --git a/dbt/loader.py b/dbt/loader.py
index 3d952f470ff..1d067e04f7e 100644
--- a/dbt/loader.py
+++ b/dbt/loader.py
@@ -181,6 +181,20 @@ def load_project(cls, root_project, all_projects, macros):
             macros)


+class SeedLoader(ResourceLoader):
+
+    @classmethod
+    def load_project(cls, root_project, all_projects, project, project_name,
+                     macros):
+        return dbt.parser.load_and_parse_seeds(
+            package_name=project_name,
+            root_project=root_project,
+            all_projects=all_projects,
+            root_dir=project.get('project-root'),
+            relative_dirs=project.get('data-paths', []),
+            resource_type=NodeType.Seed)
+
+
 # node loaders
 GraphLoader.register(ModelLoader, 'nodes')
 GraphLoader.register(AnalysisLoader, 'nodes')
@@ -188,3 +202,4 @@
 GraphLoader.register(DataTestLoader, 'nodes')
 GraphLoader.register(RunHookLoader, 'nodes')
 GraphLoader.register(ArchiveLoader, 'nodes')
+GraphLoader.register(SeedLoader, 'nodes')

diff --git a/dbt/logger.py b/dbt/logger.py
index 2daa036e6c1..7ce14b7ca7e 100644
--- a/dbt/logger.py
+++ b/dbt/logger.py
@@ -12,6 +12,7 @@
 logging.getLogger('urllib3').setLevel(logging.CRITICAL)
 logging.getLogger('google').setLevel(logging.CRITICAL)
 logging.getLogger('snowflake.connector').setLevel(logging.CRITICAL)
+logging.getLogger('parsedatetime').setLevel(logging.CRITICAL)

 # Colorama needs some help on windows because we're using logger.info
 # instead of print(). If the Windows env doesn't have a TERM var set,

diff --git a/dbt/main.py b/dbt/main.py
index 0890e64a61c..3282090156f 100644
--- a/dbt/main.py
+++ b/dbt/main.py
@@ -22,6 +22,7 @@
 import dbt.config as config
 import dbt.ui.printer
 import dbt.compat
+import dbt.deprecations

 from dbt.utils import ExitCodes

@@ -232,7 +233,15 @@ def invoke_dbt(parsed):
             return None

     flags.NON_DESTRUCTIVE = getattr(proj.args, 'non_destructive', False)
-    flags.FULL_REFRESH = getattr(proj.args, 'full_refresh', False)
+
+    arg_drop_existing = getattr(proj.args, 'drop_existing', False)
+    arg_full_refresh = getattr(proj.args, 'full_refresh', False)
+
+    if arg_drop_existing:
+        dbt.deprecations.warn('drop-existing')
+        flags.FULL_REFRESH = True
+    elif arg_full_refresh:
+        flags.FULL_REFRESH = True

     logger.debug("running dbt with arguments %s", parsed)

@@ -385,13 +394,23 @@ def parse_args(args):
         fully-recalculate the incremental table from the model definition.
         """)

-    sub = subs.add_parser('seed', parents=[base_subparser])
-    sub.add_argument(
+    seed_sub = subs.add_parser('seed', parents=[base_subparser])
+    seed_sub.add_argument(
         '--drop-existing',
         action='store_true',
-        help="Drop existing seed tables and recreate them"
+        help='(DEPRECATED) Use --full-refresh instead.'
+    )
+    seed_sub.add_argument(
+        '--full-refresh',
+        action='store_true',
+        help='Drop existing seed tables and recreate them'
+    )
+    seed_sub.add_argument(
+        '--show',
+        action='store_true',
+        help='Show a sample of the loaded data in the terminal'
     )
-    sub.set_defaults(cls=seed_task.SeedTask, which='seed')
+    seed_sub.set_defaults(cls=seed_task.SeedTask, which='seed')

     sub = subs.add_parser('test', parents=[base_subparser])
     sub.add_argument(
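Note: the flag handling above maps --drop-existing onto FULL_REFRESH while routing a warning through dbt.deprecations, which is built to fire each named deprecation at most once per invocation. A sketch of that one-shot mechanism (the message format is illustrative, not dbt's exact output):

    active_deprecations = set()

    def warn_once(name, description):
        # log on first use, stay quiet afterwards
        if name not in active_deprecations:
            print("* Deprecation Warning: {}".format(description))
            active_deprecations.add(name)

    warn_once("drop-existing", "use --full-refresh instead")  # prints
    warn_once("drop-existing", "use --full-refresh instead")  # silent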
diff --git a/dbt/node_runners.py b/dbt/node_runners.py
index fe7c7132df7..d877c887c17 100644
--- a/dbt/node_runners.py
+++ b/dbt/node_runners.py
@@ -213,14 +213,14 @@ def execute(self, compiled_node, existing, flat_graph):
         return RunModelResult(compiled_node)

     def compile(self, flat_graph):
-        return self.compile_node(self.adapter, self.project, self.node,
-                                 flat_graph)
+        return self._compile_node(self.adapter, self.project, self.node,
+                                   flat_graph)

     @classmethod
-    def compile_node(cls, adapter, project, node, flat_graph):
+    def _compile_node(cls, adapter, project, node, flat_graph):
         compiler = dbt.compilation.Compiler(project)
         node = compiler.compile_node(node, flat_graph)
-        node = cls.inject_runtime_config(adapter, project, node)
+        node = cls._inject_runtime_config(adapter, project, node)

         if(node['injected_sql'] is not None and
            not (dbt.utils.is_type(node, NodeType.Archive))):
@@ -238,15 +238,15 @@ def compile_node(cls, adapter, project, node, flat_graph):
         return node

     @classmethod
-    def inject_runtime_config(cls, adapter, project, node):
+    def _inject_runtime_config(cls, adapter, project, node):
         wrapped_sql = node.get('wrapped_sql')
-        context = cls.node_context(adapter, project, node)
+        context = cls._node_context(adapter, project, node)
         sql = dbt.clients.jinja.get_rendered(wrapped_sql, context)
         node['wrapped_sql'] = sql
         return node

     @classmethod
-    def node_context(cls, adapter, project, node):
+    def _node_context(cls, adapter, project, node):
         profile = project.run_environment()

         def call_get_columns_in_table(schema_name, table_name):
@@ -271,6 +271,14 @@ def call_table_exists(schema, table):
             "already_exists": call_table_exists,
         }

+    @classmethod
+    def create_schemas(cls, project, adapter, flat_graph):
+        profile = project.run_environment()
+        required_schemas = cls.get_model_schemas(flat_graph)
+        existing_schemas = set(adapter.get_existing_schemas(profile))
+        for schema in (required_schemas - existing_schemas):
+            adapter.create_schema(profile, schema)
+

 class ModelRunner(CompileRunner):

@@ -295,7 +303,7 @@ def run_hooks(cls, project, adapter, flat_graph, hook_type):

         compiled_hooks = []
         for hook in hooks:
-            compiled = cls.compile_node(adapter, project, hook, flat_graph)
+            compiled = cls._compile_node(adapter, project, hook, flat_graph)
             model_name = compiled.get('name')
             statement = compiled['wrapped_sql']

@@ -474,3 +482,43 @@ def describe_node(self):
     def print_result_line(self, result):
         dbt.ui.printer.print_archive_result_line(result, self.node_index,
                                                  self.num_nodes)
+
+
+class SeedRunner(ModelRunner):
+
+    def describe_node(self):
+        schema_name = self.node.get('schema')
+        return "seed file {}.{}".format(schema_name, self.node["name"])
+
+    @classmethod
+    def before_run(cls, project, adapter, flat_graph):
+        cls.create_schemas(project, adapter, flat_graph)
+
+    def before_execute(self):
+        description = self.describe_node()
+        dbt.ui.printer.print_start_line(description, self.node_index,
+                                        self.num_nodes)
+
+    def execute(self, compiled_node, existing_, flat_graph):
+        schema = compiled_node["schema"]
+        table_name = compiled_node["name"]
+        table = compiled_node["agate_table"]
+        self.adapter.handle_csv_table(self.profile, schema, table_name, table,
+                                      full_refresh=dbt.flags.FULL_REFRESH)
+
+        if dbt.flags.FULL_REFRESH:
+            status = 'CREATE {}'.format(len(table.rows))
+        else:
+            status = 'INSERT {}'.format(len(table.rows))
+
+        return RunModelResult(compiled_node, status=status)
+
+    def compile(self, flat_graph):
+        return self.node
+
+    def print_result_line(self, result):
+        schema_name = self.node.get('schema')
+        dbt.ui.printer.print_seed_result_line(result,
+                                              schema_name,
+                                              self.node_index,
+                                              self.num_nodes)
compiled_node["agate_table"] + self.adapter.handle_csv_table(self.profile, schema, table_name, table, + full_refresh=dbt.flags.FULL_REFRESH) + + if dbt.flags.FULL_REFRESH: + status = 'CREATE {}'.format(len(table.rows)) + else: + status = 'INSERT {}'.format(len(table.rows)) + + return RunModelResult(compiled_node, status=status) + + def compile(self, flat_graph): + return self.node + + def print_result_line(self, result): + schema_name = self.node.get('schema') + dbt.ui.printer.print_seed_result_line(result, + schema_name, + self.node_index, + self.num_nodes) diff --git a/dbt/node_types.py b/dbt/node_types.py index ae498d3ea47..5d3ef275c83 100644 --- a/dbt/node_types.py +++ b/dbt/node_types.py @@ -7,6 +7,7 @@ class NodeType(object): Archive = 'archive' Macro = 'macro' Operation = 'operation' + Seed = 'seed' @classmethod def executable(cls): @@ -15,7 +16,8 @@ def executable(cls): cls.Test, cls.Archive, cls.Analysis, - cls.Operation + cls.Operation, + cls.Seed, ] diff --git a/dbt/parser.py b/dbt/parser.py index 128d296fb8a..9eb1fb694aa 100644 --- a/dbt/parser.py +++ b/dbt/parser.py @@ -3,6 +3,7 @@ import re import hashlib import collections +import agate import dbt.exceptions import dbt.flags @@ -203,7 +204,7 @@ def parse_node(node, node_path, root_project_config, package_project_config, root_project_config, package_project_config, fqn) node['unique_id'] = node_path - node['empty'] = (len(node.get('raw_sql').strip()) == 0) + node['empty'] = ('raw_sql' in node and len(node['raw_sql'].strip()) == 0) node['fqn'] = fqn node['tags'] = tags node['config_reference'] = config @@ -468,7 +469,7 @@ def parse_schema_tests(tests, root_project, projects, macros=None): for model_name, test_spec in test_yml.items(): if test_spec is None or test_spec.get('constraints') is None: test_path = test.get('original_file_path', '') - logger.warn(no_tests_warning.format(model_name, test_path)) + logger.warning(no_tests_warning.format(model_name, test_path)) continue for test_type, configs in test_spec.get('constraints', {}).items(): @@ -697,3 +698,54 @@ def parse_archives_from_project(project): }) return archives + + +def parse_seed_file(file_match, root_dir, package_name): + abspath = file_match['absolute_path'] + logger.debug("Parsing {}".format(abspath)) + to_return = {} + table_name = os.path.basename(abspath)[:-4] + node = { + 'unique_id': get_path(NodeType.Seed, package_name, table_name), + 'path': file_match['relative_path'], + 'name': table_name, + 'root_path': root_dir, + 'resource_type': NodeType.Seed, + # Give this raw_sql so it conforms to the node spec, + # use dummy text so it doesn't look like an empty node + 'raw_sql': '-- csv --', + 'package_name': package_name, + 'depends_on': {'nodes': []}, + 'original_file_path': os.path.join(file_match.get('searched_path'), + file_match.get('relative_path')), + } + try: + table = agate.Table.from_csv(abspath) + except ValueError as e: + dbt.exceptions.raise_compiler_error(str(e), node) + table.original_abspath = abspath + node['agate_table'] = table + return node + + +def load_and_parse_seeds(package_name, root_project, all_projects, root_dir, + relative_dirs, resource_type, tags=None, macros=None): + extension = "[!.#~]*.csv" + if dbt.flags.STRICT_MODE: + dbt.contracts.project.validate_list(all_projects) + file_matches = dbt.clients.system.find_matching( + root_dir, + relative_dirs, + extension) + result = {} + for file_match in file_matches: + node = parse_seed_file(file_match, root_dir, package_name) + node_path = node['unique_id'] + parsed = parse_node(node, 
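Note: load_and_parse_seeds above matches seed files with the glob [!.#~]*.csv, skipping dotfiles, editor backups, and lock files, and names the target table by stripping the .csv suffix. The same matching and naming, sketched with fnmatch:

    import fnmatch
    import os

    files = ["seed.csv", ".hidden.csv", "#temp#.csv", "~lock.csv", "seed.sql"]
    matches = [f for f in files if fnmatch.fnmatch(f, "[!.#~]*.csv")]
    assert matches == ["seed.csv"]

    # basename minus the 4-character ".csv" suffix becomes the table name
    table_name = os.path.basename("data/seed.csv")[:-4]
    assert table_name == "seed"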
diff --git a/dbt/runner.py b/dbt/runner.py
index a496b9c686f..efb9ff7427e 100644
--- a/dbt/runner.py
+++ b/dbt/runner.py
@@ -26,7 +26,7 @@ def __init__(self, project, target_path, args):
         profile = self.project.run_environment()

         # TODO validate the number of threads
-        if self.args.threads is None:
+        if not getattr(self.args, "threads", None):
             self.threads = profile.get('threads', 1)
         else:
             self.threads = self.args.threads

diff --git a/dbt/seeder.py b/dbt/seeder.py
deleted file mode 100644
index d30e5f131cc..00000000000
--- a/dbt/seeder.py
+++ /dev/null
@@ -1,139 +0,0 @@
-import os
-import fnmatch
-from csvkit import table as csv_table, sql as csv_sql
-from sqlalchemy.dialects import postgresql as postgresql_dialect
-import psycopg2
-
-from dbt.source import Source
-from dbt.logger import GLOBAL_LOGGER as logger
-from dbt.adapters.factory import get_adapter
-import dbt.exceptions
-
-
-class Seeder:
-    def __init__(self, project):
-        self.project = project
-        run_environment = self.project.run_environment()
-
-    def find_csvs(self):
-        return Source(self.project).get_csvs(self.project['data-paths'])
-
-    def drop_table(self, cursor, schema, table):
-        sql = 'drop table if exists "{schema}"."{table}" cascade'.format(
-            schema=schema, table=table
-        )
-        logger.info("Dropping table {}.{}".format(schema, table))
-        cursor.execute(sql)
-
-    def truncate_table(self, cursor, schema, table):
-        sql = 'truncate table "{schema}"."{table}"'.format(
-            schema=schema, table=table
-        )
-        logger.info("Truncating table {}.{}".format(schema, table))
-        cursor.execute(sql)
-
-    def create_table(self, cursor, schema, table, virtual_table):
-        sql_table = csv_sql.make_table(virtual_table, db_schema=schema)
-        create_table_sql = csv_sql.make_create_table_statement(
-            sql_table, dialect='postgresql'
-        )
-        logger.info("Creating table {}.{}".format(schema, table))
-        cursor.execute(create_table_sql)
-
-    def insert_into_table(self, cursor, schema, table, virtual_table):
-        headers = virtual_table.headers()
-
-        header_csv = ", ".join(['"{}"'.format(h) for h in headers])
-        base_insert = ('INSERT INTO "{schema}"."{table}" ({header_csv}) '
-                       'VALUES '.format(
-                           schema=schema,
-                           table=table,
-                           header_csv=header_csv
-                       ))
-        records = []
-
-        def quote_or_null(s):
-            if s is None:
-                return 'null'
-            else:
-                return "'{}'".format(s)
-
-        for row in virtual_table.to_rows():
-            record_csv = ', '.join([quote_or_null(val) for val in row])
-            record_csv_wrapped = "({})".format(record_csv)
-            records.append(record_csv_wrapped)
-        insert_sql = "{} {}".format(base_insert, ",\n".join(records))
-        logger.info("Inserting {} records into table {}.{}"
-                    .format(len(virtual_table.to_rows()), schema, table))
-        cursor.execute(insert_sql)
-
-    def existing_tables(self, cursor, schema):
-        sql = ("select tablename as name from pg_tables where "
-               "schemaname = '{schema}'".format(schema=schema))
-
-        cursor.execute(sql)
-        existing = set([row[0] for row in cursor.fetchall()])
-        return existing
-
-    def do_seed(self, schema, cursor, drop_existing):
-        existing_tables = self.existing_tables(cursor, schema)
-
-        csvs = self.find_csvs()
-        statuses = []
-        for csv in csvs:
-
-            table_name = csv.name
-            fh = open(csv.filepath)
-            virtual_table = csv_table.Table.from_csv(fh, table_name)
-
-            if table_name in existing_tables:
-                if drop_existing:
-                    self.drop_table(cursor, schema, table_name)
-                    self.create_table(
-                        cursor,
-                        schema,
-                        table_name,
-                        virtual_table
-                    )
-                else:
-                    self.truncate_table(cursor, schema, table_name)
-            else:
-                self.create_table(cursor, schema, table_name, virtual_table)
-
-            try:
-                self.insert_into_table(
-                    cursor, schema, table_name, virtual_table
-                )
-                statuses.append(True)
-
-            except psycopg2.ProgrammingError as e:
-                statuses.append(False)
-                logger.info(
-                    'Encountered an error while inserting into table "{}"."{}"'
-                    .format(schema, table_name)
-                )
-                logger.info(
-                    'Check for formatting errors in {}'.format(csv.filepath)
-                )
-                logger.info(
-                    'Try --drop-existing to delete and recreate the table '
-                    'instead'
-                )
-                logger.info(str(e))
-        return all(statuses)
-
-    def seed(self, drop_existing=False):
-        profile = self.project.run_environment()
-
-        if profile.get('type') == 'snowflake':
-            raise dbt.exceptions.NotImplementedException(
-                "`seed` operation is not supported for snowflake.")
-
-        adapter = get_adapter(profile)
-        connection = adapter.get_connection(profile)
-
-        schema = connection.get('credentials', {}).get('schema')
-
-        with connection.get('handle') as handle:
-            with handle.cursor() as cursor:
-                return self.do_seed(schema, cursor, drop_existing)
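Note: the deleted seeder built INSERT statements by quoting values itself (quote_or_null above), which is fragile around embedded quotes and type edge cases. Its replacement, load_csv_rows in the Postgres adapter, instead renders one placeholder group per row and hands the driver a flat list of values. The shape of that SQL and bindings, sketched:

    rows = [(1, "Jack", "Hunter"), (2, "Kathryn", "O'Walker")]
    columns = ["id", "first_name", "last_name"]

    placeholders = ", ".join(
        "({})".format(", ".join("%s" for _ in columns)) for _ in rows)
    bindings = [value for row in rows for value in row]

    sql = 'insert into "schema"."seed" ({}) values {}'.format(
        ", ".join(columns), placeholders)
    # sql: insert into "schema"."seed" (id, first_name, last_name)
    #      values (%s, %s, %s), (%s, %s, %s)
    # the driver substitutes bindings safely, embedded quotes included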
diff --git a/dbt/task/seed.py b/dbt/task/seed.py
index fabc14eb412..1f3ac5f7e42 100644
--- a/dbt/task/seed.py
+++ b/dbt/task/seed.py
@@ -1,12 +1,47 @@
-import os
-from dbt.seeder import Seeder
-from dbt.task.base_task import BaseTask
+import random
+
+from dbt.logger import GLOBAL_LOGGER as logger
+from dbt.node_runners import SeedRunner
+from dbt.node_types import NodeType
+from dbt.runner import RunManager
+from dbt.task.base_task import RunnableTask
+import dbt.ui.printer


-class SeedTask(BaseTask):
+class SeedTask(RunnableTask):
     def run(self):
-        seeder = Seeder(self.project)
-        self.success = seeder.seed(self.args.drop_existing)
+        runner = RunManager(
+            self.project,
+            self.project["target-path"],
+            self.args,
+        )
+        query = {
+            "include": ["*"],
+            "exclude": [],
+            "resource_types": [NodeType.Seed],
+        }
+        results = runner.run_flat(query, SeedRunner)

-    def interpret_results(self, results):
-        return self.success
+        if self.args.show:
+            self.show_tables(results)
+
+        dbt.ui.printer.print_run_end_messages(results)
+        return results
+
+    def show_table(self, result):
+        table = result.node['agate_table']
+        rand_table = table.order_by(lambda x: random.random())
+
+        schema = result.node['schema']
+        name = result.node['name']
+
+        header = "Random sample of table: {}.{}".format(schema, name)
+        logger.info("")
+        logger.info(header)
+        logger.info("-" * len(header))
+        rand_table.print_table(max_rows=10, max_columns=None)
+        logger.info("")
+
+    def show_tables(self, results):
+        for result in results:
+            if not result.errored:
+                self.show_table(result)

diff --git a/dbt/ui/printer.py b/dbt/ui/printer.py
index 33904c1c2b4..871bedd2c63 100644
--- a/dbt/ui/printer.py
+++ b/dbt/ui/printer.py
@@ -178,6 +178,22 @@ def print_archive_result_line(result, index, total):
                             result.execution_time)


+def print_seed_result_line(result, schema_name, index, total):
+    model = result.node
+
+    info, status = get_printable_result(result, 'loaded', 'loading')
+
+    print_fancy_output_line(
+        "{info} seed file {schema}.{relation}".format(
+            info=info,
+            schema=schema_name,
+            relation=model.get('name')),
+        status,
+        index,
+        total,
+        result.execution_time)
+
+
 def interpret_run_result(result):
     if result.errored or result.failed:
         return 'error'
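Note: for --show, show_table above samples the seed by sorting on a random key and printing the first rows, which is agate's idiom for a shuffle. A standalone sketch with a small synthetic table:

    import random
    import agate

    table = agate.Table(
        [(i, "user{}".format(i)) for i in range(100)],
        column_names=["id", "name"],
    )
    # order_by with a random key shuffles; print_table caps the output
    rand_table = table.order_by(lambda row: random.random())
    rand_table.print_table(max_rows=10, max_columns=None)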
diff --git a/dbt/utils.py b/dbt/utils.py
index c0008b95e5a..14cbf97deae 100644
--- a/dbt/utils.py
+++ b/dbt/utils.py
@@ -363,6 +363,19 @@ def flatten_nodes(dep_list):
     return list(itertools.chain.from_iterable(dep_list))


+def max_digits(values):
+    """Given a series of decimal.Decimal values, find the maximum
+    number of digits (on both sides of the decimal point) used by the
+    values."""
+    max_ = 0
+    for value in values:
+        if value is None:
+            continue
+        sign, digits, exponent = value.normalize().as_tuple()
+        max_ = max(len(digits), max_)
+    return max_
+
+
 def invalid_ref_fail_unless_test(node, target_model_name,
                                  target_model_package):
     if node.get('resource_type') == NodeType.Test:

diff --git a/requirements.txt b/requirements.txt
index c73a725f263..9c2735f1523 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,10 +6,10 @@ PyYAML>=3.11
 psycopg2==2.7.1
 sqlparse==0.2.3
 networkx==1.11
-csvkit==0.9.1
 snowplow-tracker==0.7.2
 celery==3.1.23
 voluptuous==0.10.5
 snowflake-connector-python>=1.4.9
 colorama==0.3.9
 google-cloud-bigquery==0.26.0
+agate>=1.6,<2

diff --git a/setup.py b/setup.py
index 53fee98e563..23ecc1bc669 100644
--- a/setup.py
+++ b/setup.py
@@ -38,12 +38,12 @@
         'psycopg2==2.7.1',
         'sqlparse==0.2.3',
         'networkx==1.11',
-        'csvkit==0.9.1',
         'snowplow-tracker==0.7.2',
         'celery==3.1.23',
         'voluptuous==0.10.5',
         'snowflake-connector-python>=1.4.9',
         'colorama==0.3.9',
         'google-cloud-bigquery==0.26.0',
+        'agate>=1.6,<2',
     ]
 )

diff --git a/test/integration/001_simple_copy_test/seed-initial/seed.csv b/test/integration/001_simple_copy_test/seed-initial/seed.csv
new file mode 100644
index 00000000000..640af6c4ee6
--- /dev/null
+++ b/test/integration/001_simple_copy_test/seed-initial/seed.csv
@@ -0,0 +1,101 @@
+id,first_name,last_name,email,gender,ip_address
+1,Jack,Hunter,jhunter0@pbs.org,Male,59.80.20.168
+2,Kathryn,Walker,kwalker1@ezinearticles.com,Female,194.121.179.35
+3,Gerald,Ryan,gryan2@com.com,Male,11.3.212.243
+4,Bonnie,Spencer,bspencer3@ameblo.jp,Female,216.32.196.175
+5,Harold,Taylor,htaylor4@people.com.cn,Male,253.10.246.136
+6,Jacqueline,Griffin,jgriffin5@t.co,Female,16.13.192.220
+7,Wanda,Arnold,warnold6@google.nl,Female,232.116.150.64
+8,Craig,Ortiz,cortiz7@sciencedaily.com,Male,199.126.106.13
+9,Gary,Day,gday8@nih.gov,Male,35.81.68.186
+10,Rose,Wright,rwright9@yahoo.co.jp,Female,236.82.178.100
+11,Raymond,Kelley,rkelleya@fc2.com,Male,213.65.166.67
+12,Gerald,Robinson,grobinsonb@disqus.com,Male,72.232.194.193
+13,Mildred,Martinez,mmartinezc@samsung.com,Female,198.29.112.5
+14,Dennis,Arnold,darnoldd@google.com,Male,86.96.3.250
+15,Judy,Gray,jgraye@opensource.org,Female,79.218.162.245
+16,Theresa,Garza,tgarzaf@epa.gov,Female,21.59.100.54
+17,Gerald,Robertson,grobertsong@csmonitor.com,Male,131.134.82.96
+18,Philip,Hernandez,phernandezh@adobe.com,Male,254.196.137.72
+19,Julia,Gonzalez,jgonzalezi@cam.ac.uk,Female,84.240.227.174
+20,Andrew,Davis,adavisj@patch.com,Male,9.255.67.25
+21,Kimberly,Harper,kharperk@foxnews.com,Female,198.208.120.253
+22,Mark,Martin,mmartinl@marketwatch.com,Male,233.138.182.153
+23,Cynthia,Ruiz,cruizm@google.fr,Female,18.178.187.201
+24,Samuel,Carroll,scarrolln@youtu.be,Male,128.113.96.122
+25,Jennifer,Larson,jlarsono@vinaora.com,Female,98.234.85.95
+26,Ashley,Perry,aperryp@rakuten.co.jp,Female,247.173.114.52
+27,Howard,Rodriguez,hrodriguezq@shutterfly.com,Male,231.188.95.26
+28,Amy,Brooks,abrooksr@theatlantic.com,Female,141.199.174.118
+29,Louise,Warren,lwarrens@adobe.com,Female,96.105.158.28
+30,Tina,Watson,twatsont@myspace.com,Female,251.142.118.177
+31,Janice,Kelley,jkelleyu@creativecommons.org,Female,239.167.34.233
+32,Terry,Mccoy,tmccoyv@bravesites.com,Male,117.201.183.203
+33,Jeffrey,Morgan,jmorganw@surveymonkey.com,Male,78.101.78.149
+34,Louis,Harvey,lharveyx@sina.com.cn,Male,51.50.0.167
+35,Philip,Miller,pmillery@samsung.com,Male,103.255.222.110
+36,Willie,Marshall,wmarshallz@ow.ly,Male,149.219.91.68
+37,Patrick,Lopez,plopez10@redcross.org,Male,250.136.229.89
+38,Adam,Jenkins,ajenkins11@harvard.edu,Male,7.36.112.81
+39,Benjamin,Cruz,bcruz12@linkedin.com,Male,32.38.98.15
+40,Ruby,Hawkins,rhawkins13@gmpg.org,Female,135.171.129.255
+41,Carlos,Barnes,cbarnes14@a8.net,Male,240.197.85.140
+42,Ruby,Griffin,rgriffin15@bravesites.com,Female,19.29.135.24
+43,Sean,Mason,smason16@icq.com,Male,159.219.155.249
+44,Anthony,Payne,apayne17@utexas.edu,Male,235.168.199.218
+45,Steve,Cruz,scruz18@pcworld.com,Male,238.201.81.198
+46,Anthony,Garcia,agarcia19@flavors.me,Male,25.85.10.18
+47,Doris,Lopez,dlopez1a@sphinn.com,Female,245.218.51.238
+48,Susan,Nichols,snichols1b@freewebs.com,Female,199.99.9.61
+49,Wanda,Ferguson,wferguson1c@yahoo.co.jp,Female,236.241.135.21
+50,Andrea,Pierce,apierce1d@google.co.uk,Female,132.40.10.209
+51,Lawrence,Phillips,lphillips1e@jugem.jp,Male,72.226.82.87
+52,Judy,Gilbert,jgilbert1f@multiply.com,Female,196.250.15.142
+53,Eric,Williams,ewilliams1g@joomla.org,Male,222.202.73.126
+54,Ralph,Romero,rromero1h@sogou.com,Male,123.184.125.212
+55,Jean,Wilson,jwilson1i@ocn.ne.jp,Female,176.106.32.194
+56,Lori,Reynolds,lreynolds1j@illinois.edu,Female,114.181.203.22
+57,Donald,Moreno,dmoreno1k@bbc.co.uk,Male,233.249.97.60
+58,Steven,Berry,sberry1l@eepurl.com,Male,186.193.50.50
+59,Theresa,Shaw,tshaw1m@people.com.cn,Female,120.37.71.222
+60,John,Stephens,jstephens1n@nationalgeographic.com,Male,191.87.127.115
+61,Richard,Jacobs,rjacobs1o@state.tx.us,Male,66.210.83.155
+62,Andrew,Lawson,alawson1p@over-blog.com,Male,54.98.36.94
+63,Peter,Morgan,pmorgan1q@rambler.ru,Male,14.77.29.106
+64,Nicole,Garrett,ngarrett1r@zimbio.com,Female,21.127.74.68
+65,Joshua,Kim,jkim1s@edublogs.org,Male,57.255.207.41
+66,Ralph,Roberts,rroberts1t@people.com.cn,Male,222.143.131.109
+67,George,Montgomery,gmontgomery1u@smugmug.com,Male,76.75.111.77
+68,Gerald,Alvarez,galvarez1v@flavors.me,Male,58.157.186.194
+69,Donald,Olson,dolson1w@whitehouse.gov,Male,69.65.74.135
+70,Carlos,Morgan,cmorgan1x@pbs.org,Male,96.20.140.87
+71,Aaron,Stanley,astanley1y@webnode.com,Male,163.119.217.44
+72,Virginia,Long,vlong1z@spiegel.de,Female,204.150.194.182
+73,Robert,Berry,rberry20@tripadvisor.com,Male,104.19.48.241
+74,Antonio,Brooks,abrooks21@unesco.org,Male,210.31.7.24
+75,Ruby,Garcia,rgarcia22@ovh.net,Female,233.218.162.214
+76,Jack,Hanson,jhanson23@blogtalkradio.com,Male,31.55.46.199
+77,Kathryn,Nelson,knelson24@walmart.com,Female,14.189.146.41
+78,Jason,Reed,jreed25@printfriendly.com,Male,141.189.89.255
+79,George,Coleman,gcoleman26@people.com.cn,Male,81.189.221.144
+80,Rose,King,rking27@ucoz.com,Female,212.123.168.231
+81,Johnny,Holmes,jholmes28@boston.com,Male,177.3.93.188
+82,Katherine,Gilbert,kgilbert29@altervista.org,Female,199.215.169.61
+83,Joshua,Thomas,jthomas2a@ustream.tv,Male,0.8.205.30
+84,Julie,Perry,jperry2b@opensource.org,Female,60.116.114.192
+85,Richard,Perry,rperry2c@oracle.com,Male,181.125.70.232
+86,Kenneth,Ruiz,kruiz2d@wikimedia.org,Male,189.105.137.109
+87,Jose,Morgan,jmorgan2e@webnode.com,Male,101.134.215.156
+88,Donald,Campbell,dcampbell2f@goo.ne.jp,Male,102.120.215.84
+89,Debra,Collins,dcollins2g@uol.com.br,Female,90.13.153.235
+90,Jesse,Johnson,jjohnson2h@stumbleupon.com,Male,225.178.125.53
+91,Elizabeth,Stone,estone2i@histats.com,Female,123.184.126.221
+92,Angela,Rogers,arogers2j@goodreads.com,Female,98.104.132.187
+93,Emily,Dixon,edixon2k@mlb.com,Female,39.190.75.57
+94,Albert,Scott,ascott2l@tinypic.com,Male,40.209.13.189
+95,Barbara,Peterson,bpeterson2m@ow.ly,Female,75.249.136.180
+96,Adam,Greene,agreene2n@fastcompany.com,Male,184.173.109.144
+97,Earl,Sanders,esanders2o@hc360.com,Male,247.34.90.117
+98,Angela,Brooks,abrooks2p@mtv.com,Female,10.63.249.126
+99,Harold,Foster,hfoster2q@privacy.gov.au,Male,139.214.40.244
+100,Carl,Meyer,cmeyer2r@disqus.com,Male,204.117.7.88

diff --git a/test/integration/001_simple_copy_test/seed-update/seed.csv b/test/integration/001_simple_copy_test/seed-update/seed.csv
new file mode 100644
index 00000000000..5b93306a280
--- /dev/null
+++ b/test/integration/001_simple_copy_test/seed-update/seed.csv
@@ -0,0 +1,201 @@
+id,first_name,last_name,email,gender,ip_address
+1,Jack,Hunter,jhunter0@pbs.org,Male,59.80.20.168
+2,Kathryn,Walker,kwalker1@ezinearticles.com,Female,194.121.179.35
+3,Gerald,Ryan,gryan2@com.com,Male,11.3.212.243
+4,Bonnie,Spencer,bspencer3@ameblo.jp,Female,216.32.196.175
+5,Harold,Taylor,htaylor4@people.com.cn,Male,253.10.246.136
+6,Jacqueline,Griffin,jgriffin5@t.co,Female,16.13.192.220
+7,Wanda,Arnold,warnold6@google.nl,Female,232.116.150.64
+8,Craig,Ortiz,cortiz7@sciencedaily.com,Male,199.126.106.13
+9,Gary,Day,gday8@nih.gov,Male,35.81.68.186
+10,Rose,Wright,rwright9@yahoo.co.jp,Female,236.82.178.100
+11,Raymond,Kelley,rkelleya@fc2.com,Male,213.65.166.67
+12,Gerald,Robinson,grobinsonb@disqus.com,Male,72.232.194.193
+13,Mildred,Martinez,mmartinezc@samsung.com,Female,198.29.112.5
+14,Dennis,Arnold,darnoldd@google.com,Male,86.96.3.250
+15,Judy,Gray,jgraye@opensource.org,Female,79.218.162.245
+16,Theresa,Garza,tgarzaf@epa.gov,Female,21.59.100.54
+17,Gerald,Robertson,grobertsong@csmonitor.com,Male,131.134.82.96
+18,Philip,Hernandez,phernandezh@adobe.com,Male,254.196.137.72
+19,Julia,Gonzalez,jgonzalezi@cam.ac.uk,Female,84.240.227.174
+20,Andrew,Davis,adavisj@patch.com,Male,9.255.67.25
+21,Kimberly,Harper,kharperk@foxnews.com,Female,198.208.120.253
+22,Mark,Martin,mmartinl@marketwatch.com,Male,233.138.182.153
+23,Cynthia,Ruiz,cruizm@google.fr,Female,18.178.187.201
+24,Samuel,Carroll,scarrolln@youtu.be,Male,128.113.96.122
+25,Jennifer,Larson,jlarsono@vinaora.com,Female,98.234.85.95
+26,Ashley,Perry,aperryp@rakuten.co.jp,Female,247.173.114.52
+27,Howard,Rodriguez,hrodriguezq@shutterfly.com,Male,231.188.95.26
+28,Amy,Brooks,abrooksr@theatlantic.com,Female,141.199.174.118
+29,Louise,Warren,lwarrens@adobe.com,Female,96.105.158.28
+30,Tina,Watson,twatsont@myspace.com,Female,251.142.118.177
+31,Janice,Kelley,jkelleyu@creativecommons.org,Female,239.167.34.233
+32,Terry,Mccoy,tmccoyv@bravesites.com,Male,117.201.183.203
+33,Jeffrey,Morgan,jmorganw@surveymonkey.com,Male,78.101.78.149
+34,Louis,Harvey,lharveyx@sina.com.cn,Male,51.50.0.167
+35,Philip,Miller,pmillery@samsung.com,Male,103.255.222.110
+36,Willie,Marshall,wmarshallz@ow.ly,Male,149.219.91.68
+37,Patrick,Lopez,plopez10@redcross.org,Male,250.136.229.89
+38,Adam,Jenkins,ajenkins11@harvard.edu,Male,7.36.112.81
+39,Benjamin,Cruz,bcruz12@linkedin.com,Male,32.38.98.15
+40,Ruby,Hawkins,rhawkins13@gmpg.org,Female,135.171.129.255
+41,Carlos,Barnes,cbarnes14@a8.net,Male,240.197.85.140
+42,Ruby,Griffin,rgriffin15@bravesites.com,Female,19.29.135.24
+43,Sean,Mason,smason16@icq.com,Male,159.219.155.249
+44,Anthony,Payne,apayne17@utexas.edu,Male,235.168.199.218
+45,Steve,Cruz,scruz18@pcworld.com,Male,238.201.81.198
+46,Anthony,Garcia,agarcia19@flavors.me,Male,25.85.10.18
+47,Doris,Lopez,dlopez1a@sphinn.com,Female,245.218.51.238
+48,Susan,Nichols,snichols1b@freewebs.com,Female,199.99.9.61
+49,Wanda,Ferguson,wferguson1c@yahoo.co.jp,Female,236.241.135.21
+50,Andrea,Pierce,apierce1d@google.co.uk,Female,132.40.10.209
+51,Lawrence,Phillips,lphillips1e@jugem.jp,Male,72.226.82.87
+52,Judy,Gilbert,jgilbert1f@multiply.com,Female,196.250.15.142
+53,Eric,Williams,ewilliams1g@joomla.org,Male,222.202.73.126
+54,Ralph,Romero,rromero1h@sogou.com,Male,123.184.125.212
+55,Jean,Wilson,jwilson1i@ocn.ne.jp,Female,176.106.32.194
+56,Lori,Reynolds,lreynolds1j@illinois.edu,Female,114.181.203.22
+57,Donald,Moreno,dmoreno1k@bbc.co.uk,Male,233.249.97.60
+58,Steven,Berry,sberry1l@eepurl.com,Male,186.193.50.50
+59,Theresa,Shaw,tshaw1m@people.com.cn,Female,120.37.71.222
+60,John,Stephens,jstephens1n@nationalgeographic.com,Male,191.87.127.115
+61,Richard,Jacobs,rjacobs1o@state.tx.us,Male,66.210.83.155
+62,Andrew,Lawson,alawson1p@over-blog.com,Male,54.98.36.94
+63,Peter,Morgan,pmorgan1q@rambler.ru,Male,14.77.29.106
+64,Nicole,Garrett,ngarrett1r@zimbio.com,Female,21.127.74.68
+65,Joshua,Kim,jkim1s@edublogs.org,Male,57.255.207.41
+66,Ralph,Roberts,rroberts1t@people.com.cn,Male,222.143.131.109
+67,George,Montgomery,gmontgomery1u@smugmug.com,Male,76.75.111.77
+68,Gerald,Alvarez,galvarez1v@flavors.me,Male,58.157.186.194
+69,Donald,Olson,dolson1w@whitehouse.gov,Male,69.65.74.135
+70,Carlos,Morgan,cmorgan1x@pbs.org,Male,96.20.140.87
+71,Aaron,Stanley,astanley1y@webnode.com,Male,163.119.217.44
+72,Virginia,Long,vlong1z@spiegel.de,Female,204.150.194.182
+73,Robert,Berry,rberry20@tripadvisor.com,Male,104.19.48.241
+74,Antonio,Brooks,abrooks21@unesco.org,Male,210.31.7.24
+75,Ruby,Garcia,rgarcia22@ovh.net,Female,233.218.162.214
+76,Jack,Hanson,jhanson23@blogtalkradio.com,Male,31.55.46.199
+77,Kathryn,Nelson,knelson24@walmart.com,Female,14.189.146.41
+78,Jason,Reed,jreed25@printfriendly.com,Male,141.189.89.255
+79,George,Coleman,gcoleman26@people.com.cn,Male,81.189.221.144
+80,Rose,King,rking27@ucoz.com,Female,212.123.168.231
+81,Johnny,Holmes,jholmes28@boston.com,Male,177.3.93.188
+82,Katherine,Gilbert,kgilbert29@altervista.org,Female,199.215.169.61
+83,Joshua,Thomas,jthomas2a@ustream.tv,Male,0.8.205.30
+84,Julie,Perry,jperry2b@opensource.org,Female,60.116.114.192
+85,Richard,Perry,rperry2c@oracle.com,Male,181.125.70.232
+86,Kenneth,Ruiz,kruiz2d@wikimedia.org,Male,189.105.137.109
+87,Jose,Morgan,jmorgan2e@webnode.com,Male,101.134.215.156
+88,Donald,Campbell,dcampbell2f@goo.ne.jp,Male,102.120.215.84
+89,Debra,Collins,dcollins2g@uol.com.br,Female,90.13.153.235
+90,Jesse,Johnson,jjohnson2h@stumbleupon.com,Male,225.178.125.53
+91,Elizabeth,Stone,estone2i@histats.com,Female,123.184.126.221
+92,Angela,Rogers,arogers2j@goodreads.com,Female,98.104.132.187
+93,Emily,Dixon,edixon2k@mlb.com,Female,39.190.75.57
+94,Albert,Scott,ascott2l@tinypic.com,Male,40.209.13.189
+95,Barbara,Peterson,bpeterson2m@ow.ly,Female,75.249.136.180
+96,Adam,Greene,agreene2n@fastcompany.com,Male,184.173.109.144
+97,Earl,Sanders,esanders2o@hc360.com,Male,247.34.90.117
+98,Angela,Brooks,abrooks2p@mtv.com,Female,10.63.249.126
+99,Harold,Foster,hfoster2q@privacy.gov.au,Male,139.214.40.244
+100,Carl,Meyer,cmeyer2r@disqus.com,Male,204.117.7.88
+101,Michael,Perez,mperez0@chronoengine.com,Male,106.239.70.175
+102,Shawn,Mccoy,smccoy1@reddit.com,Male,24.165.76.182
+103,Kathleen,Payne,kpayne2@cargocollective.com,Female,113.207.168.106
+104,Jimmy,Cooper,jcooper3@cargocollective.com,Male,198.24.63.114
+105,Katherine,Rice,krice4@typepad.com,Female,36.97.186.238
+106,Sarah,Ryan,sryan5@gnu.org,Female,119.117.152.40
+107,Martin,Mcdonald,mmcdonald6@opera.com,Male,8.76.38.115
+108,Frank,Robinson,frobinson7@wunderground.com,Male,186.14.64.194
+109,Jennifer,Franklin,jfranklin8@mail.ru,Female,91.216.3.131
+110,Henry,Welch,hwelch9@list-manage.com,Male,176.35.182.168
+111,Fred,Snyder,fsnydera@reddit.com,Male,217.106.196.54
+112,Amy,Dunn,adunnb@nba.com,Female,95.39.163.195
+113,Kathleen,Meyer,kmeyerc@cdc.gov,Female,164.142.188.214
+114,Steve,Ferguson,sfergusond@reverbnation.com,Male,138.22.204.251
+115,Teresa,Hill,thille@dion.ne.jp,Female,82.84.228.235
+116,Amanda,Harper,aharperf@mail.ru,Female,16.123.56.176
+117,Kimberly,Ray,krayg@xing.com,Female,48.66.48.12
+118,Johnny,Knight,jknighth@jalbum.net,Male,99.30.138.123
+119,Virginia,Freeman,vfreemani@tiny.cc,Female,225.172.182.63
+120,Anna,Austin,aaustinj@diigo.com,Female,62.111.227.148
+121,Willie,Hill,whillk@mail.ru,Male,0.86.232.249
+122,Sean,Harris,sharrisl@zdnet.com,Male,117.165.133.249
+123,Mildred,Adams,madamsm@usatoday.com,Female,163.44.97.46
+124,David,Graham,dgrahamn@zimbio.com,Male,78.13.246.202
+125,Victor,Hunter,vhuntero@ehow.com,Male,64.156.179.139
+126,Aaron,Ruiz,aruizp@weebly.com,Male,34.194.68.78
+127,Benjamin,Brooks,bbrooksq@jalbum.net,Male,20.192.189.107
+128,Lisa,Wilson,lwilsonr@japanpost.jp,Female,199.152.130.217
+129,Benjamin,King,bkings@comsenz.com,Male,29.189.189.213
+130,Christina,Williamson,cwilliamsont@boston.com,Female,194.101.52.60
+131,Jane,Gonzalez,jgonzalezu@networksolutions.com,Female,109.119.12.87
+132,Thomas,Owens,towensv@psu.edu,Male,84.168.213.153
+133,Katherine,Moore,kmoorew@naver.com,Female,183.150.65.24
+134,Jennifer,Stewart,jstewartx@yahoo.com,Female,38.41.244.58
+135,Sara,Tucker,stuckery@topsy.com,Female,181.130.59.184
+136,Harold,Ortiz,hortizz@vkontakte.ru,Male,198.231.63.137
+137,Shirley,James,sjames10@yelp.com,Female,83.27.160.104
+138,Dennis,Johnson,djohnson11@slate.com,Male,183.178.246.101
+139,Louise,Weaver,lweaver12@china.com.cn,Female,1.14.110.18
+140,Maria,Armstrong,marmstrong13@prweb.com,Female,181.142.1.249
+141,Gloria,Cruz,gcruz14@odnoklassniki.ru,Female,178.232.140.243
+142,Diana,Spencer,dspencer15@ifeng.com,Female,125.153.138.244
+143,Kelly,Nguyen,knguyen16@altervista.org,Female,170.13.201.119
+144,Jane,Rodriguez,jrodriguez17@biblegateway.com,Female,12.102.249.81
+145,Scott,Brown,sbrown18@geocities.jp,Male,108.174.99.192
+146,Norma,Cruz,ncruz19@si.edu,Female,201.112.156.197
+147,Marie,Peters,mpeters1a@mlb.com,Female,231.121.197.144
+148,Lillian,Carr,lcarr1b@typepad.com,Female,206.179.164.163
+149,Judy,Nichols,jnichols1c@t-online.de,Female,158.190.209.194
+150,Billy,Long,blong1d@yahoo.com,Male,175.20.23.160
+151,Howard,Reid,hreid1e@exblog.jp,Male,118.99.196.20
+152,Laura,Ferguson,lferguson1f@tuttocitta.it,Female,22.77.87.110
+153,Anne,Bailey,abailey1g@geocities.com,Female,58.144.159.245
+154,Rose,Morgan,rmorgan1h@ehow.com,Female,118.127.97.4
+155,Nicholas,Reyes,nreyes1i@google.ru,Male,50.135.10.252
+156,Joshua,Kennedy,jkennedy1j@house.gov,Male,154.6.163.209
+157,Paul,Watkins,pwatkins1k@upenn.edu,Male,177.236.120.87
+158,Kathryn,Kelly,kkelly1l@businessweek.com,Female,70.28.61.86
+159,Adam,Armstrong,aarmstrong1m@techcrunch.com,Male,133.235.24.202
+160,Norma,Wallace,nwallace1n@phoca.cz,Female,241.119.227.128
+161,Timothy,Reyes,treyes1o@google.cn,Male,86.28.23.26
+162,Elizabeth,Patterson,epatterson1p@sun.com,Female,139.97.159.149
+163,Edward,Gomez,egomez1q@google.fr,Male,158.103.108.255 +164,David,Cox,dcox1r@friendfeed.com,Male,206.80.80.58 +165,Brenda,Wood,bwood1s@over-blog.com,Female,217.207.44.179 +166,Adam,Walker,awalker1t@blogs.com,Male,253.211.54.93 +167,Michael,Hart,mhart1u@wix.com,Male,230.206.200.22 +168,Jesse,Ellis,jellis1v@google.co.uk,Male,213.254.162.52 +169,Janet,Powell,jpowell1w@un.org,Female,27.192.194.86 +170,Helen,Ford,hford1x@creativecommons.org,Female,52.160.102.168 +171,Gerald,Carpenter,gcarpenter1y@about.me,Male,36.30.194.218 +172,Kathryn,Oliver,koliver1z@army.mil,Female,202.63.103.69 +173,Alan,Berry,aberry20@gov.uk,Male,246.157.112.211 +174,Harry,Andrews,handrews21@ameblo.jp,Male,195.108.0.12 +175,Andrea,Hall,ahall22@hp.com,Female,149.162.163.28 +176,Barbara,Wells,bwells23@behance.net,Female,224.70.72.1 +177,Anne,Wells,awells24@apache.org,Female,180.168.81.153 +178,Harry,Harper,hharper25@rediff.com,Male,151.87.130.21 +179,Jack,Ray,jray26@wufoo.com,Male,220.109.38.178 +180,Phillip,Hamilton,phamilton27@joomla.org,Male,166.40.47.30 +181,Shirley,Hunter,shunter28@newsvine.com,Female,97.209.140.194 +182,Arthur,Daniels,adaniels29@reuters.com,Male,5.40.240.86 +183,Virginia,Rodriguez,vrodriguez2a@walmart.com,Female,96.80.164.184 +184,Christina,Ryan,cryan2b@hibu.com,Female,56.35.5.52 +185,Theresa,Mendoza,tmendoza2c@vinaora.com,Female,243.42.0.210 +186,Jason,Cole,jcole2d@ycombinator.com,Male,198.248.39.129 +187,Phillip,Bryant,pbryant2e@rediff.com,Male,140.39.116.251 +188,Adam,Torres,atorres2f@sun.com,Male,101.75.187.135 +189,Margaret,Johnston,mjohnston2g@ucsd.edu,Female,159.30.69.149 +190,Paul,Payne,ppayne2h@hhs.gov,Male,199.234.140.220 +191,Todd,Willis,twillis2i@businessweek.com,Male,191.59.136.214 +192,Willie,Oliver,woliver2j@noaa.gov,Male,44.212.35.197 +193,Frances,Robertson,frobertson2k@go.com,Female,31.117.65.136 +194,Gregory,Hawkins,ghawkins2l@joomla.org,Male,91.3.22.49 +195,Lisa,Perkins,lperkins2m@si.edu,Female,145.95.31.186 +196,Jacqueline,Anderson,janderson2n@cargocollective.com,Female,14.176.0.187 +197,Shirley,Diaz,sdiaz2o@ucla.edu,Female,207.12.95.46 +198,Nicole,Meyer,nmeyer2p@flickr.com,Female,231.79.115.13 +199,Mary,Gray,mgray2q@constantcontact.com,Female,210.116.64.253 +200,Jean,Mcdonald,jmcdonald2r@baidu.com,Female,122.239.235.117 diff --git a/test/integration/001_simple_copy_test/seed.sql b/test/integration/001_simple_copy_test/seed.sql deleted file mode 100644 index 28309b400f8..00000000000 --- a/test/integration/001_simple_copy_test/seed.sql +++ /dev/null @@ -1,111 +0,0 @@ -create table "{schema}"."seed" ( - id BIGSERIAL PRIMARY KEY, - first_name VARCHAR(50), - last_name VARCHAR(50), - email VARCHAR(50), - gender VARCHAR(50), - ip_address VARCHAR(20) -); - - -insert into "{schema}"."seed" (first_name, last_name, email, gender, ip_address) values -('Jack', 'Hunter', 'jhunter0@pbs.org', 'Male', '59.80.20.168'), -('Kathryn', 'Walker', 'kwalker1@ezinearticles.com', 'Female', '194.121.179.35'), -('Gerald', 'Ryan', 'gryan2@com.com', 'Male', '11.3.212.243'), -('Bonnie', 'Spencer', 'bspencer3@ameblo.jp', 'Female', '216.32.196.175'), -('Harold', 'Taylor', 'htaylor4@people.com.cn', 'Male', '253.10.246.136'), -('Jacqueline', 'Griffin', 'jgriffin5@t.co', 'Female', '16.13.192.220'), -('Wanda', 'Arnold', 'warnold6@google.nl', 'Female', '232.116.150.64'), -('Craig', 'Ortiz', 'cortiz7@sciencedaily.com', 'Male', '199.126.106.13'), -('Gary', 'Day', 'gday8@nih.gov', 'Male', '35.81.68.186'), -('Rose', 'Wright', 'rwright9@yahoo.co.jp', 'Female', '236.82.178.100'), -('Raymond', 'Kelley', 'rkelleya@fc2.com', 
'Male', '213.65.166.67'), -('Gerald', 'Robinson', 'grobinsonb@disqus.com', 'Male', '72.232.194.193'), -('Mildred', 'Martinez', 'mmartinezc@samsung.com', 'Female', '198.29.112.5'), -('Dennis', 'Arnold', 'darnoldd@google.com', 'Male', '86.96.3.250'), -('Judy', 'Gray', 'jgraye@opensource.org', 'Female', '79.218.162.245'), -('Theresa', 'Garza', 'tgarzaf@epa.gov', 'Female', '21.59.100.54'), -('Gerald', 'Robertson', 'grobertsong@csmonitor.com', 'Male', '131.134.82.96'), -('Philip', 'Hernandez', 'phernandezh@adobe.com', 'Male', '254.196.137.72'), -('Julia', 'Gonzalez', 'jgonzalezi@cam.ac.uk', 'Female', '84.240.227.174'), -('Andrew', 'Davis', 'adavisj@patch.com', 'Male', '9.255.67.25'), -('Kimberly', 'Harper', 'kharperk@foxnews.com', 'Female', '198.208.120.253'), -('Mark', 'Martin', 'mmartinl@marketwatch.com', 'Male', '233.138.182.153'), -('Cynthia', 'Ruiz', 'cruizm@google.fr', 'Female', '18.178.187.201'), -('Samuel', 'Carroll', 'scarrolln@youtu.be', 'Male', '128.113.96.122'), -('Jennifer', 'Larson', 'jlarsono@vinaora.com', 'Female', '98.234.85.95'), -('Ashley', 'Perry', 'aperryp@rakuten.co.jp', 'Female', '247.173.114.52'), -('Howard', 'Rodriguez', 'hrodriguezq@shutterfly.com', 'Male', '231.188.95.26'), -('Amy', 'Brooks', 'abrooksr@theatlantic.com', 'Female', '141.199.174.118'), -('Louise', 'Warren', 'lwarrens@adobe.com', 'Female', '96.105.158.28'), -('Tina', 'Watson', 'twatsont@myspace.com', 'Female', '251.142.118.177'), -('Janice', 'Kelley', 'jkelleyu@creativecommons.org', 'Female', '239.167.34.233'), -('Terry', 'Mccoy', 'tmccoyv@bravesites.com', 'Male', '117.201.183.203'), -('Jeffrey', 'Morgan', 'jmorganw@surveymonkey.com', 'Male', '78.101.78.149'), -('Louis', 'Harvey', 'lharveyx@sina.com.cn', 'Male', '51.50.0.167'), -('Philip', 'Miller', 'pmillery@samsung.com', 'Male', '103.255.222.110'), -('Willie', 'Marshall', 'wmarshallz@ow.ly', 'Male', '149.219.91.68'), -('Patrick', 'Lopez', 'plopez10@redcross.org', 'Male', '250.136.229.89'), -('Adam', 'Jenkins', 'ajenkins11@harvard.edu', 'Male', '7.36.112.81'), -('Benjamin', 'Cruz', 'bcruz12@linkedin.com', 'Male', '32.38.98.15'), -('Ruby', 'Hawkins', 'rhawkins13@gmpg.org', 'Female', '135.171.129.255'), -('Carlos', 'Barnes', 'cbarnes14@a8.net', 'Male', '240.197.85.140'), -('Ruby', 'Griffin', 'rgriffin15@bravesites.com', 'Female', '19.29.135.24'), -('Sean', 'Mason', 'smason16@icq.com', 'Male', '159.219.155.249'), -('Anthony', 'Payne', 'apayne17@utexas.edu', 'Male', '235.168.199.218'), -('Steve', 'Cruz', 'scruz18@pcworld.com', 'Male', '238.201.81.198'), -('Anthony', 'Garcia', 'agarcia19@flavors.me', 'Male', '25.85.10.18'), -('Doris', 'Lopez', 'dlopez1a@sphinn.com', 'Female', '245.218.51.238'), -('Susan', 'Nichols', 'snichols1b@freewebs.com', 'Female', '199.99.9.61'), -('Wanda', 'Ferguson', 'wferguson1c@yahoo.co.jp', 'Female', '236.241.135.21'), -('Andrea', 'Pierce', 'apierce1d@google.co.uk', 'Female', '132.40.10.209'), -('Lawrence', 'Phillips', 'lphillips1e@jugem.jp', 'Male', '72.226.82.87'), -('Judy', 'Gilbert', 'jgilbert1f@multiply.com', 'Female', '196.250.15.142'), -('Eric', 'Williams', 'ewilliams1g@joomla.org', 'Male', '222.202.73.126'), -('Ralph', 'Romero', 'rromero1h@sogou.com', 'Male', '123.184.125.212'), -('Jean', 'Wilson', 'jwilson1i@ocn.ne.jp', 'Female', '176.106.32.194'), -('Lori', 'Reynolds', 'lreynolds1j@illinois.edu', 'Female', '114.181.203.22'), -('Donald', 'Moreno', 'dmoreno1k@bbc.co.uk', 'Male', '233.249.97.60'), -('Steven', 'Berry', 'sberry1l@eepurl.com', 'Male', '186.193.50.50'), -('Theresa', 'Shaw', 'tshaw1m@people.com.cn', 'Female', 
'120.37.71.222'), -('John', 'Stephens', 'jstephens1n@nationalgeographic.com', 'Male', '191.87.127.115'), -('Richard', 'Jacobs', 'rjacobs1o@state.tx.us', 'Male', '66.210.83.155'), -('Andrew', 'Lawson', 'alawson1p@over-blog.com', 'Male', '54.98.36.94'), -('Peter', 'Morgan', 'pmorgan1q@rambler.ru', 'Male', '14.77.29.106'), -('Nicole', 'Garrett', 'ngarrett1r@zimbio.com', 'Female', '21.127.74.68'), -('Joshua', 'Kim', 'jkim1s@edublogs.org', 'Male', '57.255.207.41'), -('Ralph', 'Roberts', 'rroberts1t@people.com.cn', 'Male', '222.143.131.109'), -('George', 'Montgomery', 'gmontgomery1u@smugmug.com', 'Male', '76.75.111.77'), -('Gerald', 'Alvarez', 'galvarez1v@flavors.me', 'Male', '58.157.186.194'), -('Donald', 'Olson', 'dolson1w@whitehouse.gov', 'Male', '69.65.74.135'), -('Carlos', 'Morgan', 'cmorgan1x@pbs.org', 'Male', '96.20.140.87'), -('Aaron', 'Stanley', 'astanley1y@webnode.com', 'Male', '163.119.217.44'), -('Virginia', 'Long', 'vlong1z@spiegel.de', 'Female', '204.150.194.182'), -('Robert', 'Berry', 'rberry20@tripadvisor.com', 'Male', '104.19.48.241'), -('Antonio', 'Brooks', 'abrooks21@unesco.org', 'Male', '210.31.7.24'), -('Ruby', 'Garcia', 'rgarcia22@ovh.net', 'Female', '233.218.162.214'), -('Jack', 'Hanson', 'jhanson23@blogtalkradio.com', 'Male', '31.55.46.199'), -('Kathryn', 'Nelson', 'knelson24@walmart.com', 'Female', '14.189.146.41'), -('Jason', 'Reed', 'jreed25@printfriendly.com', 'Male', '141.189.89.255'), -('George', 'Coleman', 'gcoleman26@people.com.cn', 'Male', '81.189.221.144'), -('Rose', 'King', 'rking27@ucoz.com', 'Female', '212.123.168.231'), -('Johnny', 'Holmes', 'jholmes28@boston.com', 'Male', '177.3.93.188'), -('Katherine', 'Gilbert', 'kgilbert29@altervista.org', 'Female', '199.215.169.61'), -('Joshua', 'Thomas', 'jthomas2a@ustream.tv', 'Male', '0.8.205.30'), -('Julie', 'Perry', 'jperry2b@opensource.org', 'Female', '60.116.114.192'), -('Richard', 'Perry', 'rperry2c@oracle.com', 'Male', '181.125.70.232'), -('Kenneth', 'Ruiz', 'kruiz2d@wikimedia.org', 'Male', '189.105.137.109'), -('Jose', 'Morgan', 'jmorgan2e@webnode.com', 'Male', '101.134.215.156'), -('Donald', 'Campbell', 'dcampbell2f@goo.ne.jp', 'Male', '102.120.215.84'), -('Debra', 'Collins', 'dcollins2g@uol.com.br', 'Female', '90.13.153.235'), -('Jesse', 'Johnson', 'jjohnson2h@stumbleupon.com', 'Male', '225.178.125.53'), -('Elizabeth', 'Stone', 'estone2i@histats.com', 'Female', '123.184.126.221'), -('Angela', 'Rogers', 'arogers2j@goodreads.com', 'Female', '98.104.132.187'), -('Emily', 'Dixon', 'edixon2k@mlb.com', 'Female', '39.190.75.57'), -('Albert', 'Scott', 'ascott2l@tinypic.com', 'Male', '40.209.13.189'), -('Barbara', 'Peterson', 'bpeterson2m@ow.ly', 'Female', '75.249.136.180'), -('Adam', 'Greene', 'agreene2n@fastcompany.com', 'Male', '184.173.109.144'), -('Earl', 'Sanders', 'esanders2o@hc360.com', 'Male', '247.34.90.117'), -('Angela', 'Brooks', 'abrooks2p@mtv.com', 'Female', '10.63.249.126'), -('Harold', 'Foster', 'hfoster2q@privacy.gov.au', 'Male', '139.214.40.244'), -('Carl', 'Meyer', 'cmeyer2r@disqus.com', 'Male', '204.117.7.88'); diff --git a/test/integration/001_simple_copy_test/test_simple_copy.py b/test/integration/001_simple_copy_test/test_simple_copy.py index 5f67d72eb9d..fcf720f7aac 100644 --- a/test/integration/001_simple_copy_test/test_simple_copy.py +++ b/test/integration/001_simple_copy_test/test_simple_copy.py @@ -11,57 +11,61 @@ def setUp(self): def schema(self): return "simple_copy_001" + @staticmethod + def dir(path): + return "test/integration/001_simple_copy_test/" + path.lstrip("/") + @property def 
models(self): - return "test/integration/001_simple_copy_test/models" + return self.dir("models") - @attr(type='postgres') + @attr(type="postgres") def test__postgres__simple_copy(self): - self.use_default_project() - self.use_profile('postgres') - self.run_sql_file("test/integration/001_simple_copy_test/seed.sql") + self.use_default_project({"data-paths": [self.dir("seed-initial")]}) + self.use_profile("postgres") + self.run_dbt(["seed"]) self.run_dbt() self.assertTablesEqual("seed","view") self.assertTablesEqual("seed","incremental") self.assertTablesEqual("seed","materialized") - self.run_sql_file("test/integration/001_simple_copy_test/update.sql") - + self.use_default_project({"data-paths": [self.dir("seed-update")]}) + self.run_dbt(["seed"]) self.run_dbt() self.assertTablesEqual("seed","view") self.assertTablesEqual("seed","incremental") self.assertTablesEqual("seed","materialized") - @attr(type='postgres') + @attr(type="postgres") def test__postgres__dbt_doesnt_run_empty_models(self): - self.use_default_project() - self.use_profile('postgres') - self.run_sql_file("test/integration/001_simple_copy_test/seed.sql") + self.use_default_project({"data-paths": [self.dir("seed-initial")]}) + self.use_profile("postgres") + self.run_dbt(["seed"]) self.run_dbt() models = self.get_models_in_schema() - self.assertFalse('empty' in models.keys()) - self.assertFalse('disabled' in models.keys()) + self.assertFalse("empty" in models.keys()) + self.assertFalse("disabled" in models.keys()) - @attr(type='snowflake') + @attr(type="snowflake") def test__snowflake__simple_copy(self): - self.use_default_project() - self.use_profile('snowflake') - self.run_sql_file("test/integration/001_simple_copy_test/seed.sql") + self.use_default_project({"data-paths": [self.dir("seed-initial")]}) + self.use_profile("snowflake") + self.run_dbt(["seed"]) self.run_dbt() self.assertTablesEqual("seed","view") self.assertTablesEqual("seed","incremental") self.assertTablesEqual("seed","materialized") - self.run_sql_file("test/integration/001_simple_copy_test/update.sql") - + self.use_default_project({"data-paths": [self.dir("seed-update")]}) + self.run_dbt(["seed"]) self.run_dbt() self.assertTablesEqual("seed","view") diff --git a/test/integration/001_simple_copy_test/update.sql b/test/integration/001_simple_copy_test/update.sql deleted file mode 100644 index e78d5d7d6df..00000000000 --- a/test/integration/001_simple_copy_test/update.sql +++ /dev/null @@ -1,101 +0,0 @@ -insert into "{schema}"."seed" (first_name, last_name, email, gender, ip_address) values -('Michael', 'Perez', 'mperez0@chronoengine.com', 'Male', '106.239.70.175'), -('Shawn', 'Mccoy', 'smccoy1@reddit.com', 'Male', '24.165.76.182'), -('Kathleen', 'Payne', 'kpayne2@cargocollective.com', 'Female', '113.207.168.106'), -('Jimmy', 'Cooper', 'jcooper3@cargocollective.com', 'Male', '198.24.63.114'), -('Katherine', 'Rice', 'krice4@typepad.com', 'Female', '36.97.186.238'), -('Sarah', 'Ryan', 'sryan5@gnu.org', 'Female', '119.117.152.40'), -('Martin', 'Mcdonald', 'mmcdonald6@opera.com', 'Male', '8.76.38.115'), -('Frank', 'Robinson', 'frobinson7@wunderground.com', 'Male', '186.14.64.194'), -('Jennifer', 'Franklin', 'jfranklin8@mail.ru', 'Female', '91.216.3.131'), -('Henry', 'Welch', 'hwelch9@list-manage.com', 'Male', '176.35.182.168'), -('Fred', 'Snyder', 'fsnydera@reddit.com', 'Male', '217.106.196.54'), -('Amy', 'Dunn', 'adunnb@nba.com', 'Female', '95.39.163.195'), -('Kathleen', 'Meyer', 'kmeyerc@cdc.gov', 'Female', '164.142.188.214'), -('Steve', 'Ferguson', 
'sfergusond@reverbnation.com', 'Male', '138.22.204.251'), -('Teresa', 'Hill', 'thille@dion.ne.jp', 'Female', '82.84.228.235'), -('Amanda', 'Harper', 'aharperf@mail.ru', 'Female', '16.123.56.176'), -('Kimberly', 'Ray', 'krayg@xing.com', 'Female', '48.66.48.12'), -('Johnny', 'Knight', 'jknighth@jalbum.net', 'Male', '99.30.138.123'), -('Virginia', 'Freeman', 'vfreemani@tiny.cc', 'Female', '225.172.182.63'), -('Anna', 'Austin', 'aaustinj@diigo.com', 'Female', '62.111.227.148'), -('Willie', 'Hill', 'whillk@mail.ru', 'Male', '0.86.232.249'), -('Sean', 'Harris', 'sharrisl@zdnet.com', 'Male', '117.165.133.249'), -('Mildred', 'Adams', 'madamsm@usatoday.com', 'Female', '163.44.97.46'), -('David', 'Graham', 'dgrahamn@zimbio.com', 'Male', '78.13.246.202'), -('Victor', 'Hunter', 'vhuntero@ehow.com', 'Male', '64.156.179.139'), -('Aaron', 'Ruiz', 'aruizp@weebly.com', 'Male', '34.194.68.78'), -('Benjamin', 'Brooks', 'bbrooksq@jalbum.net', 'Male', '20.192.189.107'), -('Lisa', 'Wilson', 'lwilsonr@japanpost.jp', 'Female', '199.152.130.217'), -('Benjamin', 'King', 'bkings@comsenz.com', 'Male', '29.189.189.213'), -('Christina', 'Williamson', 'cwilliamsont@boston.com', 'Female', '194.101.52.60'), -('Jane', 'Gonzalez', 'jgonzalezu@networksolutions.com', 'Female', '109.119.12.87'), -('Thomas', 'Owens', 'towensv@psu.edu', 'Male', '84.168.213.153'), -('Katherine', 'Moore', 'kmoorew@naver.com', 'Female', '183.150.65.24'), -('Jennifer', 'Stewart', 'jstewartx@yahoo.com', 'Female', '38.41.244.58'), -('Sara', 'Tucker', 'stuckery@topsy.com', 'Female', '181.130.59.184'), -('Harold', 'Ortiz', 'hortizz@vkontakte.ru', 'Male', '198.231.63.137'), -('Shirley', 'James', 'sjames10@yelp.com', 'Female', '83.27.160.104'), -('Dennis', 'Johnson', 'djohnson11@slate.com', 'Male', '183.178.246.101'), -('Louise', 'Weaver', 'lweaver12@china.com.cn', 'Female', '1.14.110.18'), -('Maria', 'Armstrong', 'marmstrong13@prweb.com', 'Female', '181.142.1.249'), -('Gloria', 'Cruz', 'gcruz14@odnoklassniki.ru', 'Female', '178.232.140.243'), -('Diana', 'Spencer', 'dspencer15@ifeng.com', 'Female', '125.153.138.244'), -('Kelly', 'Nguyen', 'knguyen16@altervista.org', 'Female', '170.13.201.119'), -('Jane', 'Rodriguez', 'jrodriguez17@biblegateway.com', 'Female', '12.102.249.81'), -('Scott', 'Brown', 'sbrown18@geocities.jp', 'Male', '108.174.99.192'), -('Norma', 'Cruz', 'ncruz19@si.edu', 'Female', '201.112.156.197'), -('Marie', 'Peters', 'mpeters1a@mlb.com', 'Female', '231.121.197.144'), -('Lillian', 'Carr', 'lcarr1b@typepad.com', 'Female', '206.179.164.163'), -('Judy', 'Nichols', 'jnichols1c@t-online.de', 'Female', '158.190.209.194'), -('Billy', 'Long', 'blong1d@yahoo.com', 'Male', '175.20.23.160'), -('Howard', 'Reid', 'hreid1e@exblog.jp', 'Male', '118.99.196.20'), -('Laura', 'Ferguson', 'lferguson1f@tuttocitta.it', 'Female', '22.77.87.110'), -('Anne', 'Bailey', 'abailey1g@geocities.com', 'Female', '58.144.159.245'), -('Rose', 'Morgan', 'rmorgan1h@ehow.com', 'Female', '118.127.97.4'), -('Nicholas', 'Reyes', 'nreyes1i@google.ru', 'Male', '50.135.10.252'), -('Joshua', 'Kennedy', 'jkennedy1j@house.gov', 'Male', '154.6.163.209'), -('Paul', 'Watkins', 'pwatkins1k@upenn.edu', 'Male', '177.236.120.87'), -('Kathryn', 'Kelly', 'kkelly1l@businessweek.com', 'Female', '70.28.61.86'), -('Adam', 'Armstrong', 'aarmstrong1m@techcrunch.com', 'Male', '133.235.24.202'), -('Norma', 'Wallace', 'nwallace1n@phoca.cz', 'Female', '241.119.227.128'), -('Timothy', 'Reyes', 'treyes1o@google.cn', 'Male', '86.28.23.26'), -('Elizabeth', 'Patterson', 'epatterson1p@sun.com', 'Female', 
'139.97.159.149'), -('Edward', 'Gomez', 'egomez1q@google.fr', 'Male', '158.103.108.255'), -('David', 'Cox', 'dcox1r@friendfeed.com', 'Male', '206.80.80.58'), -('Brenda', 'Wood', 'bwood1s@over-blog.com', 'Female', '217.207.44.179'), -('Adam', 'Walker', 'awalker1t@blogs.com', 'Male', '253.211.54.93'), -('Michael', 'Hart', 'mhart1u@wix.com', 'Male', '230.206.200.22'), -('Jesse', 'Ellis', 'jellis1v@google.co.uk', 'Male', '213.254.162.52'), -('Janet', 'Powell', 'jpowell1w@un.org', 'Female', '27.192.194.86'), -('Helen', 'Ford', 'hford1x@creativecommons.org', 'Female', '52.160.102.168'), -('Gerald', 'Carpenter', 'gcarpenter1y@about.me', 'Male', '36.30.194.218'), -('Kathryn', 'Oliver', 'koliver1z@army.mil', 'Female', '202.63.103.69'), -('Alan', 'Berry', 'aberry20@gov.uk', 'Male', '246.157.112.211'), -('Harry', 'Andrews', 'handrews21@ameblo.jp', 'Male', '195.108.0.12'), -('Andrea', 'Hall', 'ahall22@hp.com', 'Female', '149.162.163.28'), -('Barbara', 'Wells', 'bwells23@behance.net', 'Female', '224.70.72.1'), -('Anne', 'Wells', 'awells24@apache.org', 'Female', '180.168.81.153'), -('Harry', 'Harper', 'hharper25@rediff.com', 'Male', '151.87.130.21'), -('Jack', 'Ray', 'jray26@wufoo.com', 'Male', '220.109.38.178'), -('Phillip', 'Hamilton', 'phamilton27@joomla.org', 'Male', '166.40.47.30'), -('Shirley', 'Hunter', 'shunter28@newsvine.com', 'Female', '97.209.140.194'), -('Arthur', 'Daniels', 'adaniels29@reuters.com', 'Male', '5.40.240.86'), -('Virginia', 'Rodriguez', 'vrodriguez2a@walmart.com', 'Female', '96.80.164.184'), -('Christina', 'Ryan', 'cryan2b@hibu.com', 'Female', '56.35.5.52'), -('Theresa', 'Mendoza', 'tmendoza2c@vinaora.com', 'Female', '243.42.0.210'), -('Jason', 'Cole', 'jcole2d@ycombinator.com', 'Male', '198.248.39.129'), -('Phillip', 'Bryant', 'pbryant2e@rediff.com', 'Male', '140.39.116.251'), -('Adam', 'Torres', 'atorres2f@sun.com', 'Male', '101.75.187.135'), -('Margaret', 'Johnston', 'mjohnston2g@ucsd.edu', 'Female', '159.30.69.149'), -('Paul', 'Payne', 'ppayne2h@hhs.gov', 'Male', '199.234.140.220'), -('Todd', 'Willis', 'twillis2i@businessweek.com', 'Male', '191.59.136.214'), -('Willie', 'Oliver', 'woliver2j@noaa.gov', 'Male', '44.212.35.197'), -('Frances', 'Robertson', 'frobertson2k@go.com', 'Female', '31.117.65.136'), -('Gregory', 'Hawkins', 'ghawkins2l@joomla.org', 'Male', '91.3.22.49'), -('Lisa', 'Perkins', 'lperkins2m@si.edu', 'Female', '145.95.31.186'), -('Jacqueline', 'Anderson', 'janderson2n@cargocollective.com', 'Female', '14.176.0.187'), -('Shirley', 'Diaz', 'sdiaz2o@ucla.edu', 'Female', '207.12.95.46'), -('Nicole', 'Meyer', 'nmeyer2p@flickr.com', 'Female', '231.79.115.13'), -('Mary', 'Gray', 'mgray2q@constantcontact.com', 'Female', '210.116.64.253'), -('Jean', 'Mcdonald', 'jmcdonald2r@baidu.com', 'Female', '122.239.235.117'); diff --git a/test/integration/005_simple_seed_test/seed.sql b/test/integration/005_simple_seed_test/seed.sql index 5ad307e67ff..d4eea3ae495 100644 --- a/test/integration/005_simple_seed_test/seed.sql +++ b/test/integration/005_simple_seed_test/seed.sql @@ -1,8 +1,8 @@ create table {schema}.seed_expected ( id INTEGER, - first_name VARCHAR(11), - email VARCHAR(31), - ip_address VARCHAR(15), + first_name TEXT, + email TEXT, + ip_address TEXT, birthday TIMESTAMP WITHOUT TIME ZONE ); diff --git a/test/integration/023_exit_codes_test/data-bad/data.csv b/test/integration/023_exit_codes_test/data-bad/data.csv index 37ff5b7356a..fcc8e001bbd 100644 --- a/test/integration/023_exit_codes_test/data-bad/data.csv +++ b/test/integration/023_exit_codes_test/data-bad/data.csv 
@@ -1,2 +1,2 @@ -"a,b,c -"1,\2,3,a,a,a +a,b,c +1,\2,3,a,a,a diff --git a/test/integration/023_exit_codes_test/test_exit_codes.py b/test/integration/023_exit_codes_test/test_exit_codes.py index e3640b7e80c..dfc4a9a2931 100644 --- a/test/integration/023_exit_codes_test/test_exit_codes.py +++ b/test/integration/023_exit_codes_test/test_exit_codes.py @@ -214,5 +214,8 @@ def project_config(self): @attr(type='postgres') def test_seed(self): - _, success = self.run_dbt_and_check(['seed']) - self.assertFalse(success) + try: + _, success = self.run_dbt_and_check(['seed']) + self.assertTrue(False) + except dbt.exceptions.CompilationException as e: + pass diff --git a/test/integration/base.py b/test/integration/base.py index 78aa2729d06..05d48487809 100644 --- a/test/integration/base.py +++ b/test/integration/base.py @@ -186,19 +186,20 @@ def setUp(self): self.run_sql('DROP SCHEMA IF EXISTS "{}" CASCADE'.format(self.unique_schema())) self.run_sql('CREATE SCHEMA "{}"'.format(self.unique_schema())) - def use_default_project(self): + def use_default_project(self, overrides=None): # create a dbt_project.yml base_project_config = { 'name': 'test', 'version': '1.0', 'test-paths': [], 'source-paths': [self.models], - 'profile': 'test' + 'profile': 'test', } project_config = {} project_config.update(base_project_config) project_config.update(self.project_config) + project_config.update(overrides or {}) with open("dbt_project.yml", 'w') as f: yaml.safe_dump(project_config, f, default_flow_style=True) @@ -431,3 +432,7 @@ def assertTableColumnsEqual(self, table_a, table_b, table_a_schema=None, table_b table_a_result, table_b_result ) + + def assertEquals(self, *args, **kwargs): + # assertEquals is deprecated. This makes the warnings less chatty + self.assertEqual(*args, **kwargs) diff --git a/test/setup_db.sh b/test/setup_db.sh index e315d76f02f..d373b7195a0 100644 --- a/test/setup_db.sh +++ b/test/setup_db.sh @@ -1,12 +1,19 @@ +#!/bin/bash set -x +# If you want to run this script for your own postgresql (run with +# docker-compose) it will look like this: +# PGHOST=127.0.0.1 PGUSER=root PGPASSWORD=password PGDATABASE=postgres \ +# bash test/setup_db.sh +PGUSER="${PGUSER:-postgres}" + createdb dbt -psql -c "CREATE ROLE root WITH UNENCRYPTED PASSWORD 'password';" -U postgres -psql -c "ALTER ROLE root WITH LOGIN;" -U postgres -psql -c "GRANT CREATE, CONNECT ON DATABASE dbt TO root;" -U postgres +psql -c "CREATE ROLE root WITH PASSWORD 'password';" +psql -c "ALTER ROLE root WITH LOGIN;" +psql -c "GRANT CREATE, CONNECT ON DATABASE dbt TO root;" -psql -c "CREATE ROLE noaccess WITH UNENCRYPTED PASSWORD 'password' NOSUPERUSER;" -U postgres; -psql -c "ALTER ROLE noaccess WITH LOGIN;" -U postgres -psql -c "GRANT CONNECT ON DATABASE dbt TO noaccess;" -U postgres; +psql -c "CREATE ROLE noaccess WITH PASSWORD 'password' NOSUPERUSER;" +psql -c "ALTER ROLE noaccess WITH LOGIN;" +psql -c "GRANT CONNECT ON DATABASE dbt TO noaccess;" set +x From 4eb75ec5b621eb9ddaf289552d2c30c675553d2f Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Mon, 12 Feb 2018 16:09:59 -0500 Subject: [PATCH 4/4] Bq date partitioning (#641) * first cut of date partitioning * cleanup, implement partitioning in materialization * update requirements.txt * wip for date partitioning with range * log data * arg handling, logging, cleanup + view compat for new bq version * add partitioning tests, compatibility with bq 0.29.0 release * pep8 * fix for strange error in appveyor * debug appveyor...
* dumb * debugging weird bq adapter use in pg test * do not use read_project in bq tests * cleanup connections, initialize bq tests * remove debug lines * fix integration tests (actually) * warning for view creation which clobbers tables * add query timeout example for bq * no need to release connections in the adapter * partition_date interface change (wip) * list of dates for bq dp tables * tiny fixes for crufty dbt_project.yml files * rm debug line * fix tests --- dbt/adapters/bigquery.py | 156 ++++++++++-------- dbt/context/common.py | 20 ++- dbt/exceptions.py | 8 +- .../global_project/macros/etc/bigquery.sql | 4 + .../global_project/macros/etc/datetime.sql | 56 +++++++ .../macros/materializations/bigquery.sql | 67 +++++++- dbt/project.py | 6 + requirements.txt | 2 +- sample.profiles.yml | 1 + setup.py | 2 +- .../dp-models/confirmation.sql | 20 +++ .../dp-models/events_20180101.sql | 4 + .../dp-models/events_20180102.sql | 4 + .../dp-models/events_20180103.sql | 4 + .../dp-models/partitioned.sql | 16 ++ .../dp-models/partitioned_simple.sql | 14 ++ .../022_bigquery_test/dp-models/schema.yml | 18 ++ .../test_bigquery_date_partitioning.py | 33 ++++ .../test_simple_bigquery_view.py | 18 +- .../023_exit_codes_test/test_exit_codes.py | 3 - test/integration/base.py | 1 + 21 files changed, 363 insertions(+), 94 deletions(-) create mode 100644 dbt/include/global_project/macros/etc/bigquery.sql create mode 100644 dbt/include/global_project/macros/etc/datetime.sql create mode 100644 test/integration/022_bigquery_test/dp-models/confirmation.sql create mode 100644 test/integration/022_bigquery_test/dp-models/events_20180101.sql create mode 100644 test/integration/022_bigquery_test/dp-models/events_20180102.sql create mode 100644 test/integration/022_bigquery_test/dp-models/events_20180103.sql create mode 100644 test/integration/022_bigquery_test/dp-models/partitioned.sql create mode 100644 test/integration/022_bigquery_test/dp-models/partitioned_simple.sql create mode 100644 test/integration/022_bigquery_test/dp-models/schema.yml create mode 100644 test/integration/022_bigquery_test/test_bigquery_date_partitioning.py diff --git a/dbt/adapters/bigquery.py b/dbt/adapters/bigquery.py index 4e3664986ea..9d2e6eb7b97 100644 --- a/dbt/adapters/bigquery.py +++ b/dbt/adapters/bigquery.py @@ -17,7 +17,6 @@ import google.cloud.bigquery import time -import uuid class BigQueryAdapter(PostgresAdapter): @@ -27,7 +26,8 @@ class BigQueryAdapter(PostgresAdapter): "execute_model", "drop", "execute", - "quote_schema_and_table" + "quote_schema_and_table", + "make_date_partitioned_table" ] SCOPE = ('https://www.googleapis.com/auth/bigquery', @@ -150,27 +150,33 @@ def query_for_existing(cls, profile, schemas, model_name=None): if not isinstance(schemas, (list, tuple)): schemas = [schemas] + conn = cls.get_connection(profile, model_name) + client = conn.get('handle') + all_tables = [] for schema in schemas: dataset = cls.get_dataset(profile, schema, model_name) - all_tables.extend(dataset.list_tables()) + all_tables.extend(client.list_tables(dataset)) - relation_type_lookup = { + relation_types = { 'TABLE': 'table', 'VIEW': 'view', 'EXTERNAL': 'external' } - existing = [(table.name, relation_type_lookup.get(table.table_type)) + existing = [(table.table_id, relation_types.get(table.table_type)) for table in all_tables] return dict(existing) @classmethod def drop(cls, profile, schema, relation, relation_type, model_name=None): + conn = cls.get_connection(profile, model_name) + client = conn.get('handle') + dataset = 
cls.get_dataset(profile, schema, model_name) relation_object = dataset.table(relation) - relation_object.delete() + client.delete_table(relation_object) @classmethod def rename(cls, profile, schema, from_name, to_name, model_name=None): @@ -183,19 +189,22 @@ def get_timeout(cls, conn): return credentials.get('timeout_seconds', cls.QUERY_TIMEOUT) @classmethod - def materialize_as_view(cls, profile, dataset, model_name, model_sql): - view = dataset.table(model_name) + def materialize_as_view(cls, profile, dataset, model): + model_name = model.get('name') + model_sql = model.get('injected_sql') + + conn = cls.get_connection(profile, model_name) + client = conn.get('handle') + + view_ref = dataset.table(model_name) + view = google.cloud.bigquery.Table(view_ref) view.view_query = model_sql view.view_use_legacy_sql = False logger.debug("Model SQL ({}):\n{}".format(model_name, model_sql)) with cls.exception_handler(profile, model_sql, model_name, model_name): - view.create() - - if view.created is None: - msg = "Error creating view {}".format(model_name) - raise dbt.exceptions.RuntimeException(msg) + client.create_table(view) return "CREATE VIEW" @@ -215,86 +224,94 @@ def poll_until_job_completes(cls, job, timeout): raise job.exception() @classmethod - def materialize_as_table(cls, profile, dataset, model_name, model_sql): + def make_date_partitioned_table(cls, profile, dataset_name, identifier, + model_name=None): conn = cls.get_connection(profile, model_name) client = conn.get('handle') - table = dataset.table(model_name) - job_id = 'dbt-create-{}-{}'.format(model_name, uuid.uuid4()) - job = client.run_async_query(job_id, model_sql) - job.use_legacy_sql = False - job.destination = table - job.write_disposition = 'WRITE_TRUNCATE' - job.begin() + dataset = cls.get_dataset(profile, dataset_name, identifier) + table_ref = dataset.table(identifier) + table = google.cloud.bigquery.Table(table_ref) + table.partitioning_type = 'DAY' - cls.release_connection(profile, model_name) + return client.create_table(table) - logger.debug("Model SQL ({}):\n{}".format(model_name, model_sql)) + @classmethod + def materialize_as_table(cls, profile, dataset, model, model_sql, + decorator=None): + model_name = model.get('name') + + conn = cls.get_connection(profile, model_name) + client = conn.get('handle') + + if decorator is None: + table_name = model_name + else: + table_name = "{}${}".format(model_name, decorator) + + table_ref = dataset.table(table_name) + job_config = google.cloud.bigquery.QueryJobConfig() + job_config.destination = table_ref + job_config.write_disposition = 'WRITE_TRUNCATE' + + logger.debug("Model SQL ({}):\n{}".format(table_name, model_sql)) + query_job = client.query(model_sql, job_config=job_config) + # this waits for the job to complete with cls.exception_handler(profile, model_sql, model_name, model_name): - cls.poll_until_job_completes(job, cls.get_timeout(conn)) + query_job.result(timeout=cls.get_timeout(conn)) return "CREATE TABLE" @classmethod - def execute_model(cls, profile, model, materialization, model_name=None): + def execute_model(cls, profile, model, materialization, sql_override=None, + decorator=None, model_name=None): + + if sql_override is None: + sql_override = model.get('injected_sql') if flags.STRICT_MODE: connection = cls.get_connection(profile, model.get('name')) validate_connection(connection) - cls.release_connection(profile, model.get('name')) model_name = model.get('name') model_schema = model.get('schema') - model_sql = model.get('injected_sql') dataset = 
cls.get_dataset(profile, model_schema, model_name) if materialization == 'view': - res = cls.materialize_as_view(profile, dataset, model_name, - model_sql) + res = cls.materialize_as_view(profile, dataset, model) elif materialization == 'table': - res = cls.materialize_as_table(profile, dataset, model_name, - model_sql) + res = cls.materialize_as_table(profile, dataset, model, + sql_override, decorator) else: msg = "Invalid relation type: '{}'".format(materialization) raise dbt.exceptions.RuntimeException(msg, model) return res - @classmethod - def fetch_query_results(cls, query): - all_rows = [] - - rows = query.rows - token = query.page_token - - while True: - all_rows.extend(rows) - if token is None: - break - rows, total_count, token = query.fetch_data(page_token=token) - return all_rows - @classmethod def execute(cls, profile, sql, model_name=None, fetch=False, **kwargs): conn = cls.get_connection(profile, model_name) client = conn.get('handle') - query = client.run_sync_query(sql) - query.timeout_ms = cls.get_timeout(conn) * 1000 - query.use_legacy_sql = False - debug_message = "Fetching data for query {}:\n{}" logger.debug(debug_message.format(model_name, sql)) - query.run() + job_config = google.cloud.bigquery.QueryJobConfig() + job_config.use_legacy_sql = False + query_job = client.query(sql, job_config) + + # this blocks until the query has completed + with cls.exception_handler(profile, 'create dataset', model_name): + iterator = query_job.result() res = [] if fetch: - res = cls.fetch_query_results(query) + res = list(iterator) - status = 'ERROR' if query.errors else 'OK' + # If we get here, the query succeeded + status = 'OK' return status, res @classmethod @@ -310,15 +327,20 @@ def add_begin_query(cls, profile, name): def create_schema(cls, profile, schema, model_name=None): logger.debug('Creating schema "%s".', schema) - dataset = cls.get_dataset(profile, schema, model_name) + conn = cls.get_connection(profile, model_name) + client = conn.get('handle') + dataset = cls.get_dataset(profile, schema, model_name) with cls.exception_handler(profile, 'create dataset', model_name): - dataset.create() + client.create_dataset(dataset) @classmethod - def drop_tables_in_schema(cls, dataset): - for table in dataset.list_tables(): - table.delete() + def drop_tables_in_schema(cls, profile, dataset): + conn = cls.get_connection(profile) + client = conn.get('handle') + + for table in client.list_tables(dataset): + client.delete_table(table.reference) @classmethod def drop_schema(cls, profile, schema, model_name=None): @@ -327,21 +349,22 @@ def drop_schema(cls, profile, schema, model_name=None): if not cls.check_schema_exists(profile, schema, model_name): return - dataset = cls.get_dataset(profile, schema, model_name) + conn = cls.get_connection(profile) + client = conn.get('handle') + dataset = cls.get_dataset(profile, schema, model_name) with cls.exception_handler(profile, 'drop dataset', model_name): - cls.drop_tables_in_schema(dataset) - dataset.delete() + cls.drop_tables_in_schema(profile, dataset) + client.delete_dataset(dataset) @classmethod def get_existing_schemas(cls, profile, model_name=None): conn = cls.get_connection(profile, model_name) - client = conn.get('handle') with cls.exception_handler(profile, 'list dataset', model_name): all_datasets = client.list_datasets() - return [ds.name for ds in all_datasets] + return [ds.dataset_id for ds in all_datasets] @classmethod def get_columns_in_table(cls, profile, schema_name, table_name, @@ -352,20 +375,19 @@ def 
get_columns_in_table(cls, profile, schema_name, table_name, @classmethod def check_schema_exists(cls, profile, schema, model_name=None): conn = cls.get_connection(profile, model_name) - client = conn.get('handle') with cls.exception_handler(profile, 'get dataset', model_name): all_datasets = client.list_datasets() - return any([ds.name == schema for ds in all_datasets]) + return any([ds.dataset_id == schema for ds in all_datasets]) @classmethod def get_dataset(cls, profile, dataset_name, model_name=None): conn = cls.get_connection(profile, model_name) - client = conn.get('handle') - dataset = client.dataset(dataset_name) - return dataset + + dataset_ref = client.dataset(dataset_name) + return google.cloud.bigquery.Dataset(dataset_ref) @classmethod def warning_on_hooks(cls, hook_type): diff --git a/dbt/context/common.py b/dbt/context/common.py index 072cb62a7e4..64d62c04b45 100644 --- a/dbt/context/common.py +++ b/dbt/context/common.py @@ -1,6 +1,5 @@ import json import os -import pytz import voluptuous from dbt.adapters.factory import get_adapter @@ -17,6 +16,12 @@ from dbt.logger import GLOBAL_LOGGER as logger # noqa +# These modules are added to the context. Consider alternative +# approaches which will extend well to potentially many modules +import pytz +import datetime + + class DatabaseWrapper(object): """ Wrapper for runtime database interaction. Should only call adapter @@ -248,6 +253,15 @@ def tojson(value, default=None): return default +def try_or_compiler_error(model): + def impl(message_if_exception, func, *args, **kwargs): + try: + return func(*args, **kwargs) + except Exception as e: + dbt.exceptions.raise_compiler_error(message_if_exception, model) + return impl + + def _return(value): raise dbt.exceptions.MacroReturn(value) @@ -291,6 +305,7 @@ def generate(model, project, flat_graph, provider=None): "model": model, "modules": { "pytz": pytz, + "datetime": datetime }, "post_hooks": post_hooks, "pre_hooks": pre_hooks, @@ -302,7 +317,8 @@ def generate(model, project, flat_graph, provider=None): "fromjson": fromjson, "tojson": tojson, "target": target, - "this": dbt.utils.Relation(profile, adapter, model, use_temp=True) + "this": dbt.utils.Relation(profile, adapter, model, use_temp=True), + "try_or_compiler_error": try_or_compiler_error(model) }) context = _add_tracking(context) diff --git a/dbt/exceptions.py b/dbt/exceptions.py index 56c65928b91..a827967f237 100644 --- a/dbt/exceptions.py +++ b/dbt/exceptions.py @@ -65,9 +65,13 @@ def __str__(self, prefix="! 
"): if self.node is not None: node_string = " in {}".format(self.node_to_string(self.node)) + if hasattr(self.msg, 'split'): + split_msg = self.msg.split("\n") + else: + split_msg = basestring(self.msg).split("\n") + lines = ["{}{}".format(self.type + ' Error', - node_string)] + \ - self.msg.split("\n") + node_string)] + split_msg lines += self.process_stack() diff --git a/dbt/include/global_project/macros/etc/bigquery.sql b/dbt/include/global_project/macros/etc/bigquery.sql new file mode 100644 index 00000000000..7bd0cba9086 --- /dev/null +++ b/dbt/include/global_project/macros/etc/bigquery.sql @@ -0,0 +1,4 @@ + +{% macro date_sharded_table(base_name) %} + {{ return(base_name ~ "[DBT__PARTITION_DATE]") }} +{% endmacro %} diff --git a/dbt/include/global_project/macros/etc/datetime.sql b/dbt/include/global_project/macros/etc/datetime.sql new file mode 100644 index 00000000000..28a7654110b --- /dev/null +++ b/dbt/include/global_project/macros/etc/datetime.sql @@ -0,0 +1,56 @@ + +{% macro convert_datetime(date_str, date_fmt) %} + + {% set error_msg -%} + The provided partition date '{{ date_str }}' does not match the expected format '{{ date_fmt }}' + {%- endset %} + + {% set res = try_or_compiler_error(error_msg, modules.datetime.datetime.strptime, date_str.strip(), date_fmt) %} + {{ return(res) }} + +{% endmacro %} + +{% macro dates_in_range(start_date_str, end_date_str=none, in_fmt="%Y%m%d", out_fmt="%Y%m%d") %} + {% set end_date_str = start_date_str if end_date_str is none else end_date_str %} + + {% set start_date = convert_datetime(start_date_str, in_fmt) %} + {% set end_date = convert_datetime(end_date_str, in_fmt) %} + + {% set day_count = (end_date - start_date).days %} + {% if day_count < 0 %} + {% set msg -%} + Partiton start date is after the end date ({{ start_date }}, {{ end_date }}) + {%- endset %} + + {{ exceptions.raise_compiler_error(msg, model) }} + {% endif %} + + {% set date_list = [] %} + {% for i in range(0, day_count + 1) %} + {% set the_date = (modules.datetime.timedelta(days=i) + start_date) %} + {% if not out_fmt %} + {% set _ = date_list.append(the_date) %} + {% else %} + {% set _ = date_list.append(the_date.strftime(out_fmt)) %} + {% endif %} + {% endfor %} + + {{ return(date_list) }} +{% endmacro %} + +{% macro partition_range(raw_partition_date, date_fmt='%Y%m%d') %} + {% set partition_range = (raw_partition_date | string).split(",") %} + + {% if (partition_range | length) == 1 %} + {% set start_date = partition_range[0] %} + {% set end_date = none %} + {% elif (partition_range | length) == 2 %} + {% set start_date = partition_range[0] %} + {% set end_date = partition_range[1] %} + {% else %} + {{ dbt.exceptions.raise_compiler_error("Invalid partition time. Expected format: {Start Date}[,{End Date}]. 
Got: " ~ raw_partition_date) }} + {% endif %} + + {{ return(dates_in_range(start_date, end_date, in_fmt=date_fmt)) }} +{% endmacro %} + diff --git a/dbt/include/global_project/macros/materializations/bigquery.sql b/dbt/include/global_project/macros/materializations/bigquery.sql index e61373a4634..f616412a593 100644 --- a/dbt/include/global_project/macros/materializations/bigquery.sql +++ b/dbt/include/global_project/macros/materializations/bigquery.sql @@ -1,12 +1,20 @@ {% materialization view, adapter='bigquery' -%} {%- set identifier = model['name'] -%} - {%- set tmp_identifier = identifier + '__dbt_tmp' -%} {%- set non_destructive_mode = (flags.NON_DESTRUCTIVE == True) -%} {%- set existing = adapter.query_for_existing(schema) -%} {%- set existing_type = existing.get(identifier) -%} {%- if existing_type is not none -%} + {%- if existing_type == 'table' and not flags.FULL_REFRESH -%} + {# this is only intended for date partitioned tables, but we cant see that field in the context #} + {% set error_message -%} + Trying to create model '{{ identifier }}' as a view, but it already exists as a table. + Either drop the '{{ schema }}.{{ identifier }}' table manually, or use --full-refresh + {%- endset %} + {{ exceptions.raise_compiler_error(error_message) }} + {%- endif -%} + {{ adapter.drop(schema, identifier, existing_type) }} {%- endif -%} @@ -16,20 +24,69 @@ {%- endmaterialization %} + +{% macro make_date_partitioned_table(model, dates, should_create, verbose=False) %} + + {% if should_create %} + {{ adapter.make_date_partitioned_table(model.schema, model.name) }} + {% endif %} + + {% for date in dates %} + {% set date = (date | string) %} + {% if verbose %} + {% set table_start_time = modules.datetime.datetime.now().strftime("%H:%M:%S") %} + {{ log(table_start_time ~ ' | -> Running for day ' ~ date, info=True) }} + {% endif %} + + {% set fixed_sql = model['injected_sql'] | replace('[DBT__PARTITION_DATE]', date) %} + {% set _ = adapter.execute_model(model, 'table', fixed_sql, decorator=date) %} + {% endfor %} + + {% set num_days = dates | length %} + {% if num_days == 1 %} + {% set result_str = 'CREATED 1 PARTITION' %} + {% else %} + {% set result_str = 'CREATED ' ~ num_days ~ ' PARTITIONS' %} + {% endif %} + + {{ return(result_str) }} + +{% endmacro %} + {% materialization table, adapter='bigquery' -%} {%- set identifier = model['name'] -%} - {%- set tmp_identifier = identifier + '__dbt_tmp' -%} {%- set non_destructive_mode = (flags.NON_DESTRUCTIVE == True) -%} {%- set existing = adapter.query_for_existing(schema) -%} {%- set existing_type = existing.get(identifier) -%} + {%- set verbose = config.get('verbose', False) -%} + {%- set partitions = config.get('partitions') -%} - {%- if existing_type is not none -%} - {{ adapter.drop(schema, identifier, existing_type) }} + {% if partitions %} + {% if partitions is number or partitions is string %} + {% set partitions = [(partitions | string)] %} + {% endif %} + + {% if partitions is not iterable %} + {{ exceptions.raise_compiler_error("Provided `partitions` configuration is not a list. Got: " ~ partitions, model) }} + {% endif %} + {% endif %} + + {# + Since dbt uses WRITE_TRUNCATE mode for tables, we only need to drop this thing + if it is not a table. 
If it _is_ already a table, then we can overwrite it without downtime + #} + {%- if existing_type is not none and existing_type != 'table' -%} + {{ adapter.drop(schema, identifier, existing_type) }} {%- endif -%} -- build model - {% set result = adapter.execute_model(model, 'table') %} + {% if partitions %} + {% set result = make_date_partitioned_table(model, partitions, (existing_type != 'table'), verbose) %} + {% else %} + {% set result = adapter.execute_model(model, 'table') %} + {% endif %} + {{ store_result('main', status=result) }} {% endmaterialization %} diff --git a/dbt/project.py b/dbt/project.py index bdb1f811076..2fb7b0bb156 100644 --- a/dbt/project.py +++ b/dbt/project.py @@ -87,6 +87,12 @@ def __init__(self, cfg, profiles, profiles_dir, profile_to_load=None, "Could not find profile named '{}'" .format(self.profile_to_load), self) + if self.cfg.get('models') is None: + self.cfg['models'] = {} + + if self.cfg['models'].get('vars') is None: + self.cfg['models']['vars'] = {} + global_vars = dbt.utils.parse_cli_vars(getattr(args, 'vars', '{}')) if 'vars' not in self.cfg['models']: self.cfg['models']['vars'] = {} diff --git a/requirements.txt b/requirements.txt index 9c2735f1523..670aa77121b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,5 +11,5 @@ celery==3.1.23 voluptuous==0.10.5 snowflake-connector-python>=1.4.9 colorama==0.3.9 -google-cloud-bigquery==0.26.0 +google-cloud-bigquery==0.29.0 agate>=1.6,<2 diff --git a/sample.profiles.yml b/sample.profiles.yml index 7106e6814da..cd898ed5a59 100644 --- a/sample.profiles.yml +++ b/sample.profiles.yml @@ -65,6 +65,7 @@ config: # project: [GCP project id] # schema: [dbt schema] # threads: [between 1 and 8] +# timeout_seconds: 300 # # # 2. use a service account keyfile # [target-name-2]: diff --git a/setup.py b/setup.py index 23ecc1bc669..b85ed9fd709 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,7 @@ 'voluptuous==0.10.5', 'snowflake-connector-python>=1.4.9', 'colorama==0.3.9', - 'google-cloud-bigquery==0.26.0', + 'google-cloud-bigquery==0.29.0', 'agate>=1.6,<2', ] ) diff --git a/test/integration/022_bigquery_test/dp-models/confirmation.sql b/test/integration/022_bigquery_test/dp-models/confirmation.sql new file mode 100644 index 00000000000..858c0e2e32f --- /dev/null +++ b/test/integration/022_bigquery_test/dp-models/confirmation.sql @@ -0,0 +1,20 @@ + +-- This model checks to confirm that each date partition was created correctly. 
+-- Columns day_1, day_2, and day_3 should have a value of 1, and count_days should be 3 + +with base as ( + + select + case when _PARTITIONTIME = '2018-01-01' then 1 else 0 end as day_1, + case when _PARTITIONTIME = '2018-01-02' then 1 else 0 end as day_2, + case when _PARTITIONTIME = '2018-01-03' then 1 else 0 end as day_3 + from {{ ref('partitioned') }} + +) + +select distinct + sum(day_1) over () as day_1, + sum(day_2) over () as day_2, + sum(day_3) over () as day_3, + count(*) over () as count_days +from base diff --git a/test/integration/022_bigquery_test/dp-models/events_20180101.sql b/test/integration/022_bigquery_test/dp-models/events_20180101.sql new file mode 100644 index 00000000000..9a8f54d5bcb --- /dev/null +++ b/test/integration/022_bigquery_test/dp-models/events_20180101.sql @@ -0,0 +1,4 @@ + +{{ config(materialized='table') }} + +select 1 as id diff --git a/test/integration/022_bigquery_test/dp-models/events_20180102.sql b/test/integration/022_bigquery_test/dp-models/events_20180102.sql new file mode 100644 index 00000000000..63bfcdc13fe --- /dev/null +++ b/test/integration/022_bigquery_test/dp-models/events_20180102.sql @@ -0,0 +1,4 @@ + +{{ config(materialized='table') }} + +select 2 as id diff --git a/test/integration/022_bigquery_test/dp-models/events_20180103.sql b/test/integration/022_bigquery_test/dp-models/events_20180103.sql new file mode 100644 index 00000000000..09a9f02c7b1 --- /dev/null +++ b/test/integration/022_bigquery_test/dp-models/events_20180103.sql @@ -0,0 +1,4 @@ + +{{ config(materialized='table') }} + +select 3 as id diff --git a/test/integration/022_bigquery_test/dp-models/partitioned.sql b/test/integration/022_bigquery_test/dp-models/partitioned.sql new file mode 100644 index 00000000000..5d77021d30c --- /dev/null +++ b/test/integration/022_bigquery_test/dp-models/partitioned.sql @@ -0,0 +1,16 @@ + +{{ + config( + materialized='table', + partitions=['20180101', '20180102', '20180103'], + verbose=True + ) +}} + +-- Hack to make sure our events models run first. +-- In practice, these would be source data +-- {{ ref('events_20180101') }} +-- {{ ref('events_20180102') }} +-- {{ ref('events_20180103') }} + +select * from `{{ this.schema }}`.`{{ date_sharded_table('events_') }}` diff --git a/test/integration/022_bigquery_test/dp-models/partitioned_simple.sql b/test/integration/022_bigquery_test/dp-models/partitioned_simple.sql new file mode 100644 index 00000000000..af65072bead --- /dev/null +++ b/test/integration/022_bigquery_test/dp-models/partitioned_simple.sql @@ -0,0 +1,14 @@ + +{{ + config( + materialized='table', + partition_date='20180101', + verbose=True + ) +}} + +-- Hack to make sure our events models run first. 
+-- In practice, these would be source data +-- {{ ref('events_20180101') }} + +select * from `{{ this.schema }}`.`events_20180101` diff --git a/test/integration/022_bigquery_test/dp-models/schema.yml b/test/integration/022_bigquery_test/dp-models/schema.yml new file mode 100644 index 00000000000..d699498135c --- /dev/null +++ b/test/integration/022_bigquery_test/dp-models/schema.yml @@ -0,0 +1,18 @@ + +# check that this exists +partitioned_simple: + constraints: + unique: + - id + not_null: + - id + +confirmation: + constraints: + accepted_values: + - {field: cast(day_1 as string), values:[1] } + - {field: cast(day_2 as string), values:[1] } + - {field: cast(day_3 as string), values:[1] } + - {field: cast(count_days as string), values:[3] } + + diff --git a/test/integration/022_bigquery_test/test_bigquery_date_partitioning.py b/test/integration/022_bigquery_test/test_bigquery_date_partitioning.py new file mode 100644 index 00000000000..3277c70f490 --- /dev/null +++ b/test/integration/022_bigquery_test/test_bigquery_date_partitioning.py @@ -0,0 +1,33 @@ +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest, FakeArgs + + +class TestBigqueryDatePartitioning(DBTIntegrationTest): + + @property + def schema(self): + return "bigquery_test_022" + + @property + def models(self): + return "test/integration/022_bigquery_test/dp-models" + + @property + def profile_config(self): + return self.bigquery_profile() + + @attr(type='bigquery') + def test__bigquery_date_partitioning(self): + self.use_profile('bigquery') + self.use_default_project() + self.run_dbt() + + test_results = self.run_dbt(['test']) + + self.assertTrue(len(test_results) > 0) + for result in test_results: + self.assertFalse(result.errored) + self.assertFalse(result.skipped) + # status = # of failing rows + self.assertEqual(result.status, 0) + diff --git a/test/integration/022_bigquery_test/test_simple_bigquery_view.py b/test/integration/022_bigquery_test/test_simple_bigquery_view.py index c49a3c64670..c14b23c37c6 100644 --- a/test/integration/022_bigquery_test/test_simple_bigquery_view.py +++ b/test/integration/022_bigquery_test/test_simple_bigquery_view.py @@ -1,15 +1,9 @@ from nose.plugins.attrib import attr from test.integration.base import DBTIntegrationTest, FakeArgs -from dbt.task.test import TestTask -from dbt.project import read_project - class TestSimpleBigQueryRun(DBTIntegrationTest): - def setUp(self): - pass - @property def schema(self): return "bigquery_test_022" @@ -24,12 +18,9 @@ def project_config(self): 'macro-paths': ['test/integration/022_bigquery_test/macros'], } - def run_schema_validations(self): - project = read_project('dbt_project.yml') - args = FakeArgs() - - test_task = TestTask(args, project) - return test_task.run() + @property + def profile_config(self): + return self.bigquery_profile() @attr(type='bigquery') def test__bigquery_simple_run(self): @@ -37,7 +28,8 @@ def test__bigquery_simple_run(self): self.use_default_project() self.run_dbt() - test_results = self.run_schema_validations() + # The 'dupe' model should fail, but all others should pass + test_results = self.run_dbt(['test'], expect_pass=False) for result in test_results: if 'dupe' in result.node.get('name'): diff --git a/test/integration/023_exit_codes_test/test_exit_codes.py b/test/integration/023_exit_codes_test/test_exit_codes.py index dfc4a9a2931..01af65ce2ec 100644 --- a/test/integration/023_exit_codes_test/test_exit_codes.py +++ b/test/integration/023_exit_codes_test/test_exit_codes.py @@ -6,9 +6,6 @@ 
class TestExitCodes(DBTIntegrationTest): - def setUp(self): - pass - @property def schema(self): return "exit_codes_test_023" diff --git a/test/integration/base.py b/test/integration/base.py index 05d48487809..c7066f4c62d 100644 --- a/test/integration/base.py +++ b/test/integration/base.py @@ -173,6 +173,7 @@ def setUp(self): # it's important to use a different connection handle here so # we don't look into an incomplete transaction + adapter.cleanup_connections() connection = adapter.acquire_connection(profile, '__test') self.handle = connection.get('handle') self.adapter_type = profile.get('type')
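
Usage sketch for the date partitioning feature above. This is a hedged illustration, not part of the patch: it mirrors the partitioned.sql integration test, and the `events_` source shards and dates are those created by the test models. A BigQuery model opts in via the new `partitions` config, which the table materialization reads. dbt creates one day-partitioned table, then runs the model once per configured date: date_sharded_table() renders the [DBT__PARTITION_DATE] placeholder, the materialization substitutes each date into the compiled SQL, and the result lands in the model$YYYYMMDD partition via WRITE_TRUNCATE. The partition_range() macro can build the date list from a 'start[,end]' string, e.g. partition_range('20180101, 20180103').

{{
    config(
        materialized='table',
        partitions=['20180101', '20180102', '20180103'],
        verbose=True
    )
}}

-- executed once per configured date; [DBT__PARTITION_DATE] expands to
-- 20180101, 20180102, then 20180103 on successive passes
select * from `{{ this.schema }}`.`{{ date_sharded_table('events_') }}`

Note also that re-running such a model as a view no longer clobbers the table silently: the view materialization raises a compiler error unless --full-refresh is passed or the table is dropped manually.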