From 8aec262e3ad1b55d8711574fe93a596e99b17793 Mon Sep 17 00:00:00 2001 From: Marina Samuel Date: Fri, 23 Nov 2018 17:26:23 -0500 Subject: [PATCH 01/21] Process extra column metadata for a few sql-based data sources. --- redash/query_runner/__init__.py | 18 ++++++++++++++++++ redash/query_runner/athena.py | 16 +++++++++++++--- redash/query_runner/mysql.py | 15 ++++++++++++--- redash/query_runner/pg.py | 21 ++++++++++++++++----- redash/query_runner/presto.py | 16 +++++++++++----- 5 files changed, 70 insertions(+), 16 deletions(-) diff --git a/redash/query_runner/__init__.py b/redash/query_runner/__init__.py index c252d841b4..bd3d13223c 100644 --- a/redash/query_runner/__init__.py +++ b/redash/query_runner/__init__.py @@ -54,6 +54,7 @@ class NotSupported(Exception): class BaseQueryRunner(object): noop_query = None + data_sample_query = None def __init__(self, configuration): self.syntax = 'sql' @@ -118,6 +119,23 @@ def _run_query_internal(self, query): raise Exception("Failed running query [%s]." % query) return json_loads(results)['rows'] + def _get_table_sample(self, table_name): + if self.data_sample_query is None: + raise NotImplementedError() + + query = self.data_sample_query.format(table=table_name) + + results, error = self.run_query(query, None) + if error is not None: + raise Exception("Failed getting table sample.") + + sample = {} + rows = json_loads(results).get('rows', []) + if len(rows) > 0: + sample = rows[0] + + return sample + @classmethod def to_dict(cls): return { diff --git a/redash/query_runner/athena.py b/redash/query_runner/athena.py index 67d7e19fff..bf610fb4a3 100644 --- a/redash/query_runner/athena.py +++ b/redash/query_runner/athena.py @@ -43,6 +43,7 @@ def format(self, operation, parameters=None): class Athena(BaseQueryRunner): noop_query = 'SELECT 1' + data_sample_query = "SELECT * FROM {table} LIMIT 1" @classmethod def name(cls): @@ -143,7 +144,7 @@ def get_schema(self, get_stats=False): schema = {} query = """ - SELECT table_schema, table_name, column_name + SELECT table_schema, table_name, column_name, data_type AS column_type FROM information_schema.columns WHERE table_schema NOT IN ('information_schema') """ @@ -153,11 +154,20 @@ def get_schema(self, get_stats=False): raise Exception("Failed getting schema.") results = json_loads(results) - for row in results['rows']: + table_samples = {} + + for i, row in enumerate(results['rows']): table_name = '{0}.{1}'.format(row['table_schema'], row['table_name']) if table_name not in schema: - schema[table_name] = {'name': table_name, 'columns': []} + schema[table_name] = {'name': table_name, 'columns': [], 'metadata': []} + table_samples[table_name] = self._get_table_sample(table_name) + schema[table_name]['columns'].append(row['column_name']) + schema[table_name]['metadata'].append({ + "name": row['column_name'], + "type": row['column_type'], + "sample": table_samples[table_name].get(row['column_name'], None) + }) return schema.values() diff --git a/redash/query_runner/mysql.py b/redash/query_runner/mysql.py index bfd6e7198e..dfde30548a 100644 --- a/redash/query_runner/mysql.py +++ b/redash/query_runner/mysql.py @@ -28,6 +28,7 @@ class Mysql(BaseSQLQueryRunner): noop_query = "SELECT 1" + data_sample_query = "SELECT * FROM {table} LIMIT 1" @classmethod def configuration_schema(cls): @@ -100,7 +101,8 @@ def _get_tables(self, schema): query = """ SELECT col.table_schema as table_schema, col.table_name as table_name, - col.column_name as column_name + col.column_name as column_name, + col.data_type AS column_type FROM 
`information_schema`.`columns` col WHERE col.table_schema NOT IN ('information_schema', 'performance_schema', 'mysql', 'sys'); """ @@ -111,17 +113,24 @@ def _get_tables(self, schema): raise Exception("Failed getting schema.") results = json_loads(results) + table_samples = {} - for row in results['rows']: + for i, row in enumerate(results['rows']): if row['table_schema'] != self.configuration['db']: table_name = u'{}.{}'.format(row['table_schema'], row['table_name']) else: table_name = row['table_name'] if table_name not in schema: - schema[table_name] = {'name': table_name, 'columns': []} + schema[table_name] = {'name': table_name, 'columns': [], 'metadata': []} + table_samples[table_name] = self._get_table_sample(table_name) schema[table_name]['columns'].append(row['column_name']) + schema[table_name]['metadata'].append({ + "name": row['column_name'], + "type": row['column_type'], + "sample": table_samples[table_name].get(row['column_name'], None) + }) return schema.values() diff --git a/redash/query_runner/pg.py b/redash/query_runner/pg.py index 96aa03c07d..07149cbe08 100644 --- a/redash/query_runner/pg.py +++ b/redash/query_runner/pg.py @@ -46,6 +46,7 @@ def _wait(conn, timeout=None): class PostgreSQL(BaseSQLQueryRunner): noop_query = "SELECT 1" + data_sample_query = "SELECT * FROM {table} LIMIT 1" @classmethod def configuration_schema(cls): @@ -92,17 +93,24 @@ def _get_definitions(self, schema, query): raise Exception("Failed getting schema.") results = json_loads(results) + table_samples = {} - for row in results['rows']: + for i, row in enumerate(results['rows']): if row['table_schema'] != 'public': table_name = u'{}.{}'.format(row['table_schema'], row['table_name']) else: table_name = row['table_name'] if table_name not in schema: - schema[table_name] = {'name': table_name, 'columns': []} + schema[table_name] = {'name': table_name, 'columns': [], 'metadata': []} + table_samples[table_name] = self._get_table_sample(table_name) schema[table_name]['columns'].append(row['column_name']) + schema[table_name]['metadata'].append({ + "name": row['column_name'], + "type": row['column_type'], + "sample": table_samples[table_name].get(row['column_name'], None) + }) def _get_tables(self, schema): ''' @@ -122,7 +130,8 @@ def _get_tables(self, schema): query = """ SELECT s.nspname as table_schema, c.relname as table_name, - a.attname as column_name + a.attname as column_name, + a.atttypid::regtype as column_type FROM pg_class c JOIN pg_namespace s ON c.relnamespace = s.oid @@ -252,6 +261,7 @@ def _get_tables(self, schema): SELECT DISTINCT table_name, table_schema, column_name, + data_type AS column_type, ordinal_position AS pos FROM svv_columns WHERE table_schema NOT IN ('pg_internal','pg_catalog','information_schema') @@ -259,11 +269,12 @@ def _get_tables(self, schema): SELECT DISTINCT view_name::varchar AS table_name, view_schema::varchar AS table_schema, col_name::varchar AS column_name, + data_type::varchar AS column_type, col_num AS pos FROM pg_get_late_binding_view_cols() - cols(view_schema name, view_name name, col_name name, col_type varchar, col_num int) + cols(view_schema name, view_name name, data_type name, col_name name, col_type varchar, col_num int) ) - SELECT table_name, table_schema, column_name + SELECT table_name, table_schema, column_name, column_type FROM tables WHERE HAS_SCHEMA_PRIVILEGE(table_schema, 'USAGE') AND diff --git a/redash/query_runner/presto.py b/redash/query_runner/presto.py index 975ea70c07..baf912b6c9 100644 --- a/redash/query_runner/presto.py +++ 
b/redash/query_runner/presto.py @@ -31,6 +31,7 @@ class Presto(BaseQueryRunner): noop_query = 'SHOW TABLES' + data_sample_query = "SELECT * FROM {table} LIMIT 1" @classmethod def configuration_schema(cls): @@ -72,25 +73,30 @@ def type(cls): def get_schema(self, get_stats=False): schema = {} query = """ - SELECT table_schema, table_name, column_name + SELECT table_schema, table_name, column_name, data_type AS column_type FROM information_schema.columns WHERE table_schema NOT IN ('pg_catalog', 'information_schema') """ results, error = self.run_query(query, None) - if error is not None: raise Exception("Failed getting schema.") results = json_loads(results) + table_samples = {} - for row in results['rows']: + for i, row in enumerate(results['rows']): table_name = '{}.{}'.format(row['table_schema'], row['table_name']) - if table_name not in schema: - schema[table_name] = {'name': table_name, 'columns': []} + schema[table_name] = {'name': table_name, 'columns': [], 'metadata': []} + table_samples[table_name] = self._get_table_sample(table_name) schema[table_name]['columns'].append(row['column_name']) + schema[table_name]['metadata'].append({ + "name": row['column_name'], + "type": row['column_type'], + "sample": table_samples[table_name].get(row['column_name'], None) + }) return schema.values() From 32901d19e305e9871062f7ddacf9b0ffd0331301 Mon Sep 17 00:00:00 2001 From: Marina Samuel Date: Fri, 23 Nov 2018 17:28:27 -0500 Subject: [PATCH 02/21] Add Table and Column metadata tables. --- migrations/versions/280daa582976_.py | 49 +++++++++++++++++++++++++++ redash/models/__init__.py | 50 ++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 migrations/versions/280daa582976_.py diff --git a/migrations/versions/280daa582976_.py b/migrations/versions/280daa582976_.py new file mode 100644 index 0000000000..5c58804eb0 --- /dev/null +++ b/migrations/versions/280daa582976_.py @@ -0,0 +1,49 @@ +"""Add column metadata and table metadata + +Revision ID: 280daa582976 +Revises: 73beceabb948 +Create Date: 2019-01-24 18:23:53.040608 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '280daa582976' +down_revision = '73beceabb948' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('table_metadata', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('data_source_id', sa.Integer(), nullable=False), + sa.Column('table_exists', sa.Boolean(), nullable=False), + sa.Column('table_name', sa.String(length=255), nullable=False), + sa.Column('table_description', sa.String(length=4096), nullable=True), + sa.Column('column_metadata', sa.Boolean(), nullable=False), + sa.Column('sample_query', sa.Text(), nullable=True), + sa.ForeignKeyConstraint(['data_source_id'], ['data_sources.id'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('column_metadata', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('table_id', sa.Integer(), nullable=False), + sa.Column('column_name', sa.String(length=255), nullable=False), + sa.Column('column_type', sa.String(length=255), nullable=True), + sa.Column('column_example', sa.String(length=4096), nullable=True), + sa.Column('column_exists', sa.Boolean(), nullable=False), + sa.ForeignKeyConstraint(['table_id'], ['table_metadata.id'], ), + sa.PrimaryKeyConstraint('id') + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_table('column_metadata') + op.drop_table('table_metadata') + # ### end Alembic commands ### diff --git a/redash/models/__init__.py b/redash/models/__init__.py index 69af5fcf44..4aad5f73aa 100644 --- a/redash/models/__init__.py +++ b/redash/models/__init__.py @@ -63,6 +63,56 @@ def get(self, query_id): scheduled_queries_executions = ScheduledQueriesExecutions() +@python_2_unicode_compatible +class TableMetadata(db.Model): + id = Column(db.Integer, primary_key=True) + data_source_id = Column(db.Integer, db.ForeignKey("data_sources.id")) + table_exists = Column(db.Boolean, default=True) + table_name = Column(db.String(255)) + table_description = Column(db.String(4096), nullable=True) + column_metadata = Column(db.Boolean, default=False) + sample_query = Column("sample_query", db.Text, nullable=True) + + __tablename__ = 'table_metadata' + + def __str__(self): + return text_type(self.table_name) + + def to_dict(self): + return { + 'id': self.id, + 'data_source_id': self.data_source_id, + 'table_exists': self.table_exists, + 'table_name': self.table_name, + 'table_description': self.table_description, + 'column_metadata': self.column_metadata, + 'sample_query': self.sample_query, + } + +@python_2_unicode_compatible +class ColumnMetadata(db.Model): + id = Column(db.Integer, primary_key=True) + table_id = Column(db.Integer, db.ForeignKey("table_metadata.id")) + column_name = Column(db.String(255)) + column_type = Column(db.String(255), nullable=True) + column_example = Column(db.String(4096), nullable=True) + column_exists = Column(db.Boolean, default=True) + + __tablename__ = 'column_metadata' + + def __str__(self): + return text_type(self.name) + + def to_dict(self): + return { + 'id': self.id, + 'table_id': self.table_id, + 'column_name': self.column_name, + 'column_type': self.column_type, + 'column_example': self.column_example, + 'column_exists': self.column_exists, + } + @python_2_unicode_compatible @generic_repr('id', 'name', 'type', 'org_id', 'created_at') class DataSource(BelongsToOrgMixin, db.Model): From b5ee1224685154deb4a2d15516997ba211779332 Mon Sep 17 00:00:00 2001 From: Marina Samuel Date: Fri, 23 Nov 2018 17:30:29 -0500 Subject: [PATCH 03/21] Periodically update table and column schema tables in a celery task. 
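The refresh task below consumes the per-table dicts produced by the query runners updated in PATCH 01: alongside the existing flat `columns` list, each table may carry a `metadata` list with the column type and a one-row sample. A sketch of one entry of `query_runner.get_schema(get_stats=True)` (values are illustrative, taken from the test fixtures later in this series):

    # One table entry as emitted by the updated runners (PATCH 01):
    {
        'name': 'default.table_name',
        'columns': ['created_date'],
        'metadata': [{
            'name': 'created_date',
            'type': 'varchar',        # data_type from information_schema
            'sample': '2017-10-26',   # first row returned by data_sample_query
        }],
    }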
--- redash/tasks/queries.py | 56 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/redash/tasks/queries.py b/redash/tasks/queries.py index 1fc2c50e08..be9f6275fc 100644 --- a/redash/tasks/queries.py +++ b/redash/tasks/queries.py @@ -9,6 +9,7 @@ from six import text_type from redash import models, redis_connection, settings, statsd_client +from redash.models import TableMetadata, ColumnMetadata from redash.query_runner import InterruptException from redash.tasks.alerts import check_alerts_for_query from redash.utils import gen_query_hash, json_dumps, json_loads, utcnow, mustache_render @@ -379,7 +380,60 @@ def refresh_schema(data_source_id): logger.info(u"task=refresh_schema state=start ds_id=%s", ds.id) start_time = time.time() try: - ds.get_schema(refresh=True) + existing_tables = set() + schema = ds.query_runner.get_schema(get_stats=True) + for table in schema: + table_name = table['name'] + existing_tables.add(table_name) + + # Assume that there will only exist 1 table with a given name so we use first() + persisted_table = models.db.session.query(TableMetadata.id).filter(TableMetadata.table_name==table_name).first() + if persisted_table: + models.db.session.query(TableMetadata).filter(TableMetadata.id==persisted_table.id).update({"table_exists": True}) + else: + metadata = 'metadata' in table + persisted_table = TableMetadata(table_name=table_name, data_source_id=ds.id, column_metadata=metadata) + models.db.session.add(persisted_table) + models.db.session.flush() + + existing_columns = set() + for i, column in enumerate(table['columns']): + existing_columns.add(column) + column_metadata = { + 'table_id': persisted_table.id, + 'column_name': column, + 'column_type': None, + 'column_example': None, + 'column_exists': True + } + if 'metadata' in table: + column_metadata['column_type'] = table['metadata'][i]['type'] + + # Note: the query example can be quite large, so we limit its size. + column_example = str(table['metadata'][i]['sample']) + column_metadata['column_example'] = column_example + if column_example and len(column_example) > 4000: + column_metadata['column_example'] = column_example[:4000] + '...' + + # If the column exists, update it, otherwise create a new one. + persisted_column = models.db.session.query(ColumnMetadata.id).filter( + ColumnMetadata.column_name==column, ColumnMetadata.table_id==persisted_table.id).first() + if persisted_column: + models.db.session.query(ColumnMetadata).filter(ColumnMetadata.id==persisted_column.id).update(column_metadata) + else: + models.db.session.add(ColumnMetadata(**column_metadata)) + + # If a column did not exist, set the 'column_exists' flag to false. + models.db.session.query(ColumnMetadata).filter(ColumnMetadata.table_id==persisted_table.id, + ~ColumnMetadata.column_name.in_(tuple(existing_columns))).update({"column_exists": False}, synchronize_session='fetch') + + # If a table did not exist in the get_schema() response above, set the 'table_exists' flag to false. 
+ tables_to_update = models.db.session.query(TableMetadata).filter( + ~TableMetadata.table_name.in_(tuple(existing_tables))) + + tables_to_update.update({"table_exists": False}, synchronize_session='fetch') + models.db.session.commit() + logger.info(u"task=refresh_schema state=finished ds_id=%s runtime=%.2f", ds.id, time.time() - start_time) statsd_client.incr('refresh_schema.success') except SoftTimeLimitExceeded: From 41c922575861fa205417613fcde1b24764aa65c1 Mon Sep 17 00:00:00 2001 From: Marina Samuel Date: Fri, 23 Nov 2018 17:31:33 -0500 Subject: [PATCH 04/21] Fetching schema returns data from table and column metadata tables. --- redash/models/__init__.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/redash/models/__init__.py b/redash/models/__init__.py index 4aad5f73aa..10fba84e1a 100644 --- a/redash/models/__init__.py +++ b/redash/models/__init__.py @@ -194,21 +194,25 @@ def delete(self): return res def get_schema(self, refresh=False): - key = "data_source:schema:{}".format(self.id) - - cache = None - if not refresh: - cache = redis_connection.get(key) - - if cache is None: - query_runner = self.query_runner - schema = sorted(query_runner.get_schema(get_stats=refresh), key=lambda t: t['name']) - - redis_connection.set(key, json_dumps(schema)) - else: - schema = json_loads(cache) - - return schema + schema = [] + tables = db.session.query(TableMetadata).filter(TableMetadata.data_source_id == self.id).all() + for table in tables: + table_info = { + 'name': table.table_name, + 'exists': table.table_exists, + 'hasColumnMetadata': table.column_metadata, + 'columns': []} + columns = db.session.query(ColumnMetadata).filter(ColumnMetadata.table_id==table.id) + table_info['columns'] = sorted([{ + 'key': column.id, + 'name': column.column_name, + 'type': column.column_type, + 'exists': column.column_exists, + 'example': column.column_example + } for column in columns], key=lambda column: column['name']) + schema.append(table_info) + + return sorted(schema, key=lambda table: table['name']) def _pause_key(self): return 'ds:{}:pause'.format(self.id) From 280876b4bb58d1cb769e39fc087648b9c36c253e Mon Sep 17 00:00:00 2001 From: Marina Samuel Date: Sun, 25 Nov 2018 19:20:42 -0500 Subject: [PATCH 05/21] Add tests for backend changes. 
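Condensed from the expectations in test_refresh_schemas.py below, the rows that refresh_schema() persists for the mocked one-table/one-column schema look roughly like this (ids are illustrative and depend on the test database):

    table_metadata[0].to_dict() == {
        'id': 1, 'data_source_id': 1, 'table_name': 'table',
        'table_exists': True, 'column_metadata': True,
        'table_description': None, 'sample_query': None,
    }
    column_metadata[0].to_dict() == {
        'id': 1, 'table_id': 1, 'column_name': 'first_column',
        'column_type': 'text', 'column_example': 'some text for column value',
        'column_exists': True,
    }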
--- redash/tasks/__init__.py | 2 +- tests/factories.py | 15 +++ tests/models/test_data_sources.py | 54 ++++----- tests/query_runner/test_get_schema_format.py | 78 ++++++++++++ tests/tasks/test_refresh_schemas.py | 121 ++++++++++++++++++- 5 files changed, 235 insertions(+), 35 deletions(-) create mode 100644 tests/query_runner/test_get_schema_format.py diff --git a/redash/tasks/__init__.py b/redash/tasks/__init__.py index 418ed92c91..6a3b010603 100644 --- a/redash/tasks/__init__.py +++ b/redash/tasks/__init__.py @@ -1,3 +1,3 @@ from .general import record_event, version_check, send_mail, sync_user_details -from .queries import QueryTask, refresh_queries, refresh_schemas, cleanup_tasks, cleanup_query_results, execute_query +from .queries import QueryTask, refresh_queries, refresh_schemas, refresh_schema, cleanup_tasks, cleanup_query_results, execute_query from .alerts import check_alerts_for_query diff --git a/tests/factories.py b/tests/factories.py index 2c82e186da..fa8edb5c03 100644 --- a/tests/factories.py +++ b/tests/factories.py @@ -79,6 +79,15 @@ def __call__(self): data_source=data_source_factory.create, org_id=1) +table_metadata_factory = ModelFactory(redash.models.TableMetadata, + data_source_id=1, + table_exists=True, + table_name='table') + +column_metadata_factory = ModelFactory(redash.models.ColumnMetadata, + table_id=1, + column_name='column') + query_with_params_factory = ModelFactory(redash.models.Query, name='New Query with Params', description='', @@ -176,6 +185,12 @@ def create_org(self, **kwargs): return org + def create_table_metadata(self, **kwargs): + return table_metadata_factory.create(**kwargs) + + def create_column_metadata(self, **kwargs): + return column_metadata_factory.create(**kwargs) + def create_user(self, **kwargs): args = { 'org': self.org, diff --git a/tests/models/test_data_sources.py b/tests/models/test_data_sources.py index 037ff77a05..6bc58a497f 100644 --- a/tests/models/test_data_sources.py +++ b/tests/models/test_data_sources.py @@ -1,4 +1,3 @@ -import mock from tests import BaseTestCase from redash.models import DataSource, Query, QueryResult @@ -7,38 +6,27 @@ class DataSourceTest(BaseTestCase): def test_get_schema(self): - return_value = [{'name': 'table', 'columns': []}] - - with mock.patch('redash.query_runner.pg.PostgreSQL.get_schema') as patched_get_schema: - patched_get_schema.return_value = return_value - - schema = self.factory.data_source.get_schema() - - self.assertEqual(return_value, schema) - - def test_get_schema_uses_cache(self): - return_value = [{'name': 'table', 'columns': []}] - with mock.patch('redash.query_runner.pg.PostgreSQL.get_schema') as patched_get_schema: - patched_get_schema.return_value = return_value - - self.factory.data_source.get_schema() - schema = self.factory.data_source.get_schema() - - self.assertEqual(return_value, schema) - self.assertEqual(patched_get_schema.call_count, 1) - - def test_get_schema_skips_cache_with_refresh_true(self): - return_value = [{'name': 'table', 'columns': []}] - with mock.patch('redash.query_runner.pg.PostgreSQL.get_schema') as patched_get_schema: - patched_get_schema.return_value = return_value - - self.factory.data_source.get_schema() - new_return_value = [{'name': 'new_table', 'columns': []}] - patched_get_schema.return_value = new_return_value - schema = self.factory.data_source.get_schema(refresh=True) - - self.assertEqual(new_return_value, schema) - self.assertEqual(patched_get_schema.call_count, 2) + data_source = self.factory.create_data_source() + table_metadata = 
self.factory.create_table_metadata(data_source_id=data_source.id) + column_metadata = self.factory.create_column_metadata( + table_id=table_metadata.id, + column_type='boolean', + column_example=True) + + return_value = [{ + 'name': 'table', + 'hasColumnMetadata': False, + 'exists': True, + 'columns': [{ + 'key': 1, + 'name': 'column', + 'type': 'boolean', + 'exists': True, + 'example': True + }] + }] + schema = data_source.get_schema() + self.assertEqual(return_value, schema) class TestDataSourceCreate(BaseTestCase): diff --git a/tests/query_runner/test_get_schema_format.py b/tests/query_runner/test_get_schema_format.py new file mode 100644 index 0000000000..0da8bc4202 --- /dev/null +++ b/tests/query_runner/test_get_schema_format.py @@ -0,0 +1,78 @@ +import json +import mock + +from unittest import TestCase + +from redash.query_runner.presto import Presto +from redash.query_runner.athena import Athena +from redash.query_runner.mysql import Mysql +from redash.query_runner.pg import PostgreSQL, Redshift + +class TestBaseQueryRunner(TestCase): + def setUp(self): + self.query_runners = [{ + 'instance': Presto({}), + 'mock_location': 'presto.Presto' + }, { + 'instance': Athena({}), + 'mock_location': 'athena.Athena' + }, { + 'instance': Mysql({'db': None}), + 'mock_location': 'mysql.Mysql' + }, { + 'instance': PostgreSQL({}), + 'mock_location': 'pg.PostgreSQL' + }, { + 'instance': Redshift({}), + 'mock_location': 'pg.Redshift' + }] + + def _setup_mock(self, function_to_patch): + patcher = mock.patch(function_to_patch) + patched_function = patcher.start() + self.addCleanup(patcher.stop) + return patched_function + + def assert_correct_schema_format(self, query_runner, mock_location): + EXPECTED_SCHEMA_RESULT = [{ + 'columns': ['created_date'], + 'metadata': [{ + 'name': 'created_date', + 'type': 'varchar', + 'sample': '2017-10-26' + }], + 'name': 'default.table_name' + }] + + get_schema_query_response = { + "rows": [{ + "table_schema": "default", + "table_name": "table_name", + "column_type": "varchar", + "column_name": "created_date" + }] + } + get_samples_query_response = { + "rows": [{ + "created_date": "2017-10-26" + }] + } + + self.run_count = 0 + def query_runner_resonses(query, user): + response = (json.dumps(get_schema_query_response), None) + if self.run_count > 0: + response = (json.dumps(get_samples_query_response), None) + self.run_count += 1 + return response + + self.patched_run_query = self._setup_mock( + 'redash.query_runner.{location}.run_query'.format(location=mock_location)) + self.patched_run_query.side_effect = query_runner_resonses + + schema = query_runner.get_schema() + self.assertEqual(schema, EXPECTED_SCHEMA_RESULT) + + def test_get_schema_format(self): + for runner in self.query_runners: + self.assert_correct_schema_format(runner['instance'], runner['mock_location']) diff --git a/tests/tasks/test_refresh_schemas.py b/tests/tasks/test_refresh_schemas.py index 809ecd5e5f..33446d83e5 100644 --- a/tests/tasks/test_refresh_schemas.py +++ b/tests/tasks/test_refresh_schemas.py @@ -1,12 +1,45 @@ +import mock +import copy import datetime from mock import ANY, call, patch from tests import BaseTestCase -from redash.tasks import refresh_schemas +from redash import models +from redash.tasks import refresh_schemas, refresh_schema +from redash.models import TableMetadata, ColumnMetadata class TestRefreshSchemas(BaseTestCase): + def setUp(self): + super(TestRefreshSchemas, self).setUp() + + self.COLUMN_NAME = 'first_column' + self.COLUMN_TYPE = 'text' + self.COLUMN_EXAMPLE = 'some 
text for column value' + self.EXPECTED_COLUMN_METADATA = { + 'id': 1, + 'table_id': 1, + 'column_name': self.COLUMN_NAME, + 'column_type': self.COLUMN_TYPE, + 'column_example': self.COLUMN_EXAMPLE, + 'column_exists': True, + } + + get_schema_patcher = mock.patch('redash.query_runner.pg.PostgreSQL.get_schema') + self.patched_get_schema = get_schema_patcher.start() + self.addCleanup(get_schema_patcher.stop) + self.default_schema_return_value = [{ + 'name': 'table', + 'columns': [self.COLUMN_NAME], + 'metadata': [{ + 'name': self.COLUMN_NAME, + 'type': self.COLUMN_TYPE, + 'sample': self.COLUMN_EXAMPLE + }] + }] + self.patched_get_schema.return_value = self.default_schema_return_value + def test_calls_refresh_of_all_data_sources(self): self.factory.data_source # trigger creation with patch('redash.tasks.queries.refresh_schema.apply_async') as refresh_job: @@ -25,3 +58,89 @@ def test_skips_paused_data_sources(self): with patch('redash.tasks.queries.refresh_schema.apply_async') as refresh_job: refresh_schemas() refresh_job.assert_called() + + def test_refresh_schema_creates_tables(self): + EXPECTED_TABLE_METADATA = { + 'id': 1, + 'table_exists': True, + 'table_name': 'table', + 'sample_query': None, + 'table_description': None, + 'column_metadata': True, + 'data_source_id': 1 + } + + refresh_schema(self.factory.data_source.id) + table_metadata = models.db.session.query(TableMetadata).all() + column_metadata = models.db.session.query(ColumnMetadata).all() + + self.assertEqual(len(table_metadata), 1) + self.assertEqual(len(column_metadata), 1) + self.assertEqual(table_metadata[0].to_dict(), EXPECTED_TABLE_METADATA) + self.assertEqual(column_metadata[0].to_dict(), self.EXPECTED_COLUMN_METADATA) + + def test_refresh_schema_deleted_table_marked(self): + refresh_schema(self.factory.data_source.id) + table_metadata = models.db.session.query(TableMetadata).all() + column_metadata = models.db.session.query(ColumnMetadata).all() + + self.assertEqual(len(table_metadata), 1) + self.assertEqual(len(column_metadata), 1) + self.assertEqual(table_metadata[0].to_dict()['table_exists'], True) + + # Table is gone, `table_exists` should be False. + self.patched_get_schema.return_value = [] + + refresh_schema(self.factory.data_source.id) + table_metadata = models.db.session.query(TableMetadata).all() + column_metadata = models.db.session.query(ColumnMetadata).all() + + self.assertEqual(len(table_metadata), 1) + self.assertEqual(len(column_metadata), 1) + self.assertEqual(table_metadata[0].to_dict()['table_exists'], False) + + # Table is back, `table_exists` should be True again. 
+ self.patched_get_schema.return_value = self.default_schema_return_value + refresh_schema(self.factory.data_source.id) + table_metadata = models.db.session.query(TableMetadata).all() + self.assertEqual(table_metadata[0].to_dict()['table_exists'], True) + + def test_refresh_schema_delete_column(self): + NEW_COLUMN_NAME = 'new_column' + refresh_schema(self.factory.data_source.id) + column_metadata = models.db.session.query(ColumnMetadata).all() + + self.assertEqual(column_metadata[0].to_dict()['column_exists'], True) + + self.patched_get_schema.return_value = [{ + 'name': 'table', + 'columns': [NEW_COLUMN_NAME], + 'metadata': [{ + 'name': NEW_COLUMN_NAME, + 'type': self.COLUMN_TYPE, + 'sample': self.COLUMN_EXAMPLE + }] + }] + + refresh_schema(self.factory.data_source.id) + column_metadata = models.db.session.query(ColumnMetadata).all() + self.assertEqual(len(column_metadata), 2) + + self.assertEqual(column_metadata[1].to_dict()['column_exists'], False) + self.assertEqual(column_metadata[0].to_dict()['column_exists'], True) + + def test_refresh_schema_update_column(self): + UPDATED_COLUMN_TYPE = 'varchar' + + refresh_schema(self.factory.data_source.id) + column_metadata = models.db.session.query(ColumnMetadata).all() + self.assertEqual(column_metadata[0].to_dict(), self.EXPECTED_COLUMN_METADATA) + + updated_schema = copy.deepcopy(self.default_schema_return_value) + updated_schema[0]['metadata'][0]['type'] = UPDATED_COLUMN_TYPE + self.patched_get_schema.return_value = updated_schema + + refresh_schema(self.factory.data_source.id) + column_metadata = models.db.session.query(ColumnMetadata).all() + self.assertNotEqual(column_metadata[0].to_dict(), self.EXPECTED_COLUMN_METADATA) + self.assertEqual(column_metadata[0].to_dict()['column_type'], UPDATED_COLUMN_TYPE) From a541d81c4c18840bd1613a61359072b318eb8e86 Mon Sep 17 00:00:00 2001 From: Marina Samuel Date: Sun, 25 Nov 2018 19:22:04 -0500 Subject: [PATCH 06/21] Front-end shows extra table metadata and uses new schema response. 
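Context for the template changes below: since PATCH 04, `DataSource.get_schema()` returns each table's `columns` as a list of objects rather than plain strings, which is why the repeater moves from `{{column}}` to `{{column.name}}` and can hand the whole entry to the new SchemaData drawer. One column entry, roughly (values illustrative, matching the runner tests above):

    {
        'key': 1,                  # ColumnMetadata.id
        'name': 'created_date',
        'type': 'varchar',
        'example': '2017-10-26',   # sample value, truncated to ~4000 chars by refresh_schema
        'exists': True,
    }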
--- client/app/assets/less/ant.less | 1 + .../app/assets/less/inc/schema-browser.less | 14 ++-- client/app/components/proptypes.js | 7 ++ client/app/components/queries/SchemaData.jsx | 65 +++++++++++++++++++ .../components/queries/schema-browser.html | 14 +++- .../app/components/queries/schema-browser.js | 13 ++++ 6 files changed, 106 insertions(+), 8 deletions(-) create mode 100644 client/app/components/queries/SchemaData.jsx diff --git a/client/app/assets/less/ant.less b/client/app/assets/less/ant.less index d2fd4a9581..0037893255 100644 --- a/client/app/assets/less/ant.less +++ b/client/app/assets/less/ant.less @@ -13,6 +13,7 @@ @import '~antd/lib/radio/style/index'; @import '~antd/lib/time-picker/style/index'; @import '~antd/lib/pagination/style/index'; +@import '~antd/lib/drawer/style/index'; @import '~antd/lib/table/style/index'; @import '~antd/lib/popover/style/index'; @import '~antd/lib/icon/style/index'; diff --git a/client/app/assets/less/inc/schema-browser.less b/client/app/assets/less/inc/schema-browser.less index 0034391086..d547a78790 100644 --- a/client/app/assets/less/inc/schema-browser.less +++ b/client/app/assets/less/inc/schema-browser.less @@ -7,14 +7,14 @@ div.table-name { border-radius: @redash-radius; position: relative; - .copy-to-editor { + .copy-to-editor, .info { display: none; } &:hover { background: fade(@redash-gray, 10%); - .copy-to-editor { + .copy-to-editor, .info { display: flex; } } @@ -36,7 +36,7 @@ div.table-name { background: transparent; } - .copy-to-editor { + .copy-to-editor, .info { color: fade(@redash-gray, 90%); cursor: pointer; position: absolute; @@ -49,6 +49,10 @@ div.table-name { justify-content: center; } + .info { + right: 20px + } + .table-open { padding: 0 22px 0 26px; overflow: hidden; @@ -56,14 +60,14 @@ div.table-name { white-space: nowrap; position: relative; - .copy-to-editor { + .copy-to-editor, .info { display: none; } &:hover { background: fade(@redash-gray, 10%); - .copy-to-editor { + .copy-to-editor, .info { display: flex; } } diff --git a/client/app/components/proptypes.js b/client/app/components/proptypes.js index f35e89cc0e..92fc1b217b 100644 --- a/client/app/components/proptypes.js +++ b/client/app/components/proptypes.js @@ -11,6 +11,13 @@ export const DataSource = PropTypes.shape({ type_name: PropTypes.string, }); +export const DataSourceMetadata = PropTypes.shape({ + key: PropTypes.number, + name: PropTypes.string, + type: PropTypes.string, + example: PropTypes.string, +}); + export const Table = PropTypes.shape({ columns: PropTypes.arrayOf(PropTypes.string).isRequired, }); diff --git a/client/app/components/queries/SchemaData.jsx b/client/app/components/queries/SchemaData.jsx new file mode 100644 index 0000000000..b17600676e --- /dev/null +++ b/client/app/components/queries/SchemaData.jsx @@ -0,0 +1,65 @@ +import React from 'react'; +import PropTypes from 'prop-types'; +import { react2angular } from 'react2angular'; +import Drawer from 'antd/lib/drawer'; +import Table from 'antd/lib/table'; + +import { DataSourceMetadata } from '@/components/proptypes'; + +class SchemaData extends React.PureComponent { + static propTypes = { + show: PropTypes.bool.isRequired, + onClose: PropTypes.func.isRequired, + tableName: PropTypes.string, + tableMetadata: PropTypes.arrayOf(DataSourceMetadata), + }; + + static defaultProps = { + tableName: '', + tableMetadata: [], + }; + + render() { + const columns = [{ + title: 'Column Name', + dataIndex: 'name', + width: 400, + key: 'name', + }, { + title: 'Column Type', + dataIndex: 'type', + 
width: 400, + key: 'type', + }, { + title: 'Example', + dataIndex: 'example', + width: 400, + key: 'example', + }]; + + return ( + + + + ); + } +} + +export default function init(ngModule) { + ngModule.component('schemaData', react2angular(SchemaData, null, [])); +} + +init.init = true; diff --git a/client/app/components/queries/schema-browser.html b/client/app/components/queries/schema-browser.html index 6e3f518059..a6ba68b792 100644 --- a/client/app/components/queries/schema-browser.html +++ b/client/app/components/queries/schema-browser.html @@ -9,22 +9,30 @@
-
+
{{table.name}} ({{table.size}}) +
-
{{column}} +
{{column.name}} + ng-click="$ctrl.itemSelected($event, [column.name])">
+
diff --git a/client/app/components/queries/schema-browser.js b/client/app/components/queries/schema-browser.js index 34615aa590..a761bc9202 100644 --- a/client/app/components/queries/schema-browser.js +++ b/client/app/components/queries/schema-browser.js @@ -8,6 +8,17 @@ function SchemaBrowserCtrl($rootScope, $scope) { $scope.$broadcast('vsRepeatTrigger'); }; + $scope.showSchemaInfo = false; + $scope.openSchemaInfo = ($event, tableName, tableMetadata) => { + $scope.tableName = tableName; + $scope.tableMetadata = tableMetadata; + $scope.showSchemaInfo = true; + $event.stopPropagation(); + }; + $scope.closeSchemaInfo = () => { + $scope.$apply(() => { $scope.showSchemaInfo = false; }); + }; + this.getSize = (table) => { let size = 22; @@ -22,6 +33,8 @@ function SchemaBrowserCtrl($rootScope, $scope) { return this.schema === undefined || this.schema.length === 0; }; + this.itemExists = item => item.exists; + this.itemSelected = ($event, hierarchy) => { $rootScope.$broadcast('query-editor.command', 'paste', hierarchy.join('.')); $event.preventDefault(); From 01fc553fa387edce2a090e652eafcb89a1a2b92b Mon Sep 17 00:00:00 2001 From: Marina Samuel Date: Thu, 17 Jan 2019 15:23:40 -0500 Subject: [PATCH 07/21] Delete datasource schema data when deleting a data source. --- redash/models/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/redash/models/__init__.py b/redash/models/__init__.py index 10fba84e1a..fce517001a 100644 --- a/redash/models/__init__.py +++ b/redash/models/__init__.py @@ -187,6 +187,11 @@ def get_by_id(cls, _id): return cls.query.filter(cls.id == _id).one() def delete(self): + # Delete the relevant metadata about a data source first. + tables = TableMetadata.query.filter(TableMetadata.data_source_id == self.id).options(load_only('id')) + ColumnMetadata.query.filter(ColumnMetadata.table_id.in_(tables.subquery())).delete(synchronize_session=False) + tables.delete() + Query.query.filter(Query.data_source == self).update(dict(data_source_id=None, latest_query_data_id=None)) QueryResult.query.filter(QueryResult.data_source == self).delete() res = db.session.delete(self) From 3a098d1e520511b13111d85fa0d89eb3754c6842 Mon Sep 17 00:00:00 2001 From: Marina Samuel Date: Thu, 17 Jan 2019 15:24:26 -0500 Subject: [PATCH 08/21] Process and store data source schema when a data source is first created or after a migration. 
--- docker-compose.yml | 2 +- redash/handlers/data_sources.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index e01be5bfd1..dbe30d0929 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -29,7 +29,7 @@ services: REDASH_LOG_LEVEL: "INFO" REDASH_REDIS_URL: "redis://redis:6379/0" REDASH_DATABASE_URL: "postgresql://postgres@postgres/postgres" - QUEUES: "queries,scheduled_queries,celery" + QUEUES: "queries,scheduled_queries,celery,schemas" WORKERS_COUNT: 2 redis: image: redis:3-alpine diff --git a/redash/handlers/data_sources.py b/redash/handlers/data_sources.py index 65532ee509..b001e80b66 100644 --- a/redash/handlers/data_sources.py +++ b/redash/handlers/data_sources.py @@ -10,6 +10,7 @@ from redash.handlers.base import BaseResource, get_object_or_404 from redash.permissions import (require_access, require_admin, require_permission, view_only) +from redash.tasks.queries import refresh_schemas from redash.query_runner import (get_configuration_schema_for_query_runner_type, query_runners, NotSupported) from redash.utils import filter_none @@ -129,6 +130,9 @@ def post(self): options=config) models.db.session.commit() + + # Refresh the stored schemas when a new data source is added to the list + refresh_schemas.apply_async(queue="schemas") except IntegrityError as e: if req['name'] in e.message: abort(400, message="Data source with the name {} already exists.".format(req['name'])) @@ -154,6 +158,13 @@ def get(self, data_source_id): try: response['schema'] = data_source.get_schema(refresh) + + # If the TableMetadata table has no information about this data source, + # this might be due to a fresh migration to these new tables. + # They will likely only populate at the next refresh (30 min intervals) + # So let's refresh them now to get them sooner. + if len(response['schema']) == 0: + refresh_schemas.apply_async(queue="schemas") except NotSupported: response['error'] = { 'code': 1, From 7114b81ee70eb3e2cc190a9979380b31b68b8511 Mon Sep 17 00:00:00 2001 From: Marina Samuel Date: Mon, 21 Jan 2019 16:10:11 -0500 Subject: [PATCH 09/21] Tables should have a unique name per datasource. --- redash/tasks/queries.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/redash/tasks/queries.py b/redash/tasks/queries.py index be9f6275fc..8044ee483a 100644 --- a/redash/tasks/queries.py +++ b/redash/tasks/queries.py @@ -386,8 +386,12 @@ def refresh_schema(data_source_id): table_name = table['name'] existing_tables.add(table_name) - # Assume that there will only exist 1 table with a given name so we use first() - persisted_table = models.db.session.query(TableMetadata.id).filter(TableMetadata.table_name==table_name).first() + # Assume that there will only exist 1 table with a given name for a given data source so we use first() + persisted_table = models.db.session.query( + TableMetadata).filter( + TableMetadata.table_name==table_name).filter( + TableMetadata.data_source_id==ds.id).first() + if persisted_table: models.db.session.query(TableMetadata).filter(TableMetadata.id==persisted_table.id).update({"table_exists": True}) else: From f072e64a3dd264653067e213e5212f58bc4d90f2 Mon Sep 17 00:00:00 2001 From: Marina Samuel Date: Mon, 21 Jan 2019 21:00:49 -0500 Subject: [PATCH 10/21] Add schemas queue to docker files. 
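For context, PATCH 08 enqueues the schema refresh on a dedicated Celery queue, so the workers have to consume it — hence the QUEUES changes below. The dispatch side, as added to the data source handlers:

    from redash.tasks.queries import refresh_schemas

    # A worker must listen on the "schemas" queue for this task to run,
    # which is what QUEUES="queries,scheduled_queries,celery,schemas" provides.
    refresh_schemas.apply_async(queue="schemas")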
--- .circleci/docker-compose.cypress.yml | 2 +- bin/docker-entrypoint | 2 +- docker-compose.production.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.circleci/docker-compose.cypress.yml b/.circleci/docker-compose.cypress.yml index 5305f41d4e..2483582ce7 100644 --- a/.circleci/docker-compose.cypress.yml +++ b/.circleci/docker-compose.cypress.yml @@ -23,7 +23,7 @@ services: REDASH_LOG_LEVEL: "INFO" REDASH_REDIS_URL: "redis://redis:6379/0" REDASH_DATABASE_URL: "postgresql://postgres@postgres/postgres" - QUEUES: "queries,scheduled_queries,celery" + QUEUES: "queries,scheduled_queries,celery,schemas" WORKERS_COUNT: 2 cypress: build: diff --git a/bin/docker-entrypoint b/bin/docker-entrypoint index 8cc09a4949..2ecd723d66 100755 --- a/bin/docker-entrypoint +++ b/bin/docker-entrypoint @@ -3,7 +3,7 @@ set -e worker() { WORKERS_COUNT=${WORKERS_COUNT:-2} - QUEUES=${QUEUES:-queries,scheduled_queries,celery} + QUEUES=${QUEUES:-queries,scheduled_queries,celery,schemas} echo "Starting $WORKERS_COUNT workers for queues: $QUEUES..." exec /usr/local/bin/celery worker --app=redash.worker -c$WORKERS_COUNT -Q$QUEUES -linfo --maxtasksperchild=10 -Ofair diff --git a/docker-compose.production.yml b/docker-compose.production.yml index f0b9812d7c..f7557ae94f 100644 --- a/docker-compose.production.yml +++ b/docker-compose.production.yml @@ -30,7 +30,7 @@ services: REDASH_LOG_LEVEL: "INFO" REDASH_REDIS_URL: "redis://redis:6379/0" REDASH_DATABASE_URL: "postgresql://postgres@postgres/postgres" - QUEUES: "queries,scheduled_queries,celery" + QUEUES: "queries,scheduled_queries,celery,schemas" WORKERS_COUNT: 2 restart: always redis: From f501d3f19dd01c41f6ca4e34d40aca04222508cf Mon Sep 17 00:00:00 2001 From: Marina Samuel Date: Thu, 1 Nov 2018 12:11:32 -0400 Subject: [PATCH 11/21] Closes #3192: Add data source config options. 
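The admin-only editor added below saves descriptions and visibility through a new POST handler on the existing schema resource (see `updateSchema` in data-source.js and `DataSource.save_schema`). A sketch of the request body for `POST api/data_sources/<id>/schema`, with illustrative values:

    {
        "tableId": 1,          # TableMetadata.id being edited
        "columnId": 2,         # omitted when a table-level field is edited
        "schema": {            # fields to update on that row, e.g.:
            "column_description": "Date the record was created",
        },
    }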
--- .../assets/less/redash/redash-newstyle.less | 4 + client/app/components/proptypes.js | 1 + client/app/components/queries/SchemaData.jsx | 10 + .../components/queries/schema-browser.html | 3 +- .../app/components/queries/schema-browser.js | 14 +- .../app/pages/data-sources/EditableTable.jsx | 90 +++++++ client/app/pages/data-sources/SchemaTable.jsx | 246 ++++++++++++++++++ .../app/pages/data-sources/schema-table.css | 5 + client/app/pages/data-sources/show.html | 6 + client/app/pages/data-sources/show.js | 8 +- client/app/services/data-source.js | 6 + redash/handlers/data_sources.py | 6 + redash/models/__init__.py | 21 +- tests/models/test_data_sources.py | 4 + tests/tasks/test_refresh_schemas.py | 4 + 15 files changed, 421 insertions(+), 7 deletions(-) create mode 100644 client/app/pages/data-sources/EditableTable.jsx create mode 100644 client/app/pages/data-sources/SchemaTable.jsx create mode 100644 client/app/pages/data-sources/schema-table.css diff --git a/client/app/assets/less/redash/redash-newstyle.less b/client/app/assets/less/redash/redash-newstyle.less index b2e6ebf018..b6f14e392a 100644 --- a/client/app/assets/less/redash/redash-newstyle.less +++ b/client/app/assets/less/redash/redash-newstyle.less @@ -101,6 +101,10 @@ body { } } +.admin-schema-editor { + padding: 50px 0; +} + .creation-container { h5 { color: #a7a7a7; diff --git a/client/app/components/proptypes.js b/client/app/components/proptypes.js index 92fc1b217b..04bef27a0d 100644 --- a/client/app/components/proptypes.js +++ b/client/app/components/proptypes.js @@ -16,6 +16,7 @@ export const DataSourceMetadata = PropTypes.shape({ name: PropTypes.string, type: PropTypes.string, example: PropTypes.string, + column_description: PropTypes.string, }); export const Table = PropTypes.shape({ diff --git a/client/app/components/queries/SchemaData.jsx b/client/app/components/queries/SchemaData.jsx index b17600676e..f00d4e82ae 100644 --- a/client/app/components/queries/SchemaData.jsx +++ b/client/app/components/queries/SchemaData.jsx @@ -11,11 +11,13 @@ class SchemaData extends React.PureComponent { show: PropTypes.bool.isRequired, onClose: PropTypes.func.isRequired, tableName: PropTypes.string, + tableDescription: PropTypes.string, tableMetadata: PropTypes.arrayOf(DataSourceMetadata), }; static defaultProps = { tableName: '', + tableDescription: '', tableMetadata: [], }; @@ -35,6 +37,11 @@ class SchemaData extends React.PureComponent { dataIndex: 'example', width: 400, key: 'example', + }, { + title: 'Description', + dataIndex: 'column_description', + width: 400, + key: 'column_description', }]; return ( @@ -46,6 +53,9 @@ class SchemaData extends React.PureComponent { onClose={this.props.onClose} visible={this.props.show} > +
+ {this.props.tableDescription} +
({{table.size}}) + ng-click="openSchemaInfo($event, table)"> @@ -32,6 +32,7 @@ diff --git a/client/app/components/queries/schema-browser.js b/client/app/components/queries/schema-browser.js index a761bc9202..13057f3dae 100644 --- a/client/app/components/queries/schema-browser.js +++ b/client/app/components/queries/schema-browser.js @@ -9,9 +9,10 @@ function SchemaBrowserCtrl($rootScope, $scope) { }; $scope.showSchemaInfo = false; - $scope.openSchemaInfo = ($event, tableName, tableMetadata) => { - $scope.tableName = tableName; - $scope.tableMetadata = tableMetadata; + $scope.openSchemaInfo = ($event, table) => { + $scope.tableName = table.name; + $scope.tableDescription = table.table_description; + $scope.tableMetadata = table.columns; $scope.showSchemaInfo = true; $event.stopPropagation(); }; @@ -33,7 +34,12 @@ function SchemaBrowserCtrl($rootScope, $scope) { return this.schema === undefined || this.schema.length === 0; }; - this.itemExists = item => item.exists; + this.itemExists = (item) => { + if ('visible' in item) { + return item.exists && item.visible; + } + return item.exists; + }; this.itemSelected = ($event, hierarchy) => { $rootScope.$broadcast('query-editor.command', 'paste', hierarchy.join('.')); diff --git a/client/app/pages/data-sources/EditableTable.jsx b/client/app/pages/data-sources/EditableTable.jsx new file mode 100644 index 0000000000..6e1f07fce6 --- /dev/null +++ b/client/app/pages/data-sources/EditableTable.jsx @@ -0,0 +1,90 @@ +import React from 'react'; +import Form from 'antd/lib/form'; +import Checkbox from 'antd/lib/checkbox'; +import Input from 'antd/lib/input'; + +const FormItem = Form.Item; +export const EditableContext = React.createContext(); + +const EditableRow = ({ form, index, ...props }) => ( + + + +); + +export const EditableFormRow = Form.create()(EditableRow); + +export class TableVisibilityCheckbox extends React.Component { + render() { + const { + visible, + onChange, + disabled + } = this.props; + + return ( + + {this.props.visible ? 'Visible' : 'Hidden'} + + ) + } +} + +export class EditableCell extends React.Component { + constructor(props) { + super(props); + this.state = { + visible: this.props.record ? 
this.props.record.table_visible : false + }; + } + + onChange = () => { + this.setState({ visible: !this.state.visible }); + } + + getInput = () => { + if (this.props.inputType === 'checkbox') { + return ( + ); + } + return ; + }; + + render() { + const { + editing, + dataIndex, + title, + inputType, + record, + index, + ...restProps + } = this.props; + + return ( + + {(form) => { + const { getFieldDecorator } = form; + return ( + + ); + }} + + ); + } +} diff --git a/client/app/pages/data-sources/SchemaTable.jsx b/client/app/pages/data-sources/SchemaTable.jsx new file mode 100644 index 0000000000..fbb42d886e --- /dev/null +++ b/client/app/pages/data-sources/SchemaTable.jsx @@ -0,0 +1,246 @@ +import React from 'react'; +import { react2angular } from 'react2angular'; +import PropTypes from 'prop-types'; +import Table from 'antd/lib/table'; +import Popconfirm from 'antd/lib/popconfirm'; +import { Schema } from '@/components/proptypes'; +import { EditableCell, EditableFormRow, EditableContext, TableVisibilityCheckbox } from './EditableTable'; + +import './schema-table.css'; + +function fetchTableData(schema) { + return schema.map(tableData => ({ + key: tableData.id, + name: tableData.name, + table_description: tableData.table_description || '', + table_visible: tableData.visible, + columns: tableData.columns, + })); +} + +const components = { + body: { + row: EditableFormRow, + cell: EditableCell, + }, +}; + +class SchemaTable extends React.Component { + static propTypes = { + schema: Schema, + updateSchema: PropTypes.func.isRequired + }; + + static defaultProps = { + schema: null, + }; + + static getDerivedStateFromProps(nextProps, prevState) { + if (nextProps.schema && prevState.data.length === 0) { + return { + data: fetchTableData(nextProps.schema), + editingKey: prevState.editingKey + }; + } + return prevState; + } + + constructor(props) { + super(props); + this.state = { data: [], editingKey: '' }; + this.columns = [{ + title: 'Table Name', + dataIndex: 'name', + width: '20%', + key: 'name', + }, { + title: 'Table Description', + dataIndex: 'table_description', + width: '55%', + key: 'table_description', + editable: true, + }, { + title: 'Visibility', + dataIndex: 'table_visible', + width: '15%', + key: 'table_visible', + editable: true, + render: (text, record) => { + return ( +
+ + +
+ ); + } + }, { + title: '', + width: '10%', + dataIndex: 'edit', + render: (text, record) => { + // Purposely calling fieldEditor() instead of setting render() to it + // because render() will pass a different third argument than what + // fieldEditory() takes + return this.fieldEditor(text, record); + } + }]; + } + + fieldEditor(text, record, tableData) { + const editable = this.isEditing(record); + const tableKey = tableData ? tableData.key : record.key; + const columnKey = tableData ? record.key : undefined; + return ( +
+ {editable ? ( + + + {form => ( + this.save(form, tableKey, columnKey)} + style={{ marginRight: 8 }} + > + Save + + )} + + this.cancel(record.key)} + > + Cancel + + + ) : ( + this.edit(record.key)}>Edit + )} +
+ ); + } + + expandedRowRender(tableData) { + const columns = [ + { title: 'Column Name', dataIndex: 'name', key: 'name', width: '15%' }, + { title: 'Column Type', dataIndex: 'type', key: 'type', width: '15%' }, + { title: 'Column Example', dataIndex: 'example', key: 'example', width: '20%' }, + { + title: 'Column Description', + dataIndex: 'column_description', + key: 'column_description', + width: '40%', + editable: true, + onCell: record => ({ + record, + inputType: 'text', + dataIndex: 'column_description', + title: 'Column Description', + editing: this.isEditing(record), + }), + }, + { + title: '', + width: '10%', + dataIndex: 'edit', + render: (text, record) => { + return this.fieldEditor(text, record, tableData); + } + } + ]; + + return ( +
+ {editing ? ( + + {getFieldDecorator(dataIndex, { + initialValue: record[dataIndex], + })(this.getInput()) } + + ) : restProps.children} +
+ ); + } + + cancel() { + this.setState({ editingKey: '' }); + } + + edit(key) { + this.setState({ editingKey: key }); + } + + isEditing(record) { + return record.key === this.state.editingKey; + } + + save(form, tableKey, columnKey) { + form.validateFields((error, editedFields) => { + if (error) { + return; + } + const newData = [...this.state.data]; + let spliceIndex = newData.findIndex(item => tableKey === item.key); + + if (spliceIndex < 0) { + return; + } + + const tableRow = newData[spliceIndex]; + let dataToUpdate = newData; + let rowToUpdate = tableRow; + + const columnIndex = tableRow.columns.findIndex(item => columnKey === item.key); + const columnRow = tableRow.columns[columnIndex]; + if (columnKey) { + dataToUpdate = tableRow.columns; + spliceIndex = columnIndex; + rowToUpdate = columnRow; + } + + dataToUpdate.splice(spliceIndex, 1, { + ...rowToUpdate, + ...editedFields, + }); + this.props.updateSchema(editedFields, tableRow.key, columnRow ? columnRow.key : undefined); + this.setState({ data: newData, editingKey: '' }); + }); + } + + render() { + const columns = this.columns.map((col) => { + if (!col.editable) { + return col; + } + return { + ...col, + onCell: record => ({ + record, + inputType: col.dataIndex === 'table_visible' ? 'checkbox' : 'text', + dataIndex: col.dataIndex, + title: col.title, + editing: this.isEditing(record), + }), + }; + }); + + return ( +
+ ); + } +} + +export default function init(ngModule) { + ngModule.component('schemaTable', react2angular(SchemaTable, null, [])); +} + +init.init = true; diff --git a/client/app/pages/data-sources/schema-table.css b/client/app/pages/data-sources/schema-table.css new file mode 100644 index 0000000000..c99846a162 --- /dev/null +++ b/client/app/pages/data-sources/schema-table.css @@ -0,0 +1,5 @@ +.editable-row .ant-form-explain { + position: absolute; + font-size: 12px; + margin-top: -4px; +} \ No newline at end of file diff --git a/client/app/pages/data-sources/show.html b/client/app/pages/data-sources/show.html index c513d1f196..4c64fb09d3 100644 --- a/client/app/pages/data-sources/show.html +++ b/client/app/pages/data-sources/show.html @@ -32,6 +32,12 @@

{{type.name}}

+
+ +
diff --git a/client/app/pages/data-sources/show.js b/client/app/pages/data-sources/show.js index f6754f8bdd..61eff1c847 100644 --- a/client/app/pages/data-sources/show.js +++ b/client/app/pages/data-sources/show.js @@ -1,4 +1,4 @@ -import { find } from 'lodash'; +import { find, bind } from 'lodash'; import debug from 'debug'; import template from './show.html'; @@ -22,6 +22,12 @@ function DataSourceCtrl( $scope.types = $route.current.locals.types; $scope.type = find($scope.types, { type: $scope.dataSource.type }); $scope.canChangeType = $scope.dataSource.id === undefined; + $scope.dataSource.getSchema().then((data) => { + if (data.schema) { + $scope.schema = data.schema; + } + }); + $scope.updateSchema = bind($scope.dataSource.updateSchema, $scope.dataSource); $scope.helpLinks = { athena: 'https://redash.io/help/data-sources/amazon-athena-setup', diff --git a/client/app/services/data-source.js b/client/app/services/data-source.js index f1d69d5cbc..1dfc13df64 100644 --- a/client/app/services/data-source.js +++ b/client/app/services/data-source.js @@ -16,6 +16,7 @@ function DataSourceService($q, $resource, $http) { const actions = { get: { method: 'GET', cache: false, isArray: false }, + post: { method: 'POST', cache: false, isArray: false }, query: { method: 'GET', cache: false, isArray: true }, test: { method: 'POST', @@ -27,6 +28,11 @@ function DataSourceService($q, $resource, $http) { const DataSourceResource = $resource('api/data_sources/:id', { id: '@id' }, actions); + DataSourceResource.prototype.updateSchema = function updateSchema(schema, tableId, columnId) { + const data = { tableId, columnId, schema }; + return $http.post(`api/data_sources/${this.id}/schema`, data); + }; + DataSourceResource.prototype.getSchema = function getSchema(refresh = false) { if (this._schema === undefined || refresh) { return fetchSchema(this.id, refresh).then((response) => { diff --git a/redash/handlers/data_sources.py b/redash/handlers/data_sources.py index b001e80b66..2532ece19b 100644 --- a/redash/handlers/data_sources.py +++ b/redash/handlers/data_sources.py @@ -149,6 +149,12 @@ def post(self): class DataSourceSchemaResource(BaseResource): + @require_admin + def post(self, data_source_id): + data_source = get_object_or_404(models.DataSource.get_by_id_and_org, data_source_id, self.current_org) + new_schema_data = request.get_json(force=True) + data_source.save_schema(new_schema_data) + def get(self, data_source_id): data_source = get_object_or_404(models.DataSource.get_by_id_and_org, data_source_id, self.current_org) require_access(data_source.groups, self.current_user, view_only) diff --git a/redash/models/__init__.py b/redash/models/__init__.py index fce517001a..d5b7aecad1 100644 --- a/redash/models/__init__.py +++ b/redash/models/__init__.py @@ -68,6 +68,7 @@ class TableMetadata(db.Model): id = Column(db.Integer, primary_key=True) data_source_id = Column(db.Integer, db.ForeignKey("data_sources.id")) table_exists = Column(db.Boolean, default=True) + table_visible = Column(db.Boolean, default=True) table_name = Column(db.String(255)) table_description = Column(db.String(4096), nullable=True) column_metadata = Column(db.Boolean, default=False) @@ -83,6 +84,7 @@ def to_dict(self): 'id': self.id, 'data_source_id': self.data_source_id, 'table_exists': self.table_exists, + 'table_visible': self.table_visible, 'table_name': self.table_name, 'table_description': self.table_description, 'column_metadata': self.column_metadata, @@ -97,6 +99,7 @@ class ColumnMetadata(db.Model): column_type = 
Column(db.String(255), nullable=True) column_example = Column(db.String(4096), nullable=True) column_exists = Column(db.Boolean, default=True) + column_description = Column(db.String(4096), nullable=True) __tablename__ = 'column_metadata' @@ -111,6 +114,7 @@ def to_dict(self): 'column_type': self.column_type, 'column_example': self.column_example, 'column_exists': self.column_exists, + 'column_description': self.column_description, } @python_2_unicode_compatible @@ -198,14 +202,28 @@ def delete(self): db.session.commit() return res + def save_schema(self, schema_info): + if 'columnId' in schema_info: + db.session.query(ColumnMetadata).filter( + ColumnMetadata.table_id==schema_info['tableId']).filter( + ColumnMetadata.id==schema_info['columnId']).update( + schema_info['schema']) + else: + db.session.query(TableMetadata).filter(TableMetadata.id==schema_info['tableId']).update(schema_info['schema']) + + db.session.commit() + def get_schema(self, refresh=False): schema = [] tables = db.session.query(TableMetadata).filter(TableMetadata.data_source_id == self.id).all() for table in tables: table_info = { + 'id': table.id, 'name': table.table_name, 'exists': table.table_exists, + 'visible': table.table_visible, 'hasColumnMetadata': table.column_metadata, + 'table_description': table.table_description, 'columns': []} columns = db.session.query(ColumnMetadata).filter(ColumnMetadata.table_id==table.id) table_info['columns'] = sorted([{ @@ -213,7 +231,8 @@ def get_schema(self, refresh=False): 'name': column.column_name, 'type': column.column_type, 'exists': column.column_exists, - 'example': column.column_example + 'example': column.column_example, + 'column_description': column.column_description, } for column in columns], key=lambda column: column['name']) schema.append(table_info) diff --git a/tests/models/test_data_sources.py b/tests/models/test_data_sources.py index 6bc58a497f..415fba50e0 100644 --- a/tests/models/test_data_sources.py +++ b/tests/models/test_data_sources.py @@ -14,13 +14,17 @@ def test_get_schema(self): column_example=True) return_value = [{ + 'id': table_metadata.id, 'name': 'table', 'hasColumnMetadata': False, 'exists': True, + 'visible': True, + 'table_description': None, 'columns': [{ 'key': 1, 'name': 'column', 'type': 'boolean', + 'column_description': None, 'exists': True, 'example': True }] diff --git a/tests/tasks/test_refresh_schemas.py b/tests/tasks/test_refresh_schemas.py index 33446d83e5..7a6e175214 100644 --- a/tests/tasks/test_refresh_schemas.py +++ b/tests/tasks/test_refresh_schemas.py @@ -23,6 +23,7 @@ def setUp(self): 'column_name': self.COLUMN_NAME, 'column_type': self.COLUMN_TYPE, 'column_example': self.COLUMN_EXAMPLE, + 'column_description': None, 'column_exists': True, } @@ -59,11 +60,14 @@ def test_skips_paused_data_sources(self): refresh_schemas() refresh_job.assert_called() + def test_refresh_schema_creates_tables(self): EXPECTED_TABLE_METADATA = { 'id': 1, 'table_exists': True, 'table_name': 'table', + 'table_visible': True, + 'column_metadata': False, 'sample_query': None, 'table_description': None, 'column_metadata': True, From 221af5333c3d34ff2b6e2e977c1306556d83ce1e Mon Sep 17 00:00:00 2001 From: Marina Samuel Date: Tue, 5 Feb 2019 14:59:07 -0500 Subject: [PATCH 12/21] Add migration for new metadata. 
--- migrations/versions/cf135a57332e_.py | 30 ++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 migrations/versions/cf135a57332e_.py diff --git a/migrations/versions/cf135a57332e_.py b/migrations/versions/cf135a57332e_.py new file mode 100644 index 0000000000..c1f65d68ec --- /dev/null +++ b/migrations/versions/cf135a57332e_.py @@ -0,0 +1,30 @@ +"""empty message + +Revision ID: cf135a57332e +Revises: 280daa582976 +Create Date: 2019-02-05 19:52:48.233070 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'cf135a57332e' +down_revision = '280daa582976' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('column_metadata', sa.Column('column_description', sa.String(length=4096), nullable=True)) + op.add_column('table_metadata', sa.Column('table_visible', sa.Boolean(), nullable=False)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('table_metadata', 'table_visible') + op.drop_column('column_metadata', 'column_description') + # ### end Alembic commands ### From 9fc9d7f70a59ca0a5d2f748aa8579f2311740f91 Mon Sep 17 00:00:00 2001 From: Marina Samuel Date: Tue, 18 Dec 2018 13:49:31 -0500 Subject: [PATCH 13/21] Table display improvements. --- client/app/components/queries/SchemaData.jsx | 2 +- .../app/pages/data-sources/EditableTable.jsx | 3 +- client/app/pages/data-sources/SchemaTable.jsx | 69 +++++++++++++------ .../app/pages/data-sources/schema-table.css | 9 ++- 4 files changed, 55 insertions(+), 28 deletions(-) diff --git a/client/app/components/queries/SchemaData.jsx b/client/app/components/queries/SchemaData.jsx index f00d4e82ae..400626aafb 100644 --- a/client/app/components/queries/SchemaData.jsx +++ b/client/app/components/queries/SchemaData.jsx @@ -53,7 +53,7 @@ class SchemaData extends React.PureComponent { onClose={this.props.onClose} visible={this.props.show} > -
+
{this.props.tableDescription}
); } - return ; + return