From 4bb29252955bc913cb41416def27329ea30b3d8a Mon Sep 17 00:00:00 2001 From: "Hugh A. Miles II" Date: Mon, 13 Nov 2023 13:18:28 -0500 Subject: [PATCH] fix: always denorm column value before querying values (#25919) (cherry picked from commit 8d8e1bb637be08b0345407ea13cfa81034eef1d5) --- superset/connectors/base/models.py | 7 ---- superset/connectors/sqla/models.py | 29 ---------------- superset/datasource/api.py | 4 +++ superset/models/helpers.py | 56 ++++++++++++++---------------- 4 files changed, 31 insertions(+), 65 deletions(-) diff --git a/superset/connectors/base/models.py b/superset/connectors/base/models.py index d5386c7a66c3e..1fc0fde5751c4 100644 --- a/superset/connectors/base/models.py +++ b/superset/connectors/base/models.py @@ -496,13 +496,6 @@ def query(self, query_obj: QueryObjectDict) -> QueryResult: """ raise NotImplementedError() - def values_for_column(self, column_name: str, limit: int = 10000) -> list[Any]: - """Given a column, returns an iterable of distinct values - - This is used to populate the dropdown showing a list of - values in filters in the explore view""" - raise NotImplementedError() - @staticmethod def default_query(qry: Query) -> Query: return qry diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index e366940ff2592..510ca54ae8ffa 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -46,7 +46,6 @@ inspect, Integer, or_, - select, String, Table, Text, @@ -793,34 +792,6 @@ def get_fetch_values_predicate( ) ) from ex - def values_for_column(self, column_name: str, limit: int = 10000) -> list[Any]: - """Runs query against sqla to retrieve some - sample values for the given column. - """ - cols = {col.column_name: col for col in self.columns} - target_col = cols[column_name] - tp = self.get_template_processor() - tbl, cte = self.get_from_clause(tp) - - qry = ( - select([target_col.get_sqla_col(template_processor=tp)]) - .select_from(tbl) - .distinct() - ) - if limit: - qry = qry.limit(limit) - - if self.fetch_values_predicate: - qry = qry.where(self.get_fetch_values_predicate(template_processor=tp)) - - with self.database.get_sqla_engine_with_context() as engine: - sql = qry.compile(engine, compile_kwargs={"literal_binds": True}) - sql = self._apply_cte(sql, cte) - sql = self.mutate_query_from_config(sql) - - df = pd.read_sql_query(sql=sql, con=engine) - return df[column_name].to_list() - def mutate_query_from_config(self, sql: str) -> str: """Apply config's SQL_QUERY_MUTATOR diff --git a/superset/datasource/api.py b/superset/datasource/api.py index 0c4338e3496dd..131d115755720 100644 --- a/superset/datasource/api.py +++ b/superset/datasource/api.py @@ -120,6 +120,10 @@ def get_column_values( column_name=column_name, limit=row_limit ) return self.response(200, result=payload) + except KeyError: + return self.response( + 400, message=f"Column name {column_name} does not exist" + ) except NotImplementedError: return self.response( 400, diff --git a/superset/models/helpers.py b/superset/models/helpers.py index cfa09154ed748..954806391657b 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -700,10 +700,7 @@ class ExploreMixin: # pylint: disable=too-many-public-methods "MIN": sa.func.MIN, "MAX": sa.func.MAX, } - - @property - def fetch_value_predicate(self) -> str: - return "fix this!" + fetch_values_predicate = None @property def type(self) -> str: @@ -780,17 +777,20 @@ def sql(self) -> str: def columns(self) -> list[Any]: raise NotImplementedError() - def get_fetch_values_predicate( - self, template_processor: Optional[BaseTemplateProcessor] = None - ) -> TextClause: - raise NotImplementedError() - def get_extra_cache_keys(self, query_obj: dict[str, Any]) -> list[Hashable]: raise NotImplementedError() def get_template_processor(self, **kwargs: Any) -> BaseTemplateProcessor: raise NotImplementedError() + def get_fetch_values_predicate( + self, + template_processor: Optional[ # pylint: disable=unused-argument + BaseTemplateProcessor + ] = None, # pylint: disable=unused-argument + ) -> TextClause: + return self.fetch_values_predicate + def get_sqla_row_level_filters( self, template_processor: BaseTemplateProcessor, @@ -1336,36 +1336,34 @@ def get_time_filter( # pylint: disable=too-many-arguments return and_(*l) def values_for_column(self, column_name: str, limit: int = 10000) -> list[Any]: - """Runs query against sqla to retrieve some - sample values for the given column. - """ - cols = {} - for col in self.columns: - if isinstance(col, dict): - cols[col.get("column_name")] = col - else: - cols[col.column_name] = col - - target_col = cols[column_name] - tp = None # todo(hughhhh): add back self.get_template_processor() + # always denormalize column name before querying for values + db_dialect = self.database.get_dialect() + denomalized_col_name = self.database.db_engine_spec.denormalize_name( + db_dialect, column_name + ) + cols = {col.column_name: col for col in self.columns} + target_col = cols[denomalized_col_name] + tp = self.get_template_processor() tbl, cte = self.get_from_clause(tp) - if isinstance(target_col, dict): - sql_column = sa.column(target_col.get("name")) - else: - sql_column = target_col - - qry = sa.select([sql_column]).select_from(tbl).distinct() + qry = ( + sa.select([target_col.get_sqla_col(template_processor=tp)]) + .select_from(tbl) + .distinct() + ) if limit: qry = qry.limit(limit) + if self.fetch_values_predicate: + qry = qry.where(self.get_fetch_values_predicate(template_processor=tp)) + with self.database.get_sqla_engine_with_context() as engine: sql = qry.compile(engine, compile_kwargs={"literal_binds": True}) sql = self._apply_cte(sql, cte) sql = self.mutate_query_from_config(sql) df = pd.read_sql_query(sql=sql, con=engine) - return df[column_name].to_list() + return df[denomalized_col_name].to_list() def get_timestamp_expression( self, @@ -1937,7 +1935,7 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma ) having_clause_and += [self.text(having)] - if apply_fetch_values_predicate and self.fetch_values_predicate: # type: ignore + if apply_fetch_values_predicate and self.fetch_values_predicate: qry = qry.where( self.get_fetch_values_predicate(template_processor=template_processor) )