Skip to content

Commit

Permalink
Add filter spec implementation (#276)
Browse files Browse the repository at this point in the history
Add support for filter spec.
Add Title and Description for Filters based on filter spec.
Add support for DateRange filter.
Add support for Single Select filter.
Add support for Filters linked with multiple visualization widgets.

Resolves: #246 #252 #168 #171 


![image](https://github.com/user-attachments/assets/c17405a8-4f9a-400f-a48c-f7e074b90108)
  • Loading branch information
bishwajit-db authored Sep 17, 2024
1 parent 7a4dc19 commit d88bb65
Show file tree
Hide file tree
Showing 7 changed files with 402 additions and 7 deletions.
163 changes: 157 additions & 6 deletions src/databricks/labs/lsql/dashboards.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@
CounterSpec,
Dashboard,
Dataset,
DateRangePickerSpec,
DisplayType,
DropdownSpec,
Field,
Layout,
MultiSelectSpec,
Expand Down Expand Up @@ -193,18 +195,50 @@ def split(self) -> tuple[str, str]:
return "", self._content


class FilterHandler(BaseHandler):
    """Handle filter files."""

    def _parse_header(self, header: str) -> dict:
        """Parse the JSON header of a ``.filter.json`` file into tile metadata.

        Raises:
            ValueError : If neither or both of ``column``/``columns`` are set.
        """
        if not header:
            return {}
        metadata = json.loads(header)
        # "column" (a single column name) and "columns" (a list of names) are
        # mutually exclusive ways for the user to declare the filter columns.
        single_column = metadata.pop("column", None)
        column_list = metadata.pop("columns", None)
        if not single_column and not column_list:
            raise ValueError(f"Neither column nor columns set in {self._path}")
        if single_column and column_list:
            raise ValueError(f"Both column and columns set in {self._path}")
        # Normalize to a list of columns; the column/columns keys in
        # .filter.json files map onto the "filters" key of the TileMetadata.
        metadata["filters"] = column_list if column_list else [single_column]
        metadata["widget_type"] = WidgetType(metadata.pop("type", "DROPDOWN").upper())
        return metadata

    def split(self) -> tuple[str, str]:
        """Treat the whole (stripped) file body as the header; no trailing content."""
        return self._content.strip(), ""


@unique
class WidgetType(str, Enum):
"""The query widget type"""

AUTO = "AUTO"
TABLE = "TABLE"
COUNTER = "COUNTER"
DATE_RANGE_PICKER = "DATE_RANGE_PICKER"
MULTI_SELECT = "MULTI_SELECT"
DROPDOWN = "DROPDOWN"

def as_widget_spec(self) -> type[WidgetSpec]:
widget_spec_mapping: dict[str, type[WidgetSpec]] = {
"TABLE": TableV1Spec,
"COUNTER": CounterSpec,
"DATE_RANGE_PICKER": DateRangePickerSpec,
"MULTI_SELECT": MultiSelectSpec,
"DROPDOWN": DropdownSpec,
}
if self.name not in widget_spec_mapping:
raise ValueError(f"Can not convert to widget spec: {self}")
Expand Down Expand Up @@ -266,13 +300,18 @@ def is_markdown(self) -> bool:
def is_query(self) -> bool:
return self.path is not None and self.path.suffix == ".sql"

def is_filter(self) -> bool:
    """Whether this tile is defined by a ``.filter.json`` file."""
    if self.path is None:
        return False
    return self.path.name.endswith(".filter.json")

@property
def handler(self) -> BaseHandler:
    """Instantiate the content handler matching this tile's file type."""
    if self.is_markdown():
        handler_class = MarkdownHandler
    elif self.is_query():
        handler_class = QueryHandler
    elif self.is_filter():
        handler_class = FilterHandler
    else:
        # Fall back to the base handler for unrecognized file types.
        handler_class = BaseHandler
    return handler_class(self.path)

@classmethod
Expand Down Expand Up @@ -346,8 +385,9 @@ def validate(self) -> None:
if len(self.content) == 0:
raise ValueError(f"Tile has empty content: {self}")

def get_layouts(self) -> Iterable[Layout]:
def get_layouts(self, dashboard_metadata: "DashboardMetadata") -> Iterable[Layout]:
    """Get the layout(s) reflecting this tile in the dashboard."""
    _ = dashboard_metadata  # Unused by the default (textbox) implementation
    text_widget = Widget(name=f"{self.metadata.id}_widget", textbox_spec=self.content)
    yield Layout(widget=text_widget, position=self.position)
Expand All @@ -372,6 +412,8 @@ def from_tile_metadata(cls, tile_metadata: TileMetadata) -> "Tile":
"""Create a tile given the tile metadata."""
if tile_metadata.is_markdown():
return MarkdownTile(tile_metadata)
if tile_metadata.is_filter():
return FilterTile(tile_metadata)
query_tile = QueryTile(tile_metadata)
spec_type = query_tile.infer_spec_type()
if spec_type is None:
Expand Down Expand Up @@ -668,7 +710,7 @@ def _get_filters_layouts(self) -> Iterable[Layout]:
layout = Layout(widget=widget, position=position)
yield layout

def get_layouts(self) -> Iterable[Layout]:
def get_layouts(self, _) -> Iterable[Layout]:
    """Yield the query widget layouts followed by any filter widget layouts."""
    for query_layout in self._get_query_layouts():
        yield query_layout
    for filter_layout in self._get_filters_layouts():
        yield filter_layout
Expand Down Expand Up @@ -728,6 +770,115 @@ def _get_query_widget_spec(fields: list[Field], *, frame: WidgetFrameSpec | None
return spec


@dataclass
class FilterTile(Tile):
    """A tile defining a filter widget linked to every dataset exposing its columns."""

    # Filters default to a compact 3x2 widget; positioning happens later.
    _position: Position = dataclasses.field(default_factory=lambda: Position(0, 0, 3, 2))

    def validate(self) -> None:
        """Validate the tile
        Raises:
            ValueError : If the tile is invalid.
        """
        if not self.metadata.is_filter():
            raise ValueError(f"Tile is not a filter file: {self}")
        if len(self.metadata.filters) == 0:
            raise ValueError(f"Filter tile has no filters defined: {self}")
        # Only the filter-capable widget specs may back a filter tile.
        if self.metadata.widget_type not in {
            WidgetType.MULTI_SELECT,
            WidgetType.DATE_RANGE_PICKER,
            WidgetType.DROPDOWN,
        }:
            raise ValueError(f"Filter tile has an invalid widget type: {self}")

    def get_layouts(self, dashboard_metadata: "DashboardMetadata") -> Iterable[Layout]:
        """Get the layout(s) reflecting this tile in the dashboard."""
        datasets = dashboard_metadata.get_datasets()
        widget = self._create_widget(datasets)
        layout = Layout(widget=widget, position=self.position)
        yield layout

    def _create_widget(self, datasets: list[Dataset]) -> Widget:
        """Create the filter widget wired to all matching dataset columns.

        Raises:
            ValueError : If no dataset exposes any of the filter columns.
        """
        dataset_columns = self._get_dataset_columns(datasets)
        # This method is called during get layouts.
        # Metadata validation is done before getting the layouts.
        # That's why dataset_columns is not being validated during metadata validation.
        if len(dataset_columns) == 0:
            err_msg = f"Filter tile has no matching dataset columns: {self}"
            raise ValueError(err_msg)

        filter_type = self.metadata.widget_type
        return self._create_filter_widget(dataset_columns, filter_type.as_widget_spec())

    def _get_dataset_columns(self, datasets: list[Dataset]) -> set[tuple[str, str]]:
        """Get the filter column and dataset name pairs."""
        dataset_columns = set()
        for dataset in datasets:
            for field in self._find_filter_fields(dataset.query):
                dataset_columns.add((field.name, dataset.name))
        return dataset_columns

    def _find_filter_fields(self, query: str) -> list[Field]:
        """Find the fields in a query matching the filter names.
        The fields are the projections in the query's top level SELECT.
        """
        try:
            abstract_syntax_tree = sqlglot.parse_one(query, dialect=_SQL_DIALECT)
        except sqlglot.ParseError as e:
            # An unparseable dataset query contributes no filter fields.
            logger.warning(f"Error while parsing {query}: {e}")
            return []
        # Filter names are matched case-insensitively against the projections.
        filters = {name.lower() for name in self.metadata.filters}
        filter_fields = []
        for projection in abstract_syntax_tree.find_all(sqlglot.exp.Select):
            if projection.depth > 0:  # Only the top-level SELECT's projections count
                continue
            for named_select in projection.named_selects:
                if named_select.lower() not in filters:
                    continue
                field = Field(name=named_select, expression=f"`{named_select}`")
                filter_fields.append(field)
        return filter_fields

    def _create_filter_widget(
        self,
        dataset_columns: set[tuple[str, str]],
        spec_type: type[WidgetSpec],
    ) -> Widget:
        """Create the widget holding the filter spec and its backing queries."""
        frame = self._create_widget_frame()
        control_encodings, queries = self._generate_filter_encodings_and_queries(dataset_columns)
        control_encoding_map = ControlEncodingMap(control_encodings)
        spec = spec_type(encodings=control_encoding_map, frame=frame)
        widget = Widget(name=f"{self.metadata.id}_widget", queries=queries, spec=spec)
        return widget

    def _create_widget_frame(self) -> WidgetFrameSpec:
        """Create the widget frame; title/description are shown only when non-empty."""
        return WidgetFrameSpec(
            title=self.metadata.title,
            show_title=len(self.metadata.title) > 0,
            description=self.metadata.description,
            show_description=len(self.metadata.description) > 0,
        )

    def _generate_filter_encodings_and_queries(
        self, dataset_columns: set[tuple[str, str]]
    ) -> tuple[list[ControlEncoding], list[NamedQuery]]:
        """Generate one encoding and one backing query per (column, dataset) pair.

        The pairs are processed in sorted order so the serialized dashboard is
        deterministic across runs; iterating the set directly would make the
        widget/query order depend on hash randomization.
        """
        encodings: list[ControlEncoding] = []
        queries = []

        for column, dataset_name in sorted(dataset_columns):
            fields = [
                Field(name=column, expression=f"`{column}`"),
                Field(name=f"{column}_associativity", expression="COUNT_IF(`associative_filter_predicate_group`)"),
            ]
            query = Query(dataset_name=dataset_name, fields=fields, disaggregated=False)
            # Use the enum *name* so the query name is stable across Python
            # versions: f-string formatting of str-mixin enums changed in 3.11+
            # to include the class name (e.g. "WidgetType.DROPDOWN").
            named_query = NamedQuery(name=f"{self.metadata.widget_type.name}_{dataset_name}_{column}", query=query)
            queries.append(named_query)
            control_encoding = ControlFieldEncoding(column, named_query.name, display_name=column)
            encodings.append(control_encoding)

        return encodings, queries


@dataclass
class DashboardMetadata:
"""The metadata defining a lakeview dashboard"""
Expand Down Expand Up @@ -799,7 +950,7 @@ def replace_database(self, *args, **kwargs) -> "DashboardMetadata":
tiles.append(tile)
return dataclasses.replace(self, _tiles=tiles)

def _get_datasets(self) -> list[Dataset]:
def get_datasets(self) -> list[Dataset]:
"""Get the datasets for the dashboard."""
datasets: list[Dataset] = []
for tile in self.tiles:
Expand All @@ -811,12 +962,12 @@ def _get_layouts(self) -> list[Layout]:
"""Get the layouts for the dashboard."""
layouts: list[Layout] = []
for tile in self.tiles:
layouts.extend(tile.get_layouts())
layouts.extend(tile.get_layouts(self))
return layouts

def as_lakeview(self) -> Dashboard:
"""Create a lakeview dashboard from the dashboard metadata."""
datasets = self._get_datasets()
datasets = self.get_datasets()
layouts = self._get_layouts()
page = Page(
name=self.display_name,
Expand Down Expand Up @@ -890,7 +1041,7 @@ def _from_dashboard_folder(cls, folder: Path) -> "DashboardMetadata":
"""Read the dashboard metadata from the tile files."""
tiles = []
for path in folder.iterdir():
if path.suffix not in {".sql", ".md"}:
if not path.name.endswith((".sql", ".md", ".filter.json")):
continue
tile_metadata = TileMetadata.from_path(path)
tile = Tile.from_tile_metadata(tile_metadata)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"columns": ["sales_date", "inventory_date"],
"title": "Date",
"description": "Filter by date",
"type": "DATE_RANGE_PICKER"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"columns": ["product_category", "category"],
"title": "Category",
"description": "Filter by category"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/* --title 'Product Sales' */
-- Sample fixture: daily sales amount per product category for January 2023.
-- sales_date and product_category match the columns declared by the date-range
-- and category filter specs in this dashboard folder.
SELECT
  CAST(sales_date AS DATE) AS sales_date,
  product_category,
  sales_amount
FROM VALUES
  ('2023-01-01', 'Electronics', 1000.00),
  ('2023-01-02', 'Furniture', 1200.00),
  ('2023-01-03', 'Clothing', 1100.00),
  ('2023-01-04', 'Electronics', 1050.00),
  ('2023-01-05', 'Furniture', 1250.00),
  ('2023-01-06', 'Clothing', 1150.00),
  ('2023-01-07', 'Electronics', 1300.00),
  ('2023-01-08', 'Furniture', 1400.00),
  ('2023-01-09', 'Clothing', 1200.00),
  ('2023-01-10', 'Electronics', 1350.00),
  ('2023-01-11', 'Furniture', 1500.00),
  ('2023-01-12', 'Clothing', 1250.00),
  ('2023-01-13', 'Electronics', 1400.00),
  ('2023-01-14', 'Furniture', 1550.00),
  ('2023-01-15', 'Clothing', 1300.00),
  ('2023-01-16', 'Electronics', 1450.00),
  ('2023-01-17', 'Furniture', 1600.00),
  ('2023-01-18', 'Clothing', 1350.00),
  ('2023-01-19', 'Electronics', 1500.00),
  ('2023-01-20', 'Furniture', 1650.00),
  ('2023-01-21', 'Clothing', 1400.00),
  ('2023-01-22', 'Electronics', 1550.00),
  ('2023-01-23', 'Furniture', 1700.00),
  ('2023-01-24', 'Clothing', 1450.00),
  ('2023-01-25', 'Electronics', 1600.00),
  ('2023-01-26', 'Furniture', 1750.00),
  ('2023-01-27', 'Clothing', 1500.00),
  ('2023-01-28', 'Electronics', 1650.00),
  ('2023-01-29', 'Furniture', 1800.00),
  ('2023-01-30', 'Clothing', 1550.00),
  ('2023-01-31', 'Electronics', 1700.00) AS tab(sales_date, product_category, sales_amount)
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/* --title 'Product Inventory' */
-- Sample fixture: daily inventory quantity per category/product for January 2023.
-- inventory_date and category match the columns declared by the date-range and
-- category filter specs in this dashboard folder.
SELECT
  CAST(inventory_date AS DATE) AS inventory_date,
  category,
  product_name,
  quantity
FROM VALUES
  ('2023-01-01', 'Electronics', 'Smartphone', 120),
  ('2023-01-02', 'Furniture', 'Office Chair', 45),
  ('2023-01-03', 'Clothing', 'T-Shirt', 200),
  ('2023-01-04', 'Electronics', 'Laptop', 75),
  ('2023-01-05', 'Furniture', 'Desk', 30),
  ('2023-01-06', 'Clothing', 'Jeans', 150),
  ('2023-01-07', 'Electronics', 'Headphones', 80),
  ('2023-01-08', 'Furniture', 'Bookshelf', 25),
  ('2023-01-09', 'Clothing', 'Jacket', 90),
  ('2023-01-10', 'Electronics', 'Tablet', 65),
  ('2023-01-11', 'Furniture', 'Dining Table', 20),
  ('2023-01-12', 'Clothing', 'Sweater', 110),
  ('2023-01-13', 'Electronics', 'Smartwatch', 130),
  ('2023-01-14', 'Furniture', 'Sofa', 10),
  ('2023-01-15', 'Clothing', 'Hoodie', 140),
  ('2023-01-16', 'Electronics', 'Smartphone', 125),
  ('2023-01-17', 'Furniture', 'Office Chair', 50),
  ('2023-01-18', 'Clothing', 'T-Shirt', 210),
  ('2023-01-19', 'Electronics', 'Laptop', 80),
  ('2023-01-20', 'Furniture', 'Desk', 35),
  ('2023-01-21', 'Clothing', 'Jeans', 160),
  ('2023-01-22', 'Electronics', 'Headphones', 85),
  ('2023-01-23', 'Furniture', 'Bookshelf', 30),
  ('2023-01-24', 'Clothing', 'Jacket', 95),
  ('2023-01-25', 'Electronics', 'Tablet', 70),
  ('2023-01-26', 'Furniture', 'Dining Table', 25),
  ('2023-01-27', 'Clothing', 'Sweater', 115),
  ('2023-01-28', 'Electronics', 'Smartwatch', 135),
  ('2023-01-29', 'Furniture', 'Sofa', 15),
  ('2023-01-30', 'Clothing', 'Hoodie', 145),
  ('2023-01-31', 'Electronics', 'Smartphone', 130) AS inventory_data(inventory_date, category, product_name, quantity)
12 changes: 12 additions & 0 deletions tests/integration/test_dashboards.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,3 +337,15 @@ def test_dashboards_creates_dashboard_with_replace_database(ws, make_dashboard,
sdk_dashboard = dashboards.create_dashboard(dashboard_metadata, dashboard_id=sdk_dashboard.dashboard_id)

assert ws.lakeview.get(sdk_dashboard.dashboard_id)


def test_dashboard_deploys_dashboard_with_filters(ws, make_dashboard):
    """Deploy the filter-spec sample dashboard and verify it is retrievable."""
    api = Dashboards(ws)
    placeholder = make_dashboard()

    sample_folder = Path(__file__).parent / "dashboards" / "filter_spec_basic"
    metadata = DashboardMetadata.from_path(sample_folder)

    deployed = api.create_dashboard(metadata, dashboard_id=placeholder.dashboard_id)

    assert ws.lakeview.get(deployed.dashboard_id)
Loading

0 comments on commit d88bb65

Please sign in to comment.