Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
boonhapus committed Apr 23, 2024
2 parents e835cdb + cf57257 commit 13df525
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 85 deletions.
2 changes: 1 addition & 1 deletion cs_tools/__project__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "1.5.2"
__version__ = "1.5.3"
__docs__ = "https://thoughtspot.github.io/cs_tools/"
__repo__ = "https://github.com/thoughtspot/cs_tools"
__help__ = f"{__repo__}/discussions/"
Expand Down
20 changes: 9 additions & 11 deletions cs_tools/api/middlewares/logical_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import TYPE_CHECKING, Optional, Union
import logging

from cs_tools import utils
from cs_tools.api import _utils
from cs_tools.errors import ContentDoesNotExist
from cs_tools.types import GUID, MetadataCategory, TableRowsFormat
Expand Down Expand Up @@ -103,28 +104,25 @@ def all( # noqa: A003
if include_data_source:
for table in tables:
connection_guid = self.ts.metadata.find_data_source_of_logical_table(guid=table["id"])
source_details = self.ts.metadata.fetch_header_and_extras(
metadata_type="DATA_SOURCE", guid=connection_guid
)
table["data_source"] = source_details["header"]
table["data_source"]["type"] = source_details["type"]
info = self.ts.metadata.fetch_header_and_extras(metadata_type="DATA_SOURCE", guids=[connection_guid]) # type: ignore
table["data_source"] = info[0]["header"]
table["data_source"]["type"] = info[0]["type"]

return tables

def columns(self, guids: list[GUID], *, include_hidden: bool = False, chunksize: int = 10) -> TableRowsFormat:
""" """
columns = []

# for chunk in utils.batched(guids, n=chunksize):
for guid in guids:
r = self.ts.metadata.fetch_header_and_extras(metadata_type="LOGICAL_TABLE", guid=guid)
for chunk in utils.batched(guids, n=chunksize):
info = self.ts.metadata.fetch_header_and_extras(metadata_type="LOGICAL_TABLE", guids=chunk)

for logical_table in r.json()["storables"]:
for column in logical_table.get("columns", []):
for table in info:
for column in table.get("columns", []):
columns.append(
{
"column_guid": column["header"]["id"],
"object_guid": logical_table["header"]["id"],
"object_guid": table["header"]["id"],
"column_name": column["header"]["name"],
"description": column["header"].get("description"),
"data_type": column["dataType"],
Expand Down
149 changes: 79 additions & 70 deletions cs_tools/api/middlewares/metadata.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Optional
import functools as ft
import logging

from cs_tools import utils
Expand All @@ -26,6 +25,7 @@ class MetadataMiddleware:

def __init__(self, ts: ThoughtSpot):
self.ts = ts
self._details_cache: dict[GUID, dict] = {}

def permissions(
self,
Expand Down Expand Up @@ -219,36 +219,50 @@ def objects_exist(self, metadata_type: MetadataObjectType, guids: list[GUID]) ->
existence = {header["id"] for header in r.json()["headers"]}
return {guid: guid in existence for guid in guids}

def fetch_header_and_extras(self, metadata_type: MetadataObjectType, guids: list[GUID]) -> list[dict]:
    """
    Fetch a slimmed-down version of METADATA DETAILS for many objects.

    METADATA DETAILS is expensive. Here's our shortcut: each successful
    lookup is memoized in self._details_cache, so repeated requests for
    the same guid are served from memory instead of re-hitting the API.
    (This replaces the old @ft.cache decorator, which kept `self` alive
    for the lifetime of the cache — ruff B019.)

    Parameters
    ----------
    metadata_type : MetadataObjectType
      type of object to fetch details for (eg. LOGICAL_TABLE, DATA_SOURCE)

    guids : list[GUID]
      identifiers of the objects to fetch

    Returns
    -------
    list[dict]
      one header-and-extras mapping per successfully fetched guid; guids
      whose details call fails are logged and skipped, so the result may
      be shorter than the input.
    """
    data = []

    for guid in guids:
        # NOTE(review): the cache is keyed by guid alone, not by
        # (metadata_type, guid) — assumes guids are unique across all
        # metadata types. Confirm; otherwise a guid fetched under one
        # type could be returned for a request under another.
        try:
            data.append(self._details_cache[guid])
            continue
        except KeyError:
            pass

        r = self.ts.api.v1.metadata_details(metadata_type=metadata_type, guids=[guid], show_hidden=True)

        if r.is_error:
            # best-effort: skip unfetchable objects instead of failing the batch
            log.warning(f"Failed to fetch details for {guid} ({metadata_type})")
            continue

        d = r.json()["storables"][0]

        header_and_extras = {
            "metadata_type": metadata_type,
            "header": d["header"],
            "type": d.get("type"),  # READ: .subtype (eg. ONE_TO_ONE_LOGICAL, WORKSHEET, etc..)
            # LOGICAL_TABLE extras
            "dataSourceId": d.get("dataSourceId"),
            "columns": d.get("columns"),
            # VIZ extras (answer, liveboard)
            "reportContent": d.get("reportContent"),
        }

        self._details_cache[guid] = header_and_extras
        data.append(header_and_extras)

    return data

def find_data_source_of_logical_table(self, guid: GUID) -> GUID:
    """
    Look up the connection (data source) guid backing a LOGICAL_TABLE.

    METADATA DETAILS is expensive. Here's our shortcut: delegates to
    fetch_header_and_extras, which memoizes per-guid results.

    Parameters
    ----------
    guid : GUID
      identifier of the LOGICAL_TABLE to resolve

    Returns
    -------
    GUID
      the dataSourceId recorded in the table's details

    # NOTE(review): if the details call fails, fetch_header_and_extras
    # returns an empty list and info[0] raises IndexError — confirm
    # callers are prepared for that.
    """
    info = self.fetch_header_and_extras(metadata_type="LOGICAL_TABLE", guids=[guid])
    return info[0]["dataSourceId"]

@ft.cache # noqa: B019
def table_references(self, guid: GUID, *, tml_type: str, hidden: bool = False) -> list[MetadataParent]:
"""
Returns a mapping of parent LOGICAL_TABLEs
Expand All @@ -264,61 +278,56 @@ def table_references(self, guid: GUID, *, tml_type: str, hidden: bool = False) -
"""
metadata_type = TMLSupportedContent.from_friendly_type(tml_type)
r = self.fetch_header_and_extras(metadata_type=metadata_type, guids=guid, show_hidden=hidden)
info = self.fetch_header_and_extras(metadata_type=metadata_type, guids=[guid])
mappings: list[MetadataParent] = []

if "storables" not in r.json():
log.warning(f"no detail found for {tml_type} = {guid}")
return mappings

for storable in r.json()["storables"]:
# LOOP THROUGH ALL COLUMNS LOOKING FOR TABLES WE HAVEN'T SEEN
if metadata_type == "LOGICAL_TABLE":
for column in storable["columns"]:
for logical_table in column["sources"]:
parent = MetadataParent(
parent_guid=logical_table["tableId"],
parent_name=logical_table["tableName"],
connection=storable["dataSourceId"],
)

if parent not in mappings:
mappings.append(parent)

# FIND THE TABLE, LOOP THROUGH ALL COLUMNS LOOKING FOR TABLES WE HAVEN'T SEEN
if metadata_type == "QUESTION_ANSWER_BOOK":
visualizations = storable["reportContent"]["sheets"][0]["sheetContent"]["visualizations"]
table_viz = next(v for v in visualizations if v["vizContent"]["vizType"] == "TABLE")

for column in table_viz["vizContent"]["columns"]:
for logical_table in column["referencedTableHeaders"]:
connection_guid = self.find_data_source_of_logical_table(logical_table["id"])

parent = MetadataParent(
parent_guid=logical_table["id"],
parent_name=logical_table["name"],
connection=connection_guid,
)

if parent not in mappings:
mappings.append(parent)

# LOOP THROUGH ALL THE VISUALIZATIONS, FIND THE REFERENCE ANSWER, SEARCH AND ADD THE ANSWER-VIZ MAPPINGS
if metadata_type == "PINBOARD_ANSWER_BOOK":
visualizations = storable["reportContent"]["sheets"][0]["sheetContent"]["visualizations"]

for idx, visualization in enumerate(visualizations, start=1):
viz_mappings = self.table_references(
visualization["vizContent"]["refAnswerBook"]["id"],
tml_type="answer",
hidden=True,
# LOOP THROUGH ALL COLUMNS LOOKING FOR TABLES WE HAVEN'T SEEN
if metadata_type == "LOGICAL_TABLE":
for column in info[0]["columns"]:
for logical_table in column["sources"]:
parent = MetadataParent(
parent_guid=logical_table["tableId"],
parent_name=logical_table["tableName"],
connection=info[0]["dataSourceId"],
)

if parent not in mappings:
mappings.append(parent)

# FIND THE TABLE, LOOP THROUGH ALL COLUMNS LOOKING FOR TABLES WE HAVEN'T SEEN
if metadata_type == "QUESTION_ANSWER_BOOK":
visualizations = info[0]["reportContent"]["sheets"][0]["sheetContent"]["visualizations"]
table_viz = next(v for v in visualizations if v["vizContent"]["vizType"] == "TABLE")

for column in table_viz["vizContent"]["columns"]:
for logical_table in column["referencedTableHeaders"]:
connection_guid = self.find_data_source_of_logical_table(logical_table["id"])

parent = MetadataParent(
parent_guid=logical_table["id"],
parent_name=logical_table["name"],
connection=connection_guid,
)

for parent in viz_mappings:
parent.visualization_guid = visualization["header"]["id"]
parent.visualization_index = f"Viz_{idx}"
if parent not in mappings:
mappings.append(parent)

# LOOP THROUGH ALL THE VISUALIZATIONS, FIND THE REFERENCE ANSWER, SEARCH AND ADD THE ANSWER-VIZ MAPPINGS
if metadata_type == "PINBOARD_ANSWER_BOOK":
visualizations = info[0]["reportContent"]["sheets"][0]["sheetContent"]["visualizations"]

for idx, visualization in enumerate(visualizations, start=1):
viz_mappings = self.table_references(
visualization["vizContent"]["refAnswerBook"]["id"],
tml_type="answer",
hidden=True,
)

for parent in viz_mappings:
parent.visualization_guid = visualization["header"]["id"]
parent.visualization_index = f"Viz_{idx}"

if parent not in mappings:
mappings.append(parent)
if parent not in mappings:
mappings.append(parent)

return mappings
6 changes: 3 additions & 3 deletions cs_tools/cli/tools/searchable/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ def deploy(
log.error(f"Could not find a connection with guid '{connection_guid}'")
raise typer.Exit(1) from None

connection_name = info["header"]["name"]
dialect = info["type"]
connection_name = info[0]["header"]["name"]
dialect = info[0]["type"]

# Care for UPPERCASE or lowercase identity convention in dialects
should_upper = "SNOWFLAKE" in dialect
Expand Down Expand Up @@ -437,7 +437,7 @@ def metadata(
#
for metadata_type in types:
guids = [obj["id"] for obj in content if obj["metadata_type"] == metadata_type]
r = ts.metadata.permissions(guids, type=metadata_type)
r = ts.metadata.permissions(guids, metadata_type=metadata_type)
temp_sync.dump(
models.SharingAccess.__tablename__, data=transform.to_sharing_access(r, cluster=cluster_uuid)
)
Expand Down

0 comments on commit 13df525

Please sign in to comment.