From 3a9b82abae5cbadc8605d3757c14c8d57713b1ea Mon Sep 17 00:00:00 2001
From: hectorcast-db
Date: Thu, 23 May 2024 13:06:31 +0200
Subject: [PATCH] Release v0.28.0 (#652)

### Improvements and new features

* Better error message when Private Link-enabled workspaces reject requests ([#647](https://github.com/databricks/databricks-sdk-py/pull/647)).

### API Changes

* Renamed [w.lakehouse_monitors](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/lakehouse_monitors.html) workspace-level service to [w.quality_monitors](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/quality_monitors.html) (see the usage sketch after this list).
* Added `databricks.sdk.service.vectorsearch.ListValue` dataclass.
* Added `databricks.sdk.service.vectorsearch.MapStringValueEntry` dataclass.
* Added `databricks.sdk.service.vectorsearch.ScanVectorIndexRequest` dataclass.
* Added `databricks.sdk.service.vectorsearch.ScanVectorIndexResponse` dataclass.
* Added `databricks.sdk.service.vectorsearch.Struct` dataclass.
* Added `databricks.sdk.service.vectorsearch.Value` dataclass.
* Added `databricks.sdk.service.catalog.ListConnectionsRequest` dataclass.
* Added `databricks.sdk.service.catalog.MonitorRefreshListResponse` dataclass.
* Added `databricks.sdk.service.pipelines.IngestionGatewayPipelineDefinition` dataclass.
* Added `databricks.sdk.service.pipelines.TableSpecificConfig` dataclass.
* Added `databricks.sdk.service.pipelines.TableSpecificConfigScdType` dataclass.
* Added `databricks.sdk.service.serving.AppDeploymentArtifacts` dataclass.
* Removed `databricks.sdk.service.catalog.EnableSchemaName` dataclass.
* Removed `databricks.sdk.service.catalog.DisableSchemaName` dataclass.
* Removed `databricks.sdk.service.marketplace.SortBySpec` dataclass.
* Removed `databricks.sdk.service.marketplace.SortOrder` dataclass.
* Renamed `databricks.sdk.service.catalog.DeleteLakehouseMonitorRequest` dataclass to `databricks.sdk.service.catalog.DeleteQualityMonitorRequest`.
* Renamed `databricks.sdk.service.catalog.GetLakehouseMonitorRequest` dataclass to `databricks.sdk.service.catalog.GetQualityMonitorRequest`.
* Added `next_page_token` field for `databricks.sdk.service.catalog.ListConnectionsResponse`.
* Added `dashboard_id` field for `databricks.sdk.service.catalog.UpdateMonitor`.
* Added `is_ascending` and `sort_by` fields for `databricks.sdk.service.marketplace.ListListingsRequest`.
* Added `is_ascending` field for `databricks.sdk.service.marketplace.SearchListingsRequest`.
* Added `gateway_definition` field for `databricks.sdk.service.pipelines.CreatePipeline`.
* Added `gateway_definition` field for `databricks.sdk.service.pipelines.EditPipeline`.
* Added `table_configuration` field for `databricks.sdk.service.pipelines.ManagedIngestionPipelineDefinition`.
* Added `gateway_definition` field for `databricks.sdk.service.pipelines.PipelineSpec`.
* Added `table_configuration` field for `databricks.sdk.service.pipelines.SchemaSpec`.
* Added `table_configuration` field for `databricks.sdk.service.pipelines.TableSpec`.
* Added `deployment_artifacts` field for `databricks.sdk.service.serving.AppDeployment`.
* Added `route_optimized` field for `databricks.sdk.service.serving.CreateServingEndpoint`.
* Added `contents` field for `databricks.sdk.service.serving.ExportMetricsResponse`.
* Added `microsoft_entra_client_id`, `microsoft_entra_client_secret` and `microsoft_entra_tenant_id` fields for `databricks.sdk.service.serving.OpenAiConfig`.
* Added `endpoint_url` and `route_optimized` fields for `databricks.sdk.service.serving.ServingEndpointDetailed`.
* Added `storage_root` field for `databricks.sdk.service.sharing.CreateShare`.
* Added `storage_location` and `storage_root` fields for `databricks.sdk.service.sharing.ShareInfo`.
* Added `storage_root` field for `databricks.sdk.service.sharing.UpdateShare`.
* Added `embedding_writeback_table` field for `databricks.sdk.service.vectorsearch.DeltaSyncVectorIndexSpecRequest`.
* Added `embedding_writeback_table` field for `databricks.sdk.service.vectorsearch.DeltaSyncVectorIndexSpecResponse`.
* Changed `schema_name` field for `databricks.sdk.service.catalog.DisableRequest` to `str` dataclass.
* Changed `schema_name` field for `databricks.sdk.service.catalog.EnableRequest` to `str` dataclass.
* Changed `cluster_status()` method for [w.libraries](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/libraries.html) workspace-level service to return `databricks.sdk.service.compute.ClusterLibraryStatuses` dataclass.
* Changed `spec` and `cluster_source` fields for `databricks.sdk.service.compute.ClusterDetails` to `databricks.sdk.service.compute.ClusterSpec` dataclass.
* Changed `openai_api_key` field for `databricks.sdk.service.serving.OpenAiConfig` to no longer be required.
* Removed `cluster_source` field for `databricks.sdk.service.compute.ClusterAttributes`.
* Removed `cluster_source` field for `databricks.sdk.service.compute.ClusterSpec`.
* Removed `databricks.sdk.service.compute.ClusterStatusResponse` dataclass.
* Removed `cluster_source` field for `databricks.sdk.service.compute.CreateCluster`.
* Removed `clone_from` and `cluster_source` fields for `databricks.sdk.service.compute.EditCluster`.
* Removed `sort_by_spec` field for `databricks.sdk.service.marketplace.ListListingsRequest`.
* Added `scan_index()` method for [w.vector_search_indexes](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/vector_search_indexes.html) workspace-level service.
* Changed `list()` method for [w.connections](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/connections.html) workspace-level service to require request of `databricks.sdk.service.catalog.ListConnectionsRequest` dataclass.
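As a quick orientation for SDK users, here is a minimal usage sketch (not part of the generated changelog) covering the `quality_monitors` rename and the now-paginated connections listing. It assumes a `WorkspaceClient` configured from the usual environment or config profile; the table name is a placeholder.

```python
from databricks.sdk import WorkspaceClient

w = WorkspaceClient()

# The service formerly exposed as `w.lakehouse_monitors` is now `w.quality_monitors`;
# the method surface (get, create, run_refresh, ...) is unchanged by the rename.
monitor = w.quality_monitors.get(table_name="main.default.my_table")  # placeholder table name
print(monitor)

# `w.connections.list()` now paginates on the server. The SDK returns an iterator that
# follows `next_page_token` transparently, so existing call sites keep working.
for conn in w.connections.list(max_results=0):  # 0 lets the server pick the page size
    print(conn.name, conn.connection_type)
```

Code that still references `w.lakehouse_monitors` must switch to the new attribute name; the old property is removed from `WorkspaceClient` in the `databricks/sdk/__init__.py` hunk below.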
OpenAPI SHA: 7eb5ad9a2ed3e3f1055968a2d1014ac92c06fe92, Date: 2024-05-21 --- .codegen/_openapi_sha | 2 +- CHANGELOG.md | 62 + databricks/sdk/__init__.py | 18 +- databricks/sdk/service/catalog.py | 1294 +++++++++-------- databricks/sdk/service/compute.py | 177 +-- databricks/sdk/service/jobs.py | 13 +- databricks/sdk/service/marketplace.py | 40 +- databricks/sdk/service/pipelines.py | 121 +- databricks/sdk/service/serving.py | 88 +- databricks/sdk/service/sharing.py | 39 +- databricks/sdk/service/sql.py | 1 - databricks/sdk/service/vectorsearch.py | 189 ++- databricks/sdk/service/workspace.py | 12 +- databricks/sdk/version.py | 2 +- docs/dbdataclasses/catalog.rst | 35 +- docs/dbdataclasses/compute.rst | 8 +- docs/dbdataclasses/marketplace.rst | 15 - docs/dbdataclasses/pipelines.rst | 18 + docs/dbdataclasses/serving.rst | 7 + docs/dbdataclasses/sharing.rst | 3 + docs/dbdataclasses/sql.rst | 2 - docs/dbdataclasses/vectorsearch.rst | 24 + docs/workspace/catalog/connections.rst | 13 +- docs/workspace/catalog/index.rst | 2 +- docs/workspace/catalog/quality_monitors.rst | 236 +++ docs/workspace/catalog/system_schemas.rst | 8 +- docs/workspace/compute/clusters.rst | 46 +- docs/workspace/compute/libraries.rst | 4 +- docs/workspace/jobs/jobs.rst | 4 - .../marketplace/consumer_listings.rst | 8 +- docs/workspace/pipelines/pipelines.rst | 8 +- docs/workspace/serving/serving_endpoints.rst | 10 +- docs/workspace/sharing/shares.rst | 10 +- .../vectorsearch/vector_search_indexes.rst | 17 + docs/workspace/workspace/repos.rst | 6 +- examples/connections/list_connections.py | 3 +- 36 files changed, 1640 insertions(+), 905 deletions(-) create mode 100644 docs/workspace/catalog/quality_monitors.rst diff --git a/.codegen/_openapi_sha b/.codegen/_openapi_sha index f07cf44e5..8c62ac620 100644 --- a/.codegen/_openapi_sha +++ b/.codegen/_openapi_sha @@ -1 +1 @@ -9bb7950fa3390afb97abaa552934bc0a2e069de5 \ No newline at end of file +7eb5ad9a2ed3e3f1055968a2d1014ac92c06fe92 \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 39515bd71..a039d34d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,67 @@ # Version changelog +## 0.28.0 + +### Improvements and new features + + * Better error message when private link enabled workspaces reject requests ([#647](https://github.com/databricks/databricks-sdk-py/pull/647)). + +### API Changes: + + * Renamed [w.lakehouse_monitors](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/lakehouse_monitors.html) workspace-level service to [w.quality_monitors](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/quality_monitors.html) . + * Added `databricks.sdk.service.vectorsearch.ListValue` dataclass. + * Added `databricks.sdk.service.vectorsearch.MapStringValueEntry` dataclass. + * Added `databricks.sdk.service.vectorsearch.ScanVectorIndexRequest` dataclass. + * Added `databricks.sdk.service.vectorsearch.ScanVectorIndexResponse` dataclass. + * Added `databricks.sdk.service.vectorsearch.Struct` dataclass. + * Added `databricks.sdk.service.vectorsearch.Value` dataclass. + * Added `databricks.sdk.service.catalog.ListConnectionsRequest` dataclass. + * Added `databricks.sdk.service.catalog.MonitorRefreshListResponse` dataclass. + * Added `databricks.sdk.service.pipelines.IngestionGatewayPipelineDefinition` dataclass. + * Added `databricks.sdk.service.pipelines.TableSpecificConfig` dataclass. + * Added `databricks.sdk.service.pipelines.TableSpecificConfigScdType` dataclass. 
+ * Added `databricks.sdk.service.serving.AppDeploymentArtifacts` dataclass. + * Removed `databricks.sdk.service.catalog.EnableSchemaName` dataclass. + * Removed `databricks.sdk.service.catalog.DisableSchemaName` dataclass. + * Removed `databricks.sdk.service.marketplace.SortBySpec` dataclass. + * Removed `databricks.sdk.service.marketplace.SortOrder` dataclass. + * Renamed `databricks.sdk.service.catalog.DeleteLakehouseMonitorRequest` dataclass to `databricks.sdk.service.catalog.DeleteQualityMonitorRequest`. + * Renamed `databricks.sdk.service.catalog.GetLakehouseMonitorRequest` dataclass to `databricks.sdk.service.catalog.GetQualityMonitorRequest`. + * Added `next_page_token` field for `databricks.sdk.service.catalog.ListConnectionsResponse`. + * Added `dashboard_id` field for `databricks.sdk.service.catalog.UpdateMonitor`. + * Added `is_ascending` and `sort_by` fields for `databricks.sdk.service.marketplace.ListListingsRequest`. + * Added `is_ascending` field for `databricks.sdk.service.marketplace.SearchListingsRequest`. + * Added `gateway_definition` field for `databricks.sdk.service.pipelines.CreatePipeline`. + * Added `gateway_definition` field for `databricks.sdk.service.pipelines.EditPipeline`. + * Added `table_configuration` field for `databricks.sdk.service.pipelines.ManagedIngestionPipelineDefinition`. + * Added `gateway_definition` field for `databricks.sdk.service.pipelines.PipelineSpec`. + * Added `table_configuration` field for `databricks.sdk.service.pipelines.SchemaSpec`. + * Added `table_configuration` field for `databricks.sdk.service.pipelines.TableSpec`. + * Added `deployment_artifacts` field for `databricks.sdk.service.serving.AppDeployment`. + * Added `route_optimized` field for `databricks.sdk.service.serving.CreateServingEndpoint`. + * Added `contents` field for `databricks.sdk.service.serving.ExportMetricsResponse`. + * Added `microsoft_entra_client_id`, `microsoft_entra_client_secret` and `microsoft_entra_tenant_id` fields for `databricks.sdk.service.serving.OpenAiConfig`. + * Added `endpoint_url` and `route_optimized` fields for `databricks.sdk.service.serving.ServingEndpointDetailed`. + * Added `storage_root` field for `databricks.sdk.service.sharing.CreateShare`. + * Added `storage_location` and `storage_root` fields for `databricks.sdk.service.sharing.ShareInfo`. + * Added `storage_root` field for `databricks.sdk.service.sharing.UpdateShare`. + * Added `embedding_writeback_table` field for `databricks.sdk.service.vectorsearch.DeltaSyncVectorIndexSpecRequest`. + * Added `embedding_writeback_table` field for `databricks.sdk.service.vectorsearch.DeltaSyncVectorIndexSpecResponse`. + * Changed `schema_name` field for `databricks.sdk.service.catalog.DisableRequest` to `str` dataclass. + * Changed `schema_name` field for `databricks.sdk.service.catalog.EnableRequest` to `str` dataclass. + * Changed `cluster_status()` method for [w.libraries](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/libraries.html) workspace-level service to return `databricks.sdk.service.compute.ClusterLibraryStatuses` dataclass. + * Changed `spec` and `cluster_source` fields for `databricks.sdk.service.compute.ClusterDetails` to `databricks.sdk.service.compute.ClusterSpec` dataclass. + * Changed `openai_api_key` field for `databricks.sdk.service.serving.OpenAiConfig` to no longer be required. + * Removed `cluster_source` field for `databricks.sdk.service.compute.ClusterAttributes`. + * Removed `cluster_source` field for `databricks.sdk.service.compute.ClusterSpec`. 
+ * Removed `databricks.sdk.service.compute.ClusterStatusResponse` dataclass. + * Removed `cluster_source` field for `databricks.sdk.service.compute.CreateCluster`. + * Removed `clone_from` and `cluster_source` fields for `databricks.sdk.service.compute.EditCluster`. + * Removed `sort_by_spec` field for `databricks.sdk.service.marketplace.ListListingsRequest`. + * Added `scan_index()` method for [w.vector_search_indexes](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/vector_search_indexes.html) workspace-level service. + * Changed `list()` method for [w.connections](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/connections.html) workspace-level service to require request of `databricks.sdk.service.catalog.ListConnectionsRequest` dataclass. + +OpenAPI SHA: 7eb5ad9a2ed3e3f1055968a2d1014ac92c06fe92, Date: 2024-05-21 ## 0.27.1 ### Bug Fixes diff --git a/databricks/sdk/__init__.py b/databricks/sdk/__init__.py index 312d538be..bb0b2547a 100755 --- a/databricks/sdk/__init__.py +++ b/databricks/sdk/__init__.py @@ -13,9 +13,9 @@ ArtifactAllowlistsAPI, CatalogsAPI, ConnectionsAPI, ExternalLocationsAPI, FunctionsAPI, - GrantsAPI, LakehouseMonitorsAPI, - MetastoresAPI, ModelVersionsAPI, - OnlineTablesAPI, + GrantsAPI, MetastoresAPI, + ModelVersionsAPI, OnlineTablesAPI, + QualityMonitorsAPI, RegisteredModelsAPI, SchemasAPI, StorageCredentialsAPI, SystemSchemasAPI, @@ -194,7 +194,6 @@ def __init__(self, self._instance_profiles = InstanceProfilesAPI(self._api_client) self._ip_access_lists = IpAccessListsAPI(self._api_client) self._jobs = JobsAPI(self._api_client) - self._lakehouse_monitors = LakehouseMonitorsAPI(self._api_client) self._lakeview = LakeviewAPI(self._api_client) self._libraries = LibrariesAPI(self._api_client) self._metastores = MetastoresAPI(self._api_client) @@ -214,6 +213,7 @@ def __init__(self, self._api_client) self._provider_providers = ProviderProvidersAPI(self._api_client) self._providers = ProvidersAPI(self._api_client) + self._quality_monitors = QualityMonitorsAPI(self._api_client) self._queries = QueriesAPI(self._api_client) self._query_history = QueryHistoryAPI(self._api_client) self._query_visualizations = QueryVisualizationsAPI(self._api_client) @@ -425,11 +425,6 @@ def jobs(self) -> JobsAPI: """The Jobs API allows you to create, edit, and delete jobs.""" return self._jobs - @property - def lakehouse_monitors(self) -> LakehouseMonitorsAPI: - """A monitor computes and monitors data or model quality metrics for a table over time.""" - return self._lakehouse_monitors - @property def lakeview(self) -> LakeviewAPI: """These APIs provide specific management operations for Lakeview dashboards.""" @@ -520,6 +515,11 @@ def providers(self) -> ProvidersAPI: """A data provider is an object representing the organization in the real world who shares the data.""" return self._providers + @property + def quality_monitors(self) -> QualityMonitorsAPI: + """A monitor computes and monitors data or model quality metrics for a table over time.""" + return self._quality_monitors + @property def queries(self) -> QueriesAPI: """These endpoints are used for CRUD operations on query definitions.""" diff --git a/databricks/sdk/service/catalog.py b/databricks/sdk/service/catalog.py index 7f95caab7..169970dbd 100755 --- a/databricks/sdk/service/catalog.py +++ b/databricks/sdk/service/catalog.py @@ -1778,14 +1778,6 @@ def from_dict(cls, d: Dict[str, any]) -> DisableResponse: return cls() -class DisableSchemaName(Enum): - - ACCESS = 'access' - BILLING = 'billing' - LINEAGE = 
'lineage' - OPERATIONAL_DATA = 'operational_data' - - @dataclass class EffectivePermissionsList: privilege_assignments: Optional[List[EffectivePrivilegeAssignment]] = None @@ -1916,14 +1908,6 @@ def from_dict(cls, d: Dict[str, any]) -> EnableResponse: return cls() -class EnableSchemaName(Enum): - - ACCESS = 'access' - BILLING = 'billing' - LINEAGE = 'lineage' - OPERATIONAL_DATA = 'operational_data' - - @dataclass class EncryptionDetails: """Encryption options that apply to clients connecting to cloud storage.""" @@ -2575,16 +2559,22 @@ class ListConnectionsResponse: connections: Optional[List[ConnectionInfo]] = None """An array of connection information objects.""" + next_page_token: Optional[str] = None + """Opaque token to retrieve the next page of results. Absent if there are no more pages. + __page_token__ should be set to this value for the next request (for the next page of results).""" + def as_dict(self) -> dict: """Serializes the ListConnectionsResponse into a dictionary suitable for use as a JSON request body.""" body = {} if self.connections: body['connections'] = [v.as_dict() for v in self.connections] + if self.next_page_token is not None: body['next_page_token'] = self.next_page_token return body @classmethod def from_dict(cls, d: Dict[str, any]) -> ListConnectionsResponse: """Deserializes the ListConnectionsResponse from a dictionary.""" - return cls(connections=_repeated_dict(d, 'connections', ConnectionInfo)) + return cls(connections=_repeated_dict(d, 'connections', ConnectionInfo), + next_page_token=d.get('next_page_token', None)) @dataclass @@ -3500,6 +3490,23 @@ class MonitorRefreshInfoTrigger(Enum): SCHEDULE = 'SCHEDULE' +@dataclass +class MonitorRefreshListResponse: + refreshes: Optional[List[MonitorRefreshInfo]] = None + """List of refreshes.""" + + def as_dict(self) -> dict: + """Serializes the MonitorRefreshListResponse into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.refreshes: body['refreshes'] = [v.as_dict() for v in self.refreshes] + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> MonitorRefreshListResponse: + """Deserializes the MonitorRefreshListResponse from a dictionary.""" + return cls(refreshes=_repeated_dict(d, 'refreshes', MonitorRefreshInfo)) + + @dataclass class MonitorSnapshot: @@ -3882,6 +3889,7 @@ class Privilege(Enum): REFRESH = 'REFRESH' SELECT = 'SELECT' SET_SHARE_PERMISSION = 'SET_SHARE_PERMISSION' + SINGLE_USER_ACCESS = 'SINGLE_USER_ACCESS' USAGE = 'USAGE' USE_CATALOG = 'USE_CATALOG' USE_CONNECTION = 'USE_CONNECTION' @@ -5084,6 +5092,10 @@ class UpdateMonitor: metrics (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows).""" + dashboard_id: Optional[str] = None + """Id of dashboard that visualizes the computed metrics. 
This can be empty if the monitor is in + PENDING state.""" + data_classification_config: Optional[MonitorDataClassificationConfig] = None """The data classification config for the monitor.""" @@ -5115,6 +5127,7 @@ def as_dict(self) -> dict: body = {} if self.baseline_table_name is not None: body['baseline_table_name'] = self.baseline_table_name if self.custom_metrics: body['custom_metrics'] = [v.as_dict() for v in self.custom_metrics] + if self.dashboard_id is not None: body['dashboard_id'] = self.dashboard_id if self.data_classification_config: body['data_classification_config'] = self.data_classification_config.as_dict() if self.inference_log: body['inference_log'] = self.inference_log.as_dict() @@ -5132,6 +5145,7 @@ def from_dict(cls, d: Dict[str, any]) -> UpdateMonitor: """Deserializes the UpdateMonitor from a dictionary.""" return cls(baseline_table_name=d.get('baseline_table_name', None), custom_metrics=_repeated_dict(d, 'custom_metrics', MonitorMetric), + dashboard_id=d.get('dashboard_id', None), data_classification_config=_from_dict(d, 'data_classification_config', MonitorDataClassificationConfig), inference_log=_from_dict(d, 'inference_log', MonitorInferenceLog), @@ -6372,19 +6386,38 @@ def get(self, name: str) -> ConnectionInfo: res = self._api.do('GET', f'/api/2.1/unity-catalog/connections/{name}', headers=headers) return ConnectionInfo.from_dict(res) - def list(self) -> Iterator[ConnectionInfo]: + def list(self, + *, + max_results: Optional[int] = None, + page_token: Optional[str] = None) -> Iterator[ConnectionInfo]: """List connections. List all connections. + :param max_results: int (optional) + Maximum number of connections to return. - If not set, all connections are returned (not + recommended). - when set to a value greater than 0, the page length is the minimum of this value and + a server configured value; - when set to 0, the page length is set to a server configured value + (recommended); - when set to a value less than 0, an invalid parameter error is returned; + :param page_token: str (optional) + Opaque pagination token to go to next page based on previous query. + :returns: Iterator over :class:`ConnectionInfo` """ + query = {} + if max_results is not None: query['max_results'] = max_results + if page_token is not None: query['page_token'] = page_token headers = {'Accept': 'application/json', } - json = self._api.do('GET', '/api/2.1/unity-catalog/connections', headers=headers) - parsed = ListConnectionsResponse.from_dict(json).connections - return parsed if parsed is not None else [] + while True: + json = self._api.do('GET', '/api/2.1/unity-catalog/connections', query=query, headers=headers) + if 'connections' in json: + for v in json['connections']: + yield ConnectionInfo.from_dict(v) + if 'next_page_token' not in json or not json['next_page_token']: + return + query['page_token'] = json['next_page_token'] def update(self, name: str, @@ -6896,364 +6929,249 @@ def update(self, return PermissionsList.from_dict(res) -class LakehouseMonitorsAPI: - """A monitor computes and monitors data or model quality metrics for a table over time. It generates metrics - tables and a dashboard that you can use to monitor table health and set alerts. +class MetastoresAPI: + """A metastore is the top-level container of objects in Unity Catalog. It stores data assets (tables and + views) and the permissions that govern access to them. Databricks account admins can create metastores and + assign them to Databricks workspaces to control which workloads use each metastore. 
For a workspace to use + Unity Catalog, it must have a Unity Catalog metastore attached. - Most write operations require the user to be the owner of the table (or its parent schema or parent - catalog). Viewing the dashboard, computed metrics, or monitor configuration only requires the user to have - **SELECT** privileges on the table (along with **USE_SCHEMA** and **USE_CATALOG**).""" + Each metastore is configured with a root storage location in a cloud storage account. This storage + location is used for metadata and managed tables data. + + NOTE: This metastore is distinct from the metastore included in Databricks workspaces created before Unity + Catalog was released. If your workspace includes a legacy Hive metastore, the data in that metastore is + available in a catalog named hive_metastore.""" def __init__(self, api_client): self._api = api_client - def cancel_refresh(self, table_name: str, refresh_id: str): - """Cancel refresh. - - Cancel an active monitor refresh for the given refresh ID. - - The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the - table's parent catalog and be an owner of the table's parent schema 3. have the following permissions: - - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - be an - owner of the table + def assign(self, workspace_id: int, metastore_id: str, default_catalog_name: str): + """Create an assignment. - Additionally, the call must be made from the workspace where the monitor was created. + Creates a new metastore assignment. If an assignment for the same __workspace_id__ exists, it will be + overwritten by the new __metastore_id__ and __default_catalog_name__. The caller must be an account + admin. - :param table_name: str - Full name of the table. - :param refresh_id: str - ID of the refresh. + :param workspace_id: int + A workspace ID. + :param metastore_id: str + The unique ID of the metastore. + :param default_catalog_name: str + The name of the default catalog in the metastore. """ + body = {} + if default_catalog_name is not None: body['default_catalog_name'] = default_catalog_name + if metastore_id is not None: body['metastore_id'] = metastore_id + headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } - headers = {} - - self._api.do('POST', - f'/api/2.1/unity-catalog/tables/{table_name}/monitor/refreshes/{refresh_id}/cancel', + self._api.do('PUT', + f'/api/2.1/unity-catalog/workspaces/{workspace_id}/metastore', + body=body, headers=headers) def create(self, - table_name: str, - assets_dir: str, - output_schema_name: str, + name: str, *, - baseline_table_name: Optional[str] = None, - custom_metrics: Optional[List[MonitorMetric]] = None, - data_classification_config: Optional[MonitorDataClassificationConfig] = None, - inference_log: Optional[MonitorInferenceLog] = None, - notifications: Optional[MonitorNotifications] = None, - schedule: Optional[MonitorCronSchedule] = None, - skip_builtin_dashboard: Optional[bool] = None, - slicing_exprs: Optional[List[str]] = None, - snapshot: Optional[MonitorSnapshot] = None, - time_series: Optional[MonitorTimeSeries] = None, - warehouse_id: Optional[str] = None) -> MonitorInfo: - """Create a table monitor. - - Creates a new monitor for the specified table. - - The caller must either: 1. be an owner of the table's parent catalog, have **USE_SCHEMA** on the - table's parent schema, and have **SELECT** access on the table 2. 
have **USE_CATALOG** on the table's - parent catalog, be an owner of the table's parent schema, and have **SELECT** access on the table. 3. - have the following permissions: - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on - the table's parent schema - be an owner of the table. + region: Optional[str] = None, + storage_root: Optional[str] = None) -> MetastoreInfo: + """Create a metastore. - Workspace assets, such as the dashboard, will be created in the workspace where this call was made. + Creates a new metastore based on a provided name and optional storage root path. By default (if the + __owner__ field is not set), the owner of the new metastore is the user calling the + __createMetastore__ API. If the __owner__ field is set to the empty string (**""**), the ownership is + assigned to the System User instead. - :param table_name: str - Full name of the table. - :param assets_dir: str - The directory to store monitoring assets (e.g. dashboard, metric tables). - :param output_schema_name: str - Schema where output metric tables are created. - :param baseline_table_name: str (optional) - Name of the baseline table from which drift metrics are computed from. Columns in the monitored - table should also be present in the baseline table. - :param custom_metrics: List[:class:`MonitorMetric`] (optional) - Custom metrics to compute on the monitored table. These can be aggregate metrics, derived metrics - (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows). - :param data_classification_config: :class:`MonitorDataClassificationConfig` (optional) - The data classification config for the monitor. - :param inference_log: :class:`MonitorInferenceLog` (optional) - Configuration for monitoring inference logs. - :param notifications: :class:`MonitorNotifications` (optional) - The notification settings for the monitor. - :param schedule: :class:`MonitorCronSchedule` (optional) - The schedule for automatically updating and refreshing metric tables. - :param skip_builtin_dashboard: bool (optional) - Whether to skip creating a default dashboard summarizing data quality metrics. - :param slicing_exprs: List[str] (optional) - List of column expressions to slice data with for targeted analysis. The data is grouped by each - expression independently, resulting in a separate slice for each predicate and its complements. For - high-cardinality columns, only the top 100 unique values by frequency will generate slices. - :param snapshot: :class:`MonitorSnapshot` (optional) - Configuration for monitoring snapshot tables. - :param time_series: :class:`MonitorTimeSeries` (optional) - Configuration for monitoring time series tables. - :param warehouse_id: str (optional) - Optional argument to specify the warehouse for dashboard creation. If not specified, the first - running warehouse will be used. + :param name: str + The user-specified name of the metastore. + :param region: str (optional) + Cloud region which the metastore serves (e.g., `us-west-2`, `westus`). If this field is omitted, the + region of the workspace receiving the request will be used. 
+ :param storage_root: str (optional) + The storage root URL for metastore - :returns: :class:`MonitorInfo` + :returns: :class:`MetastoreInfo` """ body = {} - if assets_dir is not None: body['assets_dir'] = assets_dir - if baseline_table_name is not None: body['baseline_table_name'] = baseline_table_name - if custom_metrics is not None: body['custom_metrics'] = [v.as_dict() for v in custom_metrics] - if data_classification_config is not None: - body['data_classification_config'] = data_classification_config.as_dict() - if inference_log is not None: body['inference_log'] = inference_log.as_dict() - if notifications is not None: body['notifications'] = notifications.as_dict() - if output_schema_name is not None: body['output_schema_name'] = output_schema_name - if schedule is not None: body['schedule'] = schedule.as_dict() - if skip_builtin_dashboard is not None: body['skip_builtin_dashboard'] = skip_builtin_dashboard - if slicing_exprs is not None: body['slicing_exprs'] = [v for v in slicing_exprs] - if snapshot is not None: body['snapshot'] = snapshot.as_dict() - if time_series is not None: body['time_series'] = time_series.as_dict() - if warehouse_id is not None: body['warehouse_id'] = warehouse_id + if name is not None: body['name'] = name + if region is not None: body['region'] = region + if storage_root is not None: body['storage_root'] = storage_root headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } - res = self._api.do('POST', - f'/api/2.1/unity-catalog/tables/{table_name}/monitor', - body=body, - headers=headers) - return MonitorInfo.from_dict(res) + res = self._api.do('POST', '/api/2.1/unity-catalog/metastores', body=body, headers=headers) + return MetastoreInfo.from_dict(res) - def delete(self, table_name: str): - """Delete a table monitor. - - Deletes a monitor for the specified table. + def current(self) -> MetastoreAssignment: + """Get metastore assignment for workspace. - The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the - table's parent catalog and be an owner of the table's parent schema 3. have the following permissions: - - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - be an - owner of the table. + Gets the metastore assignment for the workspace being accessed. - Additionally, the call must be made from the workspace where the monitor was created. + :returns: :class:`MetastoreAssignment` + """ + + headers = {'Accept': 'application/json', } + + res = self._api.do('GET', '/api/2.1/unity-catalog/current-metastore-assignment', headers=headers) + return MetastoreAssignment.from_dict(res) + + def delete(self, id: str, *, force: Optional[bool] = None): + """Delete a metastore. - Note that the metric tables and dashboard will not be deleted as part of this call; those assets must - be manually cleaned up (if desired). + Deletes a metastore. The caller must be a metastore admin. - :param table_name: str - Full name of the table. + :param id: str + Unique ID of the metastore. + :param force: bool (optional) + Force deletion even if the metastore is not empty. Default is false. 
""" - headers = {} + query = {} + if force is not None: query['force'] = force + headers = {'Accept': 'application/json', } - self._api.do('DELETE', f'/api/2.1/unity-catalog/tables/{table_name}/monitor', headers=headers) + self._api.do('DELETE', f'/api/2.1/unity-catalog/metastores/{id}', query=query, headers=headers) - def get(self, table_name: str) -> MonitorInfo: - """Get a table monitor. - - Gets a monitor for the specified table. - - The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the - table's parent catalog and be an owner of the table's parent schema. 3. have the following - permissions: - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent - schema - **SELECT** privilege on the table. + def get(self, id: str) -> MetastoreInfo: + """Get a metastore. - The returned information includes configuration values, as well as information on assets created by - the monitor. Some information (e.g., dashboard) may be filtered out if the caller is in a different - workspace than where the monitor was created. + Gets a metastore that matches the supplied ID. The caller must be a metastore admin to retrieve this + info. - :param table_name: str - Full name of the table. + :param id: str + Unique ID of the metastore. - :returns: :class:`MonitorInfo` + :returns: :class:`MetastoreInfo` """ headers = {'Accept': 'application/json', } - res = self._api.do('GET', f'/api/2.1/unity-catalog/tables/{table_name}/monitor', headers=headers) - return MonitorInfo.from_dict(res) + res = self._api.do('GET', f'/api/2.1/unity-catalog/metastores/{id}', headers=headers) + return MetastoreInfo.from_dict(res) - def get_refresh(self, table_name: str, refresh_id: str) -> MonitorRefreshInfo: - """Get refresh. - - Gets info about a specific monitor refresh using the given refresh ID. - - The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the - table's parent catalog and be an owner of the table's parent schema 3. have the following permissions: - - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - - **SELECT** privilege on the table. - - Additionally, the call must be made from the workspace where the monitor was created. + def list(self) -> Iterator[MetastoreInfo]: + """List metastores. - :param table_name: str - Full name of the table. - :param refresh_id: str - ID of the refresh. + Gets an array of the available metastores (as __MetastoreInfo__ objects). The caller must be an admin + to retrieve this info. There is no guarantee of a specific ordering of the elements in the array. - :returns: :class:`MonitorRefreshInfo` + :returns: Iterator over :class:`MetastoreInfo` """ headers = {'Accept': 'application/json', } - res = self._api.do('GET', - f'/api/2.1/unity-catalog/tables/{table_name}/monitor/refreshes/{refresh_id}', - headers=headers) - return MonitorRefreshInfo.from_dict(res) + json = self._api.do('GET', '/api/2.1/unity-catalog/metastores', headers=headers) + parsed = ListMetastoresResponse.from_dict(json).metastores + return parsed if parsed is not None else [] - def list_refreshes(self, table_name: str) -> Iterator[MonitorRefreshInfo]: - """List refreshes. - - Gets an array containing the history of the most recent refreshes (up to 25) for this table. - - The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the - table's parent catalog and be an owner of the table's parent schema 3. 
have the following permissions: - - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - - **SELECT** privilege on the table. - - Additionally, the call must be made from the workspace where the monitor was created. + def summary(self) -> GetMetastoreSummaryResponse: + """Get a metastore summary. - :param table_name: str - Full name of the table. + Gets information about a metastore. This summary includes the storage credential, the cloud vendor, + the cloud region, and the global metastore ID. - :returns: Iterator over :class:`MonitorRefreshInfo` + :returns: :class:`GetMetastoreSummaryResponse` """ headers = {'Accept': 'application/json', } - res = self._api.do('GET', - f'/api/2.1/unity-catalog/tables/{table_name}/monitor/refreshes', - headers=headers) - return [MonitorRefreshInfo.from_dict(v) for v in res] + res = self._api.do('GET', '/api/2.1/unity-catalog/metastore_summary', headers=headers) + return GetMetastoreSummaryResponse.from_dict(res) - def run_refresh(self, table_name: str) -> MonitorRefreshInfo: - """Queue a metric refresh for a monitor. - - Queues a metric refresh on the monitor for the specified table. The refresh will execute in the - background. + def unassign(self, workspace_id: int, metastore_id: str): + """Delete an assignment. - The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the - table's parent catalog and be an owner of the table's parent schema 3. have the following permissions: - - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - be an - owner of the table + Deletes a metastore assignment. The caller must be an account administrator. - Additionally, the call must be made from the workspace where the monitor was created. + :param workspace_id: int + A workspace ID. + :param metastore_id: str + Query for the ID of the metastore to delete. - :param table_name: str - Full name of the table. - :returns: :class:`MonitorRefreshInfo` """ + query = {} + if metastore_id is not None: query['metastore_id'] = metastore_id headers = {'Accept': 'application/json', } - res = self._api.do('POST', - f'/api/2.1/unity-catalog/tables/{table_name}/monitor/refreshes', - headers=headers) - return MonitorRefreshInfo.from_dict(res) + self._api.do('DELETE', + f'/api/2.1/unity-catalog/workspaces/{workspace_id}/metastore', + query=query, + headers=headers) def update(self, - table_name: str, - output_schema_name: str, + id: str, *, - baseline_table_name: Optional[str] = None, - custom_metrics: Optional[List[MonitorMetric]] = None, - data_classification_config: Optional[MonitorDataClassificationConfig] = None, - inference_log: Optional[MonitorInferenceLog] = None, - notifications: Optional[MonitorNotifications] = None, - schedule: Optional[MonitorCronSchedule] = None, - slicing_exprs: Optional[List[str]] = None, - snapshot: Optional[MonitorSnapshot] = None, - time_series: Optional[MonitorTimeSeries] = None) -> MonitorInfo: - """Update a table monitor. - - Updates a monitor for the specified table. - - The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the - table's parent catalog and be an owner of the table's parent schema 3. have the following permissions: - - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - be an - owner of the table. 
- - Additionally, the call must be made from the workspace where the monitor was created, and the caller - must be the original creator of the monitor. + delta_sharing_organization_name: Optional[str] = None, + delta_sharing_recipient_token_lifetime_in_seconds: Optional[int] = None, + delta_sharing_scope: Optional[UpdateMetastoreDeltaSharingScope] = None, + new_name: Optional[str] = None, + owner: Optional[str] = None, + privilege_model_version: Optional[str] = None, + storage_root_credential_id: Optional[str] = None) -> MetastoreInfo: + """Update a metastore. - Certain configuration fields, such as output asset identifiers, cannot be updated. + Updates information for a specific metastore. The caller must be a metastore admin. If the __owner__ + field is set to the empty string (**""**), the ownership is updated to the System User. - :param table_name: str - Full name of the table. - :param output_schema_name: str - Schema where output metric tables are created. - :param baseline_table_name: str (optional) - Name of the baseline table from which drift metrics are computed from. Columns in the monitored - table should also be present in the baseline table. - :param custom_metrics: List[:class:`MonitorMetric`] (optional) - Custom metrics to compute on the monitored table. These can be aggregate metrics, derived metrics - (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows). - :param data_classification_config: :class:`MonitorDataClassificationConfig` (optional) - The data classification config for the monitor. - :param inference_log: :class:`MonitorInferenceLog` (optional) - Configuration for monitoring inference logs. - :param notifications: :class:`MonitorNotifications` (optional) - The notification settings for the monitor. - :param schedule: :class:`MonitorCronSchedule` (optional) - The schedule for automatically updating and refreshing metric tables. - :param slicing_exprs: List[str] (optional) - List of column expressions to slice data with for targeted analysis. The data is grouped by each - expression independently, resulting in a separate slice for each predicate and its complements. For - high-cardinality columns, only the top 100 unique values by frequency will generate slices. - :param snapshot: :class:`MonitorSnapshot` (optional) - Configuration for monitoring snapshot tables. - :param time_series: :class:`MonitorTimeSeries` (optional) - Configuration for monitoring time series tables. + :param id: str + Unique ID of the metastore. + :param delta_sharing_organization_name: str (optional) + The organization name of a Delta Sharing entity, to be used in Databricks-to-Databricks Delta + Sharing as the official name. + :param delta_sharing_recipient_token_lifetime_in_seconds: int (optional) + The lifetime of delta sharing recipient token in seconds. + :param delta_sharing_scope: :class:`UpdateMetastoreDeltaSharingScope` (optional) + The scope of Delta Sharing enabled for the metastore. + :param new_name: str (optional) + New name for the metastore. + :param owner: str (optional) + The owner of the metastore. + :param privilege_model_version: str (optional) + Privilege model version of the metastore, of the form `major.minor` (e.g., `1.0`). + :param storage_root_credential_id: str (optional) + UUID of storage credential to access the metastore storage_root. 
- :returns: :class:`MonitorInfo` + :returns: :class:`MetastoreInfo` """ body = {} - if baseline_table_name is not None: body['baseline_table_name'] = baseline_table_name - if custom_metrics is not None: body['custom_metrics'] = [v.as_dict() for v in custom_metrics] - if data_classification_config is not None: - body['data_classification_config'] = data_classification_config.as_dict() - if inference_log is not None: body['inference_log'] = inference_log.as_dict() - if notifications is not None: body['notifications'] = notifications.as_dict() - if output_schema_name is not None: body['output_schema_name'] = output_schema_name - if schedule is not None: body['schedule'] = schedule.as_dict() - if slicing_exprs is not None: body['slicing_exprs'] = [v for v in slicing_exprs] - if snapshot is not None: body['snapshot'] = snapshot.as_dict() - if time_series is not None: body['time_series'] = time_series.as_dict() + if delta_sharing_organization_name is not None: + body['delta_sharing_organization_name'] = delta_sharing_organization_name + if delta_sharing_recipient_token_lifetime_in_seconds is not None: + body[ + 'delta_sharing_recipient_token_lifetime_in_seconds'] = delta_sharing_recipient_token_lifetime_in_seconds + if delta_sharing_scope is not None: body['delta_sharing_scope'] = delta_sharing_scope.value + if new_name is not None: body['new_name'] = new_name + if owner is not None: body['owner'] = owner + if privilege_model_version is not None: body['privilege_model_version'] = privilege_model_version + if storage_root_credential_id is not None: + body['storage_root_credential_id'] = storage_root_credential_id headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } - res = self._api.do('PUT', - f'/api/2.1/unity-catalog/tables/{table_name}/monitor', - body=body, - headers=headers) - return MonitorInfo.from_dict(res) - - -class MetastoresAPI: - """A metastore is the top-level container of objects in Unity Catalog. It stores data assets (tables and - views) and the permissions that govern access to them. Databricks account admins can create metastores and - assign them to Databricks workspaces to control which workloads use each metastore. For a workspace to use - Unity Catalog, it must have a Unity Catalog metastore attached. - - Each metastore is configured with a root storage location in a cloud storage account. This storage - location is used for metadata and managed tables data. - - NOTE: This metastore is distinct from the metastore included in Databricks workspaces created before Unity - Catalog was released. If your workspace includes a legacy Hive metastore, the data in that metastore is - available in a catalog named hive_metastore.""" - - def __init__(self, api_client): - self._api = api_client + res = self._api.do('PATCH', f'/api/2.1/unity-catalog/metastores/{id}', body=body, headers=headers) + return MetastoreInfo.from_dict(res) - def assign(self, workspace_id: int, metastore_id: str, default_catalog_name: str): - """Create an assignment. + def update_assignment(self, + workspace_id: int, + *, + default_catalog_name: Optional[str] = None, + metastore_id: Optional[str] = None): + """Update an assignment. - Creates a new metastore assignment. If an assignment for the same __workspace_id__ exists, it will be - overwritten by the new __metastore_id__ and __default_catalog_name__. The caller must be an account - admin. + Updates a metastore assignment. 
This operation can be used to update __metastore_id__ or + __default_catalog_name__ for a specified Workspace, if the Workspace is already assigned a metastore. + The caller must be an account admin to update __metastore_id__; otherwise, the caller can be a + Workspace admin. :param workspace_id: int A workspace ID. - :param metastore_id: str + :param default_catalog_name: str (optional) + The name of the default catalog for the metastore. + :param metastore_id: str (optional) The unique ID of the metastore. - :param default_catalog_name: str - The name of the default catalog in the metastore. """ @@ -7262,463 +7180,583 @@ def assign(self, workspace_id: int, metastore_id: str, default_catalog_name: str if metastore_id is not None: body['metastore_id'] = metastore_id headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } - self._api.do('PUT', + self._api.do('PATCH', f'/api/2.1/unity-catalog/workspaces/{workspace_id}/metastore', body=body, headers=headers) - def create(self, - name: str, - *, - region: Optional[str] = None, - storage_root: Optional[str] = None) -> MetastoreInfo: - """Create a metastore. + +class ModelVersionsAPI: + """Databricks provides a hosted version of MLflow Model Registry in Unity Catalog. Models in Unity Catalog + provide centralized access control, auditing, lineage, and discovery of ML models across Databricks + workspaces. + + This API reference documents the REST endpoints for managing model versions in Unity Catalog. For more + details, see the [registered models API docs](/api/workspace/registeredmodels).""" + + def __init__(self, api_client): + self._api = api_client + + def delete(self, full_name: str, version: int): + """Delete a Model Version. - Creates a new metastore based on a provided name and optional storage root path. By default (if the - __owner__ field is not set), the owner of the new metastore is the user calling the - __createMetastore__ API. If the __owner__ field is set to the empty string (**""**), the ownership is - assigned to the System User instead. + Deletes a model version from the specified registered model. Any aliases assigned to the model version + will also be deleted. - :param name: str - The user-specified name of the metastore. - :param region: str (optional) - Cloud region which the metastore serves (e.g., `us-west-2`, `westus`). If this field is omitted, the - region of the workspace receiving the request will be used. - :param storage_root: str (optional) - The storage root URL for metastore + The caller must be a metastore admin or an owner of the parent registered model. For the latter case, + the caller must also be the owner or have the **USE_CATALOG** privilege on the parent catalog and the + **USE_SCHEMA** privilege on the parent schema. - :returns: :class:`MetastoreInfo` - """ - body = {} - if name is not None: body['name'] = name - if region is not None: body['region'] = region - if storage_root is not None: body['storage_root'] = storage_root - headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } - - res = self._api.do('POST', '/api/2.1/unity-catalog/metastores', body=body, headers=headers) - return MetastoreInfo.from_dict(res) - - def current(self) -> MetastoreAssignment: - """Get metastore assignment for workspace. + :param full_name: str + The three-level (fully qualified) name of the model version + :param version: int + The integer version number of the model version - Gets the metastore assignment for the workspace being accessed. 
- :returns: :class:`MetastoreAssignment` """ - headers = {'Accept': 'application/json', } + headers = {} - res = self._api.do('GET', '/api/2.1/unity-catalog/current-metastore-assignment', headers=headers) - return MetastoreAssignment.from_dict(res) + self._api.do('DELETE', + f'/api/2.1/unity-catalog/models/{full_name}/versions/{version}', + headers=headers) - def delete(self, id: str, *, force: Optional[bool] = None): - """Delete a metastore. + def get(self, + full_name: str, + version: int, + *, + include_browse: Optional[bool] = None) -> RegisteredModelInfo: + """Get a Model Version. - Deletes a metastore. The caller must be a metastore admin. + Get a model version. - :param id: str - Unique ID of the metastore. - :param force: bool (optional) - Force deletion even if the metastore is not empty. Default is false. + The caller must be a metastore admin or an owner of (or have the **EXECUTE** privilege on) the parent + registered model. For the latter case, the caller must also be the owner or have the **USE_CATALOG** + privilege on the parent catalog and the **USE_SCHEMA** privilege on the parent schema. + :param full_name: str + The three-level (fully qualified) name of the model version + :param version: int + The integer version number of the model version + :param include_browse: bool (optional) + Whether to include model versions in the response for which the principal can only access selective + metadata for + :returns: :class:`RegisteredModelInfo` """ query = {} - if force is not None: query['force'] = force + if include_browse is not None: query['include_browse'] = include_browse headers = {'Accept': 'application/json', } - self._api.do('DELETE', f'/api/2.1/unity-catalog/metastores/{id}', query=query, headers=headers) + res = self._api.do('GET', + f'/api/2.1/unity-catalog/models/{full_name}/versions/{version}', + query=query, + headers=headers) + return RegisteredModelInfo.from_dict(res) - def get(self, id: str) -> MetastoreInfo: - """Get a metastore. + def get_by_alias(self, full_name: str, alias: str) -> ModelVersionInfo: + """Get Model Version By Alias. - Gets a metastore that matches the supplied ID. The caller must be a metastore admin to retrieve this - info. + Get a model version by alias. - :param id: str - Unique ID of the metastore. + The caller must be a metastore admin or an owner of (or have the **EXECUTE** privilege on) the + registered model. For the latter case, the caller must also be the owner or have the **USE_CATALOG** + privilege on the parent catalog and the **USE_SCHEMA** privilege on the parent schema. - :returns: :class:`MetastoreInfo` + :param full_name: str + The three-level (fully qualified) name of the registered model + :param alias: str + The name of the alias + + :returns: :class:`ModelVersionInfo` """ headers = {'Accept': 'application/json', } - res = self._api.do('GET', f'/api/2.1/unity-catalog/metastores/{id}', headers=headers) - return MetastoreInfo.from_dict(res) + res = self._api.do('GET', + f'/api/2.1/unity-catalog/models/{full_name}/aliases/{alias}', + headers=headers) + return ModelVersionInfo.from_dict(res) - def list(self) -> Iterator[MetastoreInfo]: - """List metastores. + def list(self, + full_name: str, + *, + include_browse: Optional[bool] = None, + max_results: Optional[int] = None, + page_token: Optional[str] = None) -> Iterator[ModelVersionInfo]: + """List Model Versions. - Gets an array of the available metastores (as __MetastoreInfo__ objects). The caller must be an admin - to retrieve this info. 
There is no guarantee of a specific ordering of the elements in the array. + List model versions. You can list model versions under a particular schema, or list all model versions + in the current metastore. - :returns: Iterator over :class:`MetastoreInfo` - """ - - headers = {'Accept': 'application/json', } - - json = self._api.do('GET', '/api/2.1/unity-catalog/metastores', headers=headers) - parsed = ListMetastoresResponse.from_dict(json).metastores - return parsed if parsed is not None else [] - - def summary(self) -> GetMetastoreSummaryResponse: - """Get a metastore summary. + The returned models are filtered based on the privileges of the calling user. For example, the + metastore admin is able to list all the model versions. A regular user needs to be the owner or have + the **EXECUTE** privilege on the parent registered model to recieve the model versions in the + response. For the latter case, the caller must also be the owner or have the **USE_CATALOG** privilege + on the parent catalog and the **USE_SCHEMA** privilege on the parent schema. - Gets information about a metastore. This summary includes the storage credential, the cloud vendor, - the cloud region, and the global metastore ID. + There is no guarantee of a specific ordering of the elements in the response. The elements in the + response will not contain any aliases or tags. - :returns: :class:`GetMetastoreSummaryResponse` + :param full_name: str + The full three-level name of the registered model under which to list model versions + :param include_browse: bool (optional) + Whether to include model versions in the response for which the principal can only access selective + metadata for + :param max_results: int (optional) + Maximum number of model versions to return. If not set, the page length is set to a server + configured value (100, as of 1/3/2024). - when set to a value greater than 0, the page length is the + minimum of this value and a server configured value(1000, as of 1/3/2024); - when set to 0, the page + length is set to a server configured value (100, as of 1/3/2024) (recommended); - when set to a + value less than 0, an invalid parameter error is returned; + :param page_token: str (optional) + Opaque pagination token to go to next page based on previous query. + + :returns: Iterator over :class:`ModelVersionInfo` """ + query = {} + if include_browse is not None: query['include_browse'] = include_browse + if max_results is not None: query['max_results'] = max_results + if page_token is not None: query['page_token'] = page_token headers = {'Accept': 'application/json', } - res = self._api.do('GET', '/api/2.1/unity-catalog/metastore_summary', headers=headers) - return GetMetastoreSummaryResponse.from_dict(res) + while True: + json = self._api.do('GET', + f'/api/2.1/unity-catalog/models/{full_name}/versions', + query=query, + headers=headers) + if 'model_versions' in json: + for v in json['model_versions']: + yield ModelVersionInfo.from_dict(v) + if 'next_page_token' not in json or not json['next_page_token']: + return + query['page_token'] = json['next_page_token'] - def unassign(self, workspace_id: int, metastore_id: str): - """Delete an assignment. + def update(self, full_name: str, version: int, *, comment: Optional[str] = None) -> ModelVersionInfo: + """Update a Model Version. - Deletes a metastore assignment. The caller must be an account administrator. + Updates the specified model version. - :param workspace_id: int - A workspace ID. 
- :param metastore_id: str - Query for the ID of the metastore to delete. + The caller must be a metastore admin or an owner of the parent registered model. For the latter case, + the caller must also be the owner or have the **USE_CATALOG** privilege on the parent catalog and the + **USE_SCHEMA** privilege on the parent schema. + + Currently only the comment of the model version can be updated. + :param full_name: str + The three-level (fully qualified) name of the model version + :param version: int + The integer version number of the model version + :param comment: str (optional) + The comment attached to the model version + :returns: :class:`ModelVersionInfo` """ + body = {} + if comment is not None: body['comment'] = comment + headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } - query = {} - if metastore_id is not None: query['metastore_id'] = metastore_id - headers = {'Accept': 'application/json', } + res = self._api.do('PATCH', + f'/api/2.1/unity-catalog/models/{full_name}/versions/{version}', + body=body, + headers=headers) + return ModelVersionInfo.from_dict(res) - self._api.do('DELETE', - f'/api/2.1/unity-catalog/workspaces/{workspace_id}/metastore', - query=query, - headers=headers) - def update(self, - id: str, - *, - delta_sharing_organization_name: Optional[str] = None, - delta_sharing_recipient_token_lifetime_in_seconds: Optional[int] = None, - delta_sharing_scope: Optional[UpdateMetastoreDeltaSharingScope] = None, - new_name: Optional[str] = None, - owner: Optional[str] = None, - privilege_model_version: Optional[str] = None, - storage_root_credential_id: Optional[str] = None) -> MetastoreInfo: - """Update a metastore. +class OnlineTablesAPI: + """Online tables provide lower latency and higher QPS access to data from Delta tables.""" + + def __init__(self, api_client): + self._api = api_client + + def create(self, *, name: Optional[str] = None, spec: Optional[OnlineTableSpec] = None) -> OnlineTable: + """Create an Online Table. - Updates information for a specific metastore. The caller must be a metastore admin. If the __owner__ - field is set to the empty string (**""**), the ownership is updated to the System User. + Create a new Online Table. - :param id: str - Unique ID of the metastore. - :param delta_sharing_organization_name: str (optional) - The organization name of a Delta Sharing entity, to be used in Databricks-to-Databricks Delta - Sharing as the official name. - :param delta_sharing_recipient_token_lifetime_in_seconds: int (optional) - The lifetime of delta sharing recipient token in seconds. - :param delta_sharing_scope: :class:`UpdateMetastoreDeltaSharingScope` (optional) - The scope of Delta Sharing enabled for the metastore. - :param new_name: str (optional) - New name for the metastore. - :param owner: str (optional) - The owner of the metastore. - :param privilege_model_version: str (optional) - Privilege model version of the metastore, of the form `major.minor` (e.g., `1.0`). - :param storage_root_credential_id: str (optional) - UUID of storage credential to access the metastore storage_root. + :param name: str (optional) + Full three-part (catalog, schema, table) name of the table. + :param spec: :class:`OnlineTableSpec` (optional) + Specification of the online table. 
- :returns: :class:`MetastoreInfo` + :returns: :class:`OnlineTable` """ body = {} - if delta_sharing_organization_name is not None: - body['delta_sharing_organization_name'] = delta_sharing_organization_name - if delta_sharing_recipient_token_lifetime_in_seconds is not None: - body[ - 'delta_sharing_recipient_token_lifetime_in_seconds'] = delta_sharing_recipient_token_lifetime_in_seconds - if delta_sharing_scope is not None: body['delta_sharing_scope'] = delta_sharing_scope.value - if new_name is not None: body['new_name'] = new_name - if owner is not None: body['owner'] = owner - if privilege_model_version is not None: body['privilege_model_version'] = privilege_model_version - if storage_root_credential_id is not None: - body['storage_root_credential_id'] = storage_root_credential_id + if name is not None: body['name'] = name + if spec is not None: body['spec'] = spec.as_dict() headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } - res = self._api.do('PATCH', f'/api/2.1/unity-catalog/metastores/{id}', body=body, headers=headers) - return MetastoreInfo.from_dict(res) + res = self._api.do('POST', '/api/2.0/online-tables', body=body, headers=headers) + return OnlineTable.from_dict(res) - def update_assignment(self, - workspace_id: int, - *, - default_catalog_name: Optional[str] = None, - metastore_id: Optional[str] = None): - """Update an assignment. + def delete(self, name: str): + """Delete an Online Table. - Updates a metastore assignment. This operation can be used to update __metastore_id__ or - __default_catalog_name__ for a specified Workspace, if the Workspace is already assigned a metastore. - The caller must be an account admin to update __metastore_id__; otherwise, the caller can be a - Workspace admin. + Delete an online table. Warning: This will delete all the data in the online table. If the source + Delta table was deleted or modified since this Online Table was created, this will lose the data + forever! - :param workspace_id: int - A workspace ID. - :param default_catalog_name: str (optional) - The name of the default catalog for the metastore. - :param metastore_id: str (optional) - The unique ID of the metastore. + :param name: str + Full three-part (catalog, schema, table) name of the table. """ - body = {} - if default_catalog_name is not None: body['default_catalog_name'] = default_catalog_name - if metastore_id is not None: body['metastore_id'] = metastore_id - headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } - self._api.do('PATCH', - f'/api/2.1/unity-catalog/workspaces/{workspace_id}/metastore', - body=body, - headers=headers) + headers = {'Accept': 'application/json', } + self._api.do('DELETE', f'/api/2.0/online-tables/{name}', headers=headers) -class ModelVersionsAPI: - """Databricks provides a hosted version of MLflow Model Registry in Unity Catalog. Models in Unity Catalog - provide centralized access control, auditing, lineage, and discovery of ML models across Databricks - workspaces. + def get(self, name: str) -> OnlineTable: + """Get an Online Table. + + Get information about an existing online table and its status. + + :param name: str + Full three-part (catalog, schema, table) name of the table. 
+ + :returns: :class:`OnlineTable` + """ + + headers = {'Accept': 'application/json', } + + res = self._api.do('GET', f'/api/2.0/online-tables/{name}', headers=headers) + return OnlineTable.from_dict(res) + + +class QualityMonitorsAPI: + """A monitor computes and monitors data or model quality metrics for a table over time. It generates metrics + tables and a dashboard that you can use to monitor table health and set alerts. - This API reference documents the REST endpoints for managing model versions in Unity Catalog. For more - details, see the [registered models API docs](/api/workspace/registeredmodels).""" + Most write operations require the user to be the owner of the table (or its parent schema or parent + catalog). Viewing the dashboard, computed metrics, or monitor configuration only requires the user to have + **SELECT** privileges on the table (along with **USE_SCHEMA** and **USE_CATALOG**).""" def __init__(self, api_client): self._api = api_client - def delete(self, full_name: str, version: int): - """Delete a Model Version. + def cancel_refresh(self, table_name: str, refresh_id: str): + """Cancel refresh. - Deletes a model version from the specified registered model. Any aliases assigned to the model version - will also be deleted. + Cancel an active monitor refresh for the given refresh ID. - The caller must be a metastore admin or an owner of the parent registered model. For the latter case, - the caller must also be the owner or have the **USE_CATALOG** privilege on the parent catalog and the - **USE_SCHEMA** privilege on the parent schema. + The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the + table's parent catalog and be an owner of the table's parent schema 3. have the following permissions: + - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - be an + owner of the table - :param full_name: str - The three-level (fully qualified) name of the model version - :param version: int - The integer version number of the model version + Additionally, the call must be made from the workspace where the monitor was created. + + :param table_name: str + Full name of the table. + :param refresh_id: str + ID of the refresh. """ headers = {} - self._api.do('DELETE', - f'/api/2.1/unity-catalog/models/{full_name}/versions/{version}', + self._api.do('POST', + f'/api/2.1/unity-catalog/tables/{table_name}/monitor/refreshes/{refresh_id}/cancel', headers=headers) - def get(self, - full_name: str, - version: int, - *, - include_browse: Optional[bool] = None) -> RegisteredModelInfo: - """Get a Model Version. + def create(self, + table_name: str, + assets_dir: str, + output_schema_name: str, + *, + baseline_table_name: Optional[str] = None, + custom_metrics: Optional[List[MonitorMetric]] = None, + data_classification_config: Optional[MonitorDataClassificationConfig] = None, + inference_log: Optional[MonitorInferenceLog] = None, + notifications: Optional[MonitorNotifications] = None, + schedule: Optional[MonitorCronSchedule] = None, + skip_builtin_dashboard: Optional[bool] = None, + slicing_exprs: Optional[List[str]] = None, + snapshot: Optional[MonitorSnapshot] = None, + time_series: Optional[MonitorTimeSeries] = None, + warehouse_id: Optional[str] = None) -> MonitorInfo: + """Create a table monitor. - Get a model version. + Creates a new monitor for the specified table. - The caller must be a metastore admin or an owner of (or have the **EXECUTE** privilege on) the parent - registered model. 
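Taken together, the hunks above introduce the `OnlineTablesAPI`, exposed on the workspace client as `w.online_tables`. A minimal usage sketch follows; the source table, primary key, and the `OnlineTableSpec` / `OnlineTableSpecTriggeredSchedulingPolicy` field and class names are assumptions drawn from the catalog service and are not confirmed by this diff:

```python
# Illustrative sketch only; table names are placeholders, and the
# OnlineTableSpec field/class names below are assumptions, not confirmed here.
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.catalog import (OnlineTableSpec,
                                             OnlineTableSpecTriggeredSchedulingPolicy)

w = WorkspaceClient()

spec = OnlineTableSpec(
    source_table_full_name="main.default.orders",   # placeholder source Delta table
    primary_key_columns=["order_id"],
    run_triggered=OnlineTableSpecTriggeredSchedulingPolicy(),
)

# POST /api/2.0/online-tables
online = w.online_tables.create(name="main.default.orders_online", spec=spec)
print(online.name, online.status)

# GET /api/2.0/online-tables/{name} re-reads the table and its current status.
refreshed = w.online_tables.get(name="main.default.orders_online")

# delete() also removes the data held in the online table (see the warning above).
# w.online_tables.delete(name="main.default.orders_online")
```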
For the latter case, the caller must also be the owner or have the **USE_CATALOG** - privilege on the parent catalog and the **USE_SCHEMA** privilege on the parent schema. + The caller must either: 1. be an owner of the table's parent catalog, have **USE_SCHEMA** on the + table's parent schema, and have **SELECT** access on the table 2. have **USE_CATALOG** on the table's + parent catalog, be an owner of the table's parent schema, and have **SELECT** access on the table. 3. + have the following permissions: - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on + the table's parent schema - be an owner of the table. - :param full_name: str - The three-level (fully qualified) name of the model version - :param version: int - The integer version number of the model version - :param include_browse: bool (optional) - Whether to include model versions in the response for which the principal can only access selective - metadata for + Workspace assets, such as the dashboard, will be created in the workspace where this call was made. - :returns: :class:`RegisteredModelInfo` + :param table_name: str + Full name of the table. + :param assets_dir: str + The directory to store monitoring assets (e.g. dashboard, metric tables). + :param output_schema_name: str + Schema where output metric tables are created. + :param baseline_table_name: str (optional) + Name of the baseline table from which drift metrics are computed from. Columns in the monitored + table should also be present in the baseline table. + :param custom_metrics: List[:class:`MonitorMetric`] (optional) + Custom metrics to compute on the monitored table. These can be aggregate metrics, derived metrics + (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows). + :param data_classification_config: :class:`MonitorDataClassificationConfig` (optional) + The data classification config for the monitor. + :param inference_log: :class:`MonitorInferenceLog` (optional) + Configuration for monitoring inference logs. + :param notifications: :class:`MonitorNotifications` (optional) + The notification settings for the monitor. + :param schedule: :class:`MonitorCronSchedule` (optional) + The schedule for automatically updating and refreshing metric tables. + :param skip_builtin_dashboard: bool (optional) + Whether to skip creating a default dashboard summarizing data quality metrics. + :param slicing_exprs: List[str] (optional) + List of column expressions to slice data with for targeted analysis. The data is grouped by each + expression independently, resulting in a separate slice for each predicate and its complements. For + high-cardinality columns, only the top 100 unique values by frequency will generate slices. + :param snapshot: :class:`MonitorSnapshot` (optional) + Configuration for monitoring snapshot tables. + :param time_series: :class:`MonitorTimeSeries` (optional) + Configuration for monitoring time series tables. + :param warehouse_id: str (optional) + Optional argument to specify the warehouse for dashboard creation. If not specified, the first + running warehouse will be used. 
+ + :returns: :class:`MonitorInfo` """ + body = {} + if assets_dir is not None: body['assets_dir'] = assets_dir + if baseline_table_name is not None: body['baseline_table_name'] = baseline_table_name + if custom_metrics is not None: body['custom_metrics'] = [v.as_dict() for v in custom_metrics] + if data_classification_config is not None: + body['data_classification_config'] = data_classification_config.as_dict() + if inference_log is not None: body['inference_log'] = inference_log.as_dict() + if notifications is not None: body['notifications'] = notifications.as_dict() + if output_schema_name is not None: body['output_schema_name'] = output_schema_name + if schedule is not None: body['schedule'] = schedule.as_dict() + if skip_builtin_dashboard is not None: body['skip_builtin_dashboard'] = skip_builtin_dashboard + if slicing_exprs is not None: body['slicing_exprs'] = [v for v in slicing_exprs] + if snapshot is not None: body['snapshot'] = snapshot.as_dict() + if time_series is not None: body['time_series'] = time_series.as_dict() + if warehouse_id is not None: body['warehouse_id'] = warehouse_id + headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } - query = {} - if include_browse is not None: query['include_browse'] = include_browse - headers = {'Accept': 'application/json', } - - res = self._api.do('GET', - f'/api/2.1/unity-catalog/models/{full_name}/versions/{version}', - query=query, + res = self._api.do('POST', + f'/api/2.1/unity-catalog/tables/{table_name}/monitor', + body=body, headers=headers) - return RegisteredModelInfo.from_dict(res) + return MonitorInfo.from_dict(res) - def get_by_alias(self, full_name: str, alias: str) -> ModelVersionInfo: - """Get Model Version By Alias. + def delete(self, table_name: str): + """Delete a table monitor. - Get a model version by alias. + Deletes a monitor for the specified table. - The caller must be a metastore admin or an owner of (or have the **EXECUTE** privilege on) the - registered model. For the latter case, the caller must also be the owner or have the **USE_CATALOG** - privilege on the parent catalog and the **USE_SCHEMA** privilege on the parent schema. + The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the + table's parent catalog and be an owner of the table's parent schema 3. have the following permissions: + - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - be an + owner of the table. + + Additionally, the call must be made from the workspace where the monitor was created. + + Note that the metric tables and dashboard will not be deleted as part of this call; those assets must + be manually cleaned up (if desired). + + :param table_name: str + Full name of the table. - :param full_name: str - The three-level (fully qualified) name of the registered model - :param alias: str - The name of the alias - :returns: :class:`ModelVersionInfo` """ - headers = {'Accept': 'application/json', } + headers = {} - res = self._api.do('GET', - f'/api/2.1/unity-catalog/models/{full_name}/aliases/{alias}', - headers=headers) - return ModelVersionInfo.from_dict(res) + self._api.do('DELETE', f'/api/2.1/unity-catalog/tables/{table_name}/monitor', headers=headers) - def list(self, - full_name: str, - *, - include_browse: Optional[bool] = None, - max_results: Optional[int] = None, - page_token: Optional[str] = None) -> Iterator[ModelVersionInfo]: - """List Model Versions. 
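A hedged sketch of the renamed `w.quality_monitors` service (formerly `lakehouse_monitors`), combining the `create()` call above with the `run_refresh()` and `get_refresh()` methods added further down; the table, directory, and schema names are placeholders, and `MonitorSnapshot` is assumed to take no arguments:

```python
# Illustrative sketch; all names are placeholders. MonitorSnapshot is assumed
# to be the parameter-less snapshot profile from databricks.sdk.service.catalog.
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.catalog import MonitorSnapshot

w = WorkspaceClient()
table = "main.sales.orders"

# Create a snapshot-type monitor; the dashboard and metric tables are created
# in the workspace this call is made from, under assets_dir.
monitor = w.quality_monitors.create(
    table_name=table,
    assets_dir="/Workspace/Users/someone@example.com/monitoring/orders",
    output_schema_name="main.monitoring",
    snapshot=MonitorSnapshot(),
)
print(monitor.status)

# Queue a metric refresh, then poll its state by refresh ID.
refresh = w.quality_monitors.run_refresh(table_name=table)
state = w.quality_monitors.get_refresh(table_name=table,
                                       refresh_id=str(refresh.refresh_id)).state
print(state)
```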
+ def get(self, table_name: str) -> MonitorInfo: + """Get a table monitor. - List model versions. You can list model versions under a particular schema, or list all model versions - in the current metastore. + Gets a monitor for the specified table. - The returned models are filtered based on the privileges of the calling user. For example, the - metastore admin is able to list all the model versions. A regular user needs to be the owner or have - the **EXECUTE** privilege on the parent registered model to recieve the model versions in the - response. For the latter case, the caller must also be the owner or have the **USE_CATALOG** privilege - on the parent catalog and the **USE_SCHEMA** privilege on the parent schema. + The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the + table's parent catalog and be an owner of the table's parent schema. 3. have the following + permissions: - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent + schema - **SELECT** privilege on the table. - There is no guarantee of a specific ordering of the elements in the response. The elements in the - response will not contain any aliases or tags. + The returned information includes configuration values, as well as information on assets created by + the monitor. Some information (e.g., dashboard) may be filtered out if the caller is in a different + workspace than where the monitor was created. - :param full_name: str - The full three-level name of the registered model under which to list model versions - :param include_browse: bool (optional) - Whether to include model versions in the response for which the principal can only access selective - metadata for - :param max_results: int (optional) - Maximum number of model versions to return. If not set, the page length is set to a server - configured value (100, as of 1/3/2024). - when set to a value greater than 0, the page length is the - minimum of this value and a server configured value(1000, as of 1/3/2024); - when set to 0, the page - length is set to a server configured value (100, as of 1/3/2024) (recommended); - when set to a - value less than 0, an invalid parameter error is returned; - :param page_token: str (optional) - Opaque pagination token to go to next page based on previous query. + :param table_name: str + Full name of the table. - :returns: Iterator over :class:`ModelVersionInfo` + :returns: :class:`MonitorInfo` """ - query = {} - if include_browse is not None: query['include_browse'] = include_browse - if max_results is not None: query['max_results'] = max_results - if page_token is not None: query['page_token'] = page_token headers = {'Accept': 'application/json', } - while True: - json = self._api.do('GET', - f'/api/2.1/unity-catalog/models/{full_name}/versions', - query=query, - headers=headers) - if 'model_versions' in json: - for v in json['model_versions']: - yield ModelVersionInfo.from_dict(v) - if 'next_page_token' not in json or not json['next_page_token']: - return - query['page_token'] = json['next_page_token'] + res = self._api.do('GET', f'/api/2.1/unity-catalog/tables/{table_name}/monitor', headers=headers) + return MonitorInfo.from_dict(res) - def update(self, full_name: str, version: int, *, comment: Optional[str] = None) -> ModelVersionInfo: - """Update a Model Version. + def get_refresh(self, table_name: str, refresh_id: str) -> MonitorRefreshInfo: + """Get refresh. - Updates the specified model version. 
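For the Unity Catalog model versions service shown earlier in this file, `list()` now pages through `next_page_token` transparently and `update()` only accepts a comment. A short sketch with a placeholder registered model name:

```python
# Illustrative sketch; the registered model name is a placeholder.
from databricks.sdk import WorkspaceClient

w = WorkspaceClient()
model = "main.default.my_model"

# list() follows next_page_token internally and yields ModelVersionInfo items
# (without aliases or tags, per the docstring above).
for mv in w.model_versions.list(model):
    print(mv.version, mv.comment)

# Only the comment of a model version can currently be updated.
updated = w.model_versions.update(model, 1, comment="validated on the holdout set")
print(updated.comment)
```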
+ Gets info about a specific monitor refresh using the given refresh ID. - The caller must be a metastore admin or an owner of the parent registered model. For the latter case, - the caller must also be the owner or have the **USE_CATALOG** privilege on the parent catalog and the - **USE_SCHEMA** privilege on the parent schema. + The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the + table's parent catalog and be an owner of the table's parent schema 3. have the following permissions: + - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - + **SELECT** privilege on the table. - Currently only the comment of the model version can be updated. + Additionally, the call must be made from the workspace where the monitor was created. - :param full_name: str - The three-level (fully qualified) name of the model version - :param version: int - The integer version number of the model version - :param comment: str (optional) - The comment attached to the model version + :param table_name: str + Full name of the table. + :param refresh_id: str + ID of the refresh. - :returns: :class:`ModelVersionInfo` + :returns: :class:`MonitorRefreshInfo` """ - body = {} - if comment is not None: body['comment'] = comment - headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } - - res = self._api.do('PATCH', - f'/api/2.1/unity-catalog/models/{full_name}/versions/{version}', - body=body, - headers=headers) - return ModelVersionInfo.from_dict(res) - -class OnlineTablesAPI: - """Online tables provide lower latency and higher QPS access to data from Delta tables.""" + headers = {'Accept': 'application/json', } - def __init__(self, api_client): - self._api = api_client + res = self._api.do('GET', + f'/api/2.1/unity-catalog/tables/{table_name}/monitor/refreshes/{refresh_id}', + headers=headers) + return MonitorRefreshInfo.from_dict(res) - def create(self, *, name: Optional[str] = None, spec: Optional[OnlineTableSpec] = None) -> OnlineTable: - """Create an Online Table. + def list_refreshes(self, table_name: str) -> MonitorRefreshListResponse: + """List refreshes. - Create a new Online Table. + Gets an array containing the history of the most recent refreshes (up to 25) for this table. - :param name: str (optional) - Full three-part (catalog, schema, table) name of the table. - :param spec: :class:`OnlineTableSpec` (optional) - Specification of the online table. + The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the + table's parent catalog and be an owner of the table's parent schema 3. have the following permissions: + - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - + **SELECT** privilege on the table. - :returns: :class:`OnlineTable` + Additionally, the call must be made from the workspace where the monitor was created. + + :param table_name: str + Full name of the table. + + :returns: :class:`MonitorRefreshListResponse` """ - body = {} - if name is not None: body['name'] = name - if spec is not None: body['spec'] = spec.as_dict() - headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } - res = self._api.do('POST', '/api/2.0/online-tables', body=body, headers=headers) - return OnlineTable.from_dict(res) + headers = {'Accept': 'application/json', } - def delete(self, name: str): - """Delete an Online Table. 
+ res = self._api.do('GET', + f'/api/2.1/unity-catalog/tables/{table_name}/monitor/refreshes', + headers=headers) + return MonitorRefreshListResponse.from_dict(res) + + def run_refresh(self, table_name: str) -> MonitorRefreshInfo: + """Queue a metric refresh for a monitor. - Delete an online table. Warning: This will delete all the data in the online table. If the source - Delta table was deleted or modified since this Online Table was created, this will lose the data - forever! + Queues a metric refresh on the monitor for the specified table. The refresh will execute in the + background. - :param name: str - Full three-part (catalog, schema, table) name of the table. + The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the + table's parent catalog and be an owner of the table's parent schema 3. have the following permissions: + - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - be an + owner of the table + Additionally, the call must be made from the workspace where the monitor was created. + + :param table_name: str + Full name of the table. + :returns: :class:`MonitorRefreshInfo` """ headers = {'Accept': 'application/json', } - self._api.do('DELETE', f'/api/2.0/online-tables/{name}', headers=headers) + res = self._api.do('POST', + f'/api/2.1/unity-catalog/tables/{table_name}/monitor/refreshes', + headers=headers) + return MonitorRefreshInfo.from_dict(res) - def get(self, name: str) -> OnlineTable: - """Get an Online Table. + def update(self, + table_name: str, + output_schema_name: str, + *, + baseline_table_name: Optional[str] = None, + custom_metrics: Optional[List[MonitorMetric]] = None, + dashboard_id: Optional[str] = None, + data_classification_config: Optional[MonitorDataClassificationConfig] = None, + inference_log: Optional[MonitorInferenceLog] = None, + notifications: Optional[MonitorNotifications] = None, + schedule: Optional[MonitorCronSchedule] = None, + slicing_exprs: Optional[List[str]] = None, + snapshot: Optional[MonitorSnapshot] = None, + time_series: Optional[MonitorTimeSeries] = None) -> MonitorInfo: + """Update a table monitor. - Get information about an existing online table and its status. + Updates a monitor for the specified table. - :param name: str - Full three-part (catalog, schema, table) name of the table. + The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the + table's parent catalog and be an owner of the table's parent schema 3. have the following permissions: + - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - be an + owner of the table. - :returns: :class:`OnlineTable` + Additionally, the call must be made from the workspace where the monitor was created, and the caller + must be the original creator of the monitor. + + Certain configuration fields, such as output asset identifiers, cannot be updated. + + :param table_name: str + Full name of the table. + :param output_schema_name: str + Schema where output metric tables are created. + :param baseline_table_name: str (optional) + Name of the baseline table from which drift metrics are computed from. Columns in the monitored + table should also be present in the baseline table. + :param custom_metrics: List[:class:`MonitorMetric`] (optional) + Custom metrics to compute on the monitored table. 
These can be aggregate metrics, derived metrics + (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows). + :param dashboard_id: str (optional) + Id of dashboard that visualizes the computed metrics. This can be empty if the monitor is in PENDING + state. + :param data_classification_config: :class:`MonitorDataClassificationConfig` (optional) + The data classification config for the monitor. + :param inference_log: :class:`MonitorInferenceLog` (optional) + Configuration for monitoring inference logs. + :param notifications: :class:`MonitorNotifications` (optional) + The notification settings for the monitor. + :param schedule: :class:`MonitorCronSchedule` (optional) + The schedule for automatically updating and refreshing metric tables. + :param slicing_exprs: List[str] (optional) + List of column expressions to slice data with for targeted analysis. The data is grouped by each + expression independently, resulting in a separate slice for each predicate and its complements. For + high-cardinality columns, only the top 100 unique values by frequency will generate slices. + :param snapshot: :class:`MonitorSnapshot` (optional) + Configuration for monitoring snapshot tables. + :param time_series: :class:`MonitorTimeSeries` (optional) + Configuration for monitoring time series tables. + + :returns: :class:`MonitorInfo` """ + body = {} + if baseline_table_name is not None: body['baseline_table_name'] = baseline_table_name + if custom_metrics is not None: body['custom_metrics'] = [v.as_dict() for v in custom_metrics] + if dashboard_id is not None: body['dashboard_id'] = dashboard_id + if data_classification_config is not None: + body['data_classification_config'] = data_classification_config.as_dict() + if inference_log is not None: body['inference_log'] = inference_log.as_dict() + if notifications is not None: body['notifications'] = notifications.as_dict() + if output_schema_name is not None: body['output_schema_name'] = output_schema_name + if schedule is not None: body['schedule'] = schedule.as_dict() + if slicing_exprs is not None: body['slicing_exprs'] = [v for v in slicing_exprs] + if snapshot is not None: body['snapshot'] = snapshot.as_dict() + if time_series is not None: body['time_series'] = time_series.as_dict() + headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } - headers = {'Accept': 'application/json', } - - res = self._api.do('GET', f'/api/2.0/online-tables/{name}', headers=headers) - return OnlineTable.from_dict(res) + res = self._api.do('PUT', + f'/api/2.1/unity-catalog/tables/{table_name}/monitor', + body=body, + headers=headers) + return MonitorInfo.from_dict(res) class RegisteredModelsAPI: @@ -8450,7 +8488,7 @@ class SystemSchemasAPI: def __init__(self, api_client): self._api = api_client - def disable(self, metastore_id: str, schema_name: DisableSchemaName): + def disable(self, metastore_id: str, schema_name: str): """Disable a system schema. Disables the system schema and removes it from the system catalog. The caller must be an account admin @@ -8458,7 +8496,7 @@ def disable(self, metastore_id: str, schema_name: DisableSchemaName): :param metastore_id: str The metastore ID under which the system schema lives. - :param schema_name: :class:`DisableSchemaName` + :param schema_name: str Full name of the system schema. 
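With `DisableSchemaName` and `EnableSchemaName` removed, `schema_name` is now a plain string for both `enable()` and `disable()` on `w.system_schemas`. A hedged sketch, assuming the built-in `access` system schema and the current metastore:

```python
# Illustrative sketch; "access" is assumed to be one of the built-in system schemas.
from databricks.sdk import WorkspaceClient

w = WorkspaceClient()
metastore_id = w.metastores.summary().metastore_id

# schema_name is interpolated straight into the URL path; no enum required.
w.system_schemas.enable(metastore_id=metastore_id, schema_name="access")

for schema in w.system_schemas.list(metastore_id=metastore_id):
    print(schema.schema, schema.state)

w.system_schemas.disable(metastore_id=metastore_id, schema_name="access")
```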
@@ -8467,10 +8505,10 @@ def disable(self, metastore_id: str, schema_name: DisableSchemaName): headers = {'Accept': 'application/json', } self._api.do('DELETE', - f'/api/2.1/unity-catalog/metastores/{metastore_id}/systemschemas/{schema_name.value}', + f'/api/2.1/unity-catalog/metastores/{metastore_id}/systemschemas/{schema_name}', headers=headers) - def enable(self, metastore_id: str, schema_name: EnableSchemaName): + def enable(self, metastore_id: str, schema_name: str): """Enable a system schema. Enables the system schema and adds it to the system catalog. The caller must be an account admin or a @@ -8478,7 +8516,7 @@ def enable(self, metastore_id: str, schema_name: EnableSchemaName): :param metastore_id: str The metastore ID under which the system schema lives. - :param schema_name: :class:`EnableSchemaName` + :param schema_name: str Full name of the system schema. @@ -8487,7 +8525,7 @@ def enable(self, metastore_id: str, schema_name: EnableSchemaName): headers = {'Accept': 'application/json', } self._api.do('PUT', - f'/api/2.1/unity-catalog/metastores/{metastore_id}/systemschemas/{schema_name.value}', + f'/api/2.1/unity-catalog/metastores/{metastore_id}/systemschemas/{schema_name}', headers=headers) def list(self, metastore_id: str) -> Iterator[SystemSchemaInfo]: diff --git a/databricks/sdk/service/compute.py b/databricks/sdk/service/compute.py index 7207252f5..db5f550e3 100755 --- a/databricks/sdk/service/compute.py +++ b/databricks/sdk/service/compute.py @@ -529,10 +529,6 @@ class ClusterAttributes: """Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string.""" - cluster_source: Optional[ClusterSource] = None - """Determines whether the cluster was created by a user through the UI, created by the Databricks - Jobs Scheduler, or through an API request. This is the same as cluster_creator, but read only.""" - custom_tags: Optional[Dict[str, str]] = None """Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: @@ -551,8 +547,12 @@ class ClusterAttributes: features and data governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in - this mode. But programming languages and cluster features might be limited. * - `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * + this mode. But programming languages and cluster features might be limited. + + The following modes are deprecated starting with Databricks Runtime 15.0 and will be removed for + future Databricks Runtime versions: + + * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. 
* `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters.""" @@ -637,7 +637,6 @@ def as_dict(self) -> dict: if self.azure_attributes: body['azure_attributes'] = self.azure_attributes.as_dict() if self.cluster_log_conf: body['cluster_log_conf'] = self.cluster_log_conf.as_dict() if self.cluster_name is not None: body['cluster_name'] = self.cluster_name - if self.cluster_source is not None: body['cluster_source'] = self.cluster_source.value if self.custom_tags: body['custom_tags'] = self.custom_tags if self.data_security_mode is not None: body['data_security_mode'] = self.data_security_mode.value if self.docker_image: body['docker_image'] = self.docker_image.as_dict() @@ -669,7 +668,6 @@ def from_dict(cls, d: Dict[str, any]) -> ClusterAttributes: azure_attributes=_from_dict(d, 'azure_attributes', AzureAttributes), cluster_log_conf=_from_dict(d, 'cluster_log_conf', ClusterLogConf), cluster_name=d.get('cluster_name', None), - cluster_source=_enum(d, 'cluster_source', ClusterSource), custom_tags=d.get('custom_tags', None), data_security_mode=_enum(d, 'data_security_mode', DataSecurityMode), docker_image=_from_dict(d, 'docker_image', DockerImage), @@ -763,8 +761,12 @@ class ClusterDetails: features and data governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in - this mode. But programming languages and cluster features might be limited. * - `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * + this mode. But programming languages and cluster features might be limited. + + The following modes are deprecated starting with Databricks Runtime 15.0 and will be removed for + future Databricks Runtime versions: + + * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters.""" @@ -882,7 +884,7 @@ class ClusterDetails: """The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call.""" - spec: Optional[CreateCluster] = None + spec: Optional[ClusterSpec] = None """`spec` contains a snapshot of the field values that were used to create or edit this cluster. The contents of `spec` can be used in the body of a create cluster request. This field might not be populated for older clusters. Note: not included in the response of the ListClusters API.""" @@ -1005,7 +1007,7 @@ def from_dict(cls, d: Dict[str, any]) -> ClusterDetails: spark_context_id=d.get('spark_context_id', None), spark_env_vars=d.get('spark_env_vars', None), spark_version=d.get('spark_version', None), - spec=_from_dict(d, 'spec', CreateCluster), + spec=_from_dict(d, 'spec', ClusterSpec), ssh_public_keys=d.get('ssh_public_keys', None), start_time=d.get('start_time', None), state=_enum(d, 'state', State), @@ -1418,6 +1420,8 @@ class ClusterSource(Enum): @dataclass class ClusterSpec: apply_policy_default_values: Optional[bool] = None + """When set to true, fixed and default values from the policy will be used for fields that are + omitted. 
When set to false, only fixed values from the policy will be applied.""" autoscale: Optional[AutoScale] = None """Parameters needed in order to automatically scale clusters up and down based on load. Note: @@ -1437,10 +1441,6 @@ class ClusterSpec: """Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used.""" - clone_from: Optional[CloneCluster] = None - """When specified, this clones libraries from a source cluster during the creation of a new - cluster.""" - cluster_log_conf: Optional[ClusterLogConf] = None """The configuration for delivering spark logs to a long-term storage destination. Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified for one cluster. @@ -1452,10 +1452,6 @@ class ClusterSpec: """Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string.""" - cluster_source: Optional[ClusterSource] = None - """Determines whether the cluster was created by a user through the UI, created by the Databricks - Jobs Scheduler, or through an API request. This is the same as cluster_creator, but read only.""" - custom_tags: Optional[Dict[str, str]] = None """Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: @@ -1474,8 +1470,12 @@ class ClusterSpec: features and data governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in - this mode. But programming languages and cluster features might be limited. * - `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * + this mode. But programming languages and cluster features might be limited. + + The following modes are deprecated starting with Databricks Runtime 15.0 and will be removed for + future Databricks Runtime versions: + + * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. 
* `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters.""" @@ -1575,10 +1575,8 @@ def as_dict(self) -> dict: body['autotermination_minutes'] = self.autotermination_minutes if self.aws_attributes: body['aws_attributes'] = self.aws_attributes.as_dict() if self.azure_attributes: body['azure_attributes'] = self.azure_attributes.as_dict() - if self.clone_from: body['clone_from'] = self.clone_from.as_dict() if self.cluster_log_conf: body['cluster_log_conf'] = self.cluster_log_conf.as_dict() if self.cluster_name is not None: body['cluster_name'] = self.cluster_name - if self.cluster_source is not None: body['cluster_source'] = self.cluster_source.value if self.custom_tags: body['custom_tags'] = self.custom_tags if self.data_security_mode is not None: body['data_security_mode'] = self.data_security_mode.value if self.docker_image: body['docker_image'] = self.docker_image.as_dict() @@ -1611,10 +1609,8 @@ def from_dict(cls, d: Dict[str, any]) -> ClusterSpec: autotermination_minutes=d.get('autotermination_minutes', None), aws_attributes=_from_dict(d, 'aws_attributes', AwsAttributes), azure_attributes=_from_dict(d, 'azure_attributes', AzureAttributes), - clone_from=_from_dict(d, 'clone_from', CloneCluster), cluster_log_conf=_from_dict(d, 'cluster_log_conf', ClusterLogConf), cluster_name=d.get('cluster_name', None), - cluster_source=_enum(d, 'cluster_source', ClusterSource), custom_tags=d.get('custom_tags', None), data_security_mode=_enum(d, 'data_security_mode', DataSecurityMode), docker_image=_from_dict(d, 'docker_image', DockerImage), @@ -1637,28 +1633,6 @@ def from_dict(cls, d: Dict[str, any]) -> ClusterSpec: workload_type=_from_dict(d, 'workload_type', WorkloadType)) -@dataclass -class ClusterStatusResponse: - cluster_id: Optional[str] = None - """Unique identifier for the cluster.""" - - library_statuses: Optional[List[LibraryFullStatus]] = None - """Status of all libraries on the cluster.""" - - def as_dict(self) -> dict: - """Serializes the ClusterStatusResponse into a dictionary suitable for use as a JSON request body.""" - body = {} - if self.cluster_id is not None: body['cluster_id'] = self.cluster_id - if self.library_statuses: body['library_statuses'] = [v.as_dict() for v in self.library_statuses] - return body - - @classmethod - def from_dict(cls, d: Dict[str, any]) -> ClusterStatusResponse: - """Deserializes the ClusterStatusResponse from a dictionary.""" - return cls(cluster_id=d.get('cluster_id', None), - library_statuses=_repeated_dict(d, 'library_statuses', LibraryFullStatus)) - - @dataclass class Command: cluster_id: Optional[str] = None @@ -1757,6 +1731,8 @@ class CreateCluster: be retrieved by using the :method:clusters/sparkVersions API call.""" apply_policy_default_values: Optional[bool] = None + """When set to true, fixed and default values from the policy will be used for fields that are + omitted. When set to false, only fixed values from the policy will be applied.""" autoscale: Optional[AutoScale] = None """Parameters needed in order to automatically scale clusters up and down based on load. Note: @@ -1791,10 +1767,6 @@ class CreateCluster: """Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string.""" - cluster_source: Optional[ClusterSource] = None - """Determines whether the cluster was created by a user through the UI, created by the Databricks - Jobs Scheduler, or through an API request. 
This is the same as cluster_creator, but read only.""" - custom_tags: Optional[Dict[str, str]] = None """Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: @@ -1813,8 +1785,12 @@ class CreateCluster: features and data governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in - this mode. But programming languages and cluster features might be limited. * - `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * + this mode. But programming languages and cluster features might be limited. + + The following modes are deprecated starting with Databricks Runtime 15.0 and will be removed for + future Databricks Runtime versions: + + * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters.""" @@ -1913,7 +1889,6 @@ def as_dict(self) -> dict: if self.clone_from: body['clone_from'] = self.clone_from.as_dict() if self.cluster_log_conf: body['cluster_log_conf'] = self.cluster_log_conf.as_dict() if self.cluster_name is not None: body['cluster_name'] = self.cluster_name - if self.cluster_source is not None: body['cluster_source'] = self.cluster_source.value if self.custom_tags: body['custom_tags'] = self.custom_tags if self.data_security_mode is not None: body['data_security_mode'] = self.data_security_mode.value if self.docker_image: body['docker_image'] = self.docker_image.as_dict() @@ -1949,7 +1924,6 @@ def from_dict(cls, d: Dict[str, any]) -> CreateCluster: clone_from=_from_dict(d, 'clone_from', CloneCluster), cluster_log_conf=_from_dict(d, 'cluster_log_conf', ClusterLogConf), cluster_name=d.get('cluster_name', None), - cluster_source=_enum(d, 'cluster_source', ClusterSource), custom_tags=d.get('custom_tags', None), data_security_mode=_enum(d, 'data_security_mode', DataSecurityMode), docker_image=_from_dict(d, 'docker_image', DockerImage), @@ -2287,8 +2261,12 @@ class DataSecurityMode(Enum): features and data governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in - this mode. But programming languages and cluster features might be limited. * - `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * + this mode. But programming languages and cluster features might be limited. + + The following modes are deprecated starting with Databricks Runtime 15.0 and will be removed for + future Databricks Runtime versions: + + * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. 
* `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters.""" @@ -2601,6 +2579,8 @@ class EditCluster: be retrieved by using the :method:clusters/sparkVersions API call.""" apply_policy_default_values: Optional[bool] = None + """When set to true, fixed and default values from the policy will be used for fields that are + omitted. When set to false, only fixed values from the policy will be applied.""" autoscale: Optional[AutoScale] = None """Parameters needed in order to automatically scale clusters up and down based on load. Note: @@ -2620,10 +2600,6 @@ class EditCluster: """Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used.""" - clone_from: Optional[CloneCluster] = None - """When specified, this clones libraries from a source cluster during the creation of a new - cluster.""" - cluster_log_conf: Optional[ClusterLogConf] = None """The configuration for delivering spark logs to a long-term storage destination. Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified for one cluster. @@ -2635,10 +2611,6 @@ class EditCluster: """Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string.""" - cluster_source: Optional[ClusterSource] = None - """Determines whether the cluster was created by a user through the UI, created by the Databricks - Jobs Scheduler, or through an API request. This is the same as cluster_creator, but read only.""" - custom_tags: Optional[Dict[str, str]] = None """Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: @@ -2657,8 +2629,12 @@ class EditCluster: features and data governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in - this mode. But programming languages and cluster features might be limited. * - `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * + this mode. But programming languages and cluster features might be limited. + + The following modes are deprecated starting with Databricks Runtime 15.0 and will be removed for + future Databricks Runtime versions: + + * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. 
* `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters.""" @@ -2754,11 +2730,9 @@ def as_dict(self) -> dict: body['autotermination_minutes'] = self.autotermination_minutes if self.aws_attributes: body['aws_attributes'] = self.aws_attributes.as_dict() if self.azure_attributes: body['azure_attributes'] = self.azure_attributes.as_dict() - if self.clone_from: body['clone_from'] = self.clone_from.as_dict() if self.cluster_id is not None: body['cluster_id'] = self.cluster_id if self.cluster_log_conf: body['cluster_log_conf'] = self.cluster_log_conf.as_dict() if self.cluster_name is not None: body['cluster_name'] = self.cluster_name - if self.cluster_source is not None: body['cluster_source'] = self.cluster_source.value if self.custom_tags: body['custom_tags'] = self.custom_tags if self.data_security_mode is not None: body['data_security_mode'] = self.data_security_mode.value if self.docker_image: body['docker_image'] = self.docker_image.as_dict() @@ -2791,11 +2765,9 @@ def from_dict(cls, d: Dict[str, any]) -> EditCluster: autotermination_minutes=d.get('autotermination_minutes', None), aws_attributes=_from_dict(d, 'aws_attributes', AwsAttributes), azure_attributes=_from_dict(d, 'azure_attributes', AzureAttributes), - clone_from=_from_dict(d, 'clone_from', CloneCluster), cluster_id=d.get('cluster_id', None), cluster_log_conf=_from_dict(d, 'cluster_log_conf', ClusterLogConf), cluster_name=d.get('cluster_name', None), - cluster_source=_enum(d, 'cluster_source', ClusterSource), custom_tags=d.get('custom_tags', None), data_security_mode=_enum(d, 'data_security_mode', DataSecurityMode), docker_image=_from_dict(d, 'docker_image', DockerImage), @@ -6298,7 +6270,6 @@ def create(self, clone_from: Optional[CloneCluster] = None, cluster_log_conf: Optional[ClusterLogConf] = None, cluster_name: Optional[str] = None, - cluster_source: Optional[ClusterSource] = None, custom_tags: Optional[Dict[str, str]] = None, data_security_mode: Optional[DataSecurityMode] = None, docker_image: Optional[DockerImage] = None, @@ -6331,6 +6302,8 @@ def create(self, The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. :param apply_policy_default_values: bool (optional) + When set to true, fixed and default values from the policy will be used for fields that are omitted. + When set to false, only fixed values from the policy will be applied. :param autoscale: :class:`AutoScale` (optional) Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. @@ -6355,9 +6328,6 @@ def create(self, :param cluster_name: str (optional) Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. - :param cluster_source: :class:`ClusterSource` (optional) - Determines whether the cluster was created by a user through the UI, created by the Databricks Jobs - Scheduler, or through an API request. This is the same as cluster_creator, but read only. :param custom_tags: Dict[str,str] (optional) Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: @@ -6374,10 +6344,15 @@ def create(self, governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. 
Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages - and cluster features might be limited. * `LEGACY_TABLE_ACL`: This mode is for users migrating from - legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy - Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating - from legacy Passthrough on standard clusters. + and cluster features might be limited. + + The following modes are deprecated starting with Databricks Runtime 15.0 and will be removed for + future Databricks Runtime versions: + + * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * + `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency + clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on + standard clusters. :param docker_image: :class:`DockerImage` (optional) :param driver_instance_pool_id: str (optional) The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses @@ -6457,7 +6432,6 @@ def create(self, if clone_from is not None: body['clone_from'] = clone_from.as_dict() if cluster_log_conf is not None: body['cluster_log_conf'] = cluster_log_conf.as_dict() if cluster_name is not None: body['cluster_name'] = cluster_name - if cluster_source is not None: body['cluster_source'] = cluster_source.value if custom_tags is not None: body['custom_tags'] = custom_tags if data_security_mode is not None: body['data_security_mode'] = data_security_mode.value if docker_image is not None: body['docker_image'] = docker_image.as_dict() @@ -6498,7 +6472,6 @@ def create_and_wait( clone_from: Optional[CloneCluster] = None, cluster_log_conf: Optional[ClusterLogConf] = None, cluster_name: Optional[str] = None, - cluster_source: Optional[ClusterSource] = None, custom_tags: Optional[Dict[str, str]] = None, data_security_mode: Optional[DataSecurityMode] = None, docker_image: Optional[DockerImage] = None, @@ -6527,7 +6500,6 @@ def create_and_wait( clone_from=clone_from, cluster_log_conf=cluster_log_conf, cluster_name=cluster_name, - cluster_source=cluster_source, custom_tags=custom_tags, data_security_mode=data_security_mode, docker_image=docker_image, @@ -6584,10 +6556,8 @@ def edit(self, autotermination_minutes: Optional[int] = None, aws_attributes: Optional[AwsAttributes] = None, azure_attributes: Optional[AzureAttributes] = None, - clone_from: Optional[CloneCluster] = None, cluster_log_conf: Optional[ClusterLogConf] = None, cluster_name: Optional[str] = None, - cluster_source: Optional[ClusterSource] = None, custom_tags: Optional[Dict[str, str]] = None, data_security_mode: Optional[DataSecurityMode] = None, docker_image: Optional[DockerImage] = None, @@ -6627,6 +6597,8 @@ def edit(self, The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. :param apply_policy_default_values: bool (optional) + When set to true, fixed and default values from the policy will be used for fields that are omitted. + When set to false, only fixed values from the policy will be applied. :param autoscale: :class:`AutoScale` (optional) Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. 
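The compute changes above remove `cluster_source` and `clone_from` from the create/edit request shapes, retype `ClusterDetails.spec` as `ClusterSpec`, and document `apply_policy_default_values`. A hedged sketch of a create call under the new signature; the policy ID is a placeholder and the helper selectors come from the SDK's clusters extension:

```python
# Illustrative sketch; policy_id is a placeholder. cluster_source can no longer
# be supplied here, it is only reported back on ClusterDetails.
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.compute import DataSecurityMode

w = WorkspaceClient()

cluster = w.clusters.create_and_wait(
    cluster_name="sdk-v0.28-example",
    spark_version=w.clusters.select_spark_version(long_term_support=True),
    node_type_id=w.clusters.select_node_type(local_disk=True),
    policy_id="<your-policy-id>",
    apply_policy_default_values=True,   # omitted fields fall back to the policy's defaults
    data_security_mode=DataSecurityMode.USER_ISOLATION,
    autotermination_minutes=30,
    num_workers=1,
)

# ClusterDetails.spec is now typed as ClusterSpec rather than CreateCluster.
print(cluster.cluster_id, type(cluster.spec))
```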
@@ -6640,8 +6612,6 @@ def edit(self, :param azure_attributes: :class:`AzureAttributes` (optional) Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. - :param clone_from: :class:`CloneCluster` (optional) - When specified, this clones libraries from a source cluster during the creation of a new cluster. :param cluster_log_conf: :class:`ClusterLogConf` (optional) The configuration for delivering spark logs to a long-term storage destination. Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified for one cluster. If @@ -6651,9 +6621,6 @@ def edit(self, :param cluster_name: str (optional) Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. - :param cluster_source: :class:`ClusterSource` (optional) - Determines whether the cluster was created by a user through the UI, created by the Databricks Jobs - Scheduler, or through an API request. This is the same as cluster_creator, but read only. :param custom_tags: Dict[str,str] (optional) Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: @@ -6670,10 +6637,15 @@ def edit(self, governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages - and cluster features might be limited. * `LEGACY_TABLE_ACL`: This mode is for users migrating from - legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy - Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating - from legacy Passthrough on standard clusters. + and cluster features might be limited. + + The following modes are deprecated starting with Databricks Runtime 15.0 and will be removed for + future Databricks Runtime versions: + + * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * + `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency + clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on + standard clusters. :param docker_image: :class:`DockerImage` (optional) :param driver_instance_pool_id: str (optional) The optional ID of the instance pool for the driver of the cluster belongs. 
The pool cluster uses @@ -6750,11 +6722,9 @@ def edit(self, if autotermination_minutes is not None: body['autotermination_minutes'] = autotermination_minutes if aws_attributes is not None: body['aws_attributes'] = aws_attributes.as_dict() if azure_attributes is not None: body['azure_attributes'] = azure_attributes.as_dict() - if clone_from is not None: body['clone_from'] = clone_from.as_dict() if cluster_id is not None: body['cluster_id'] = cluster_id if cluster_log_conf is not None: body['cluster_log_conf'] = cluster_log_conf.as_dict() if cluster_name is not None: body['cluster_name'] = cluster_name - if cluster_source is not None: body['cluster_source'] = cluster_source.value if custom_tags is not None: body['custom_tags'] = custom_tags if data_security_mode is not None: body['data_security_mode'] = data_security_mode.value if docker_image is not None: body['docker_image'] = docker_image.as_dict() @@ -6793,10 +6763,8 @@ def edit_and_wait( autotermination_minutes: Optional[int] = None, aws_attributes: Optional[AwsAttributes] = None, azure_attributes: Optional[AzureAttributes] = None, - clone_from: Optional[CloneCluster] = None, cluster_log_conf: Optional[ClusterLogConf] = None, cluster_name: Optional[str] = None, - cluster_source: Optional[ClusterSource] = None, custom_tags: Optional[Dict[str, str]] = None, data_security_mode: Optional[DataSecurityMode] = None, docker_image: Optional[DockerImage] = None, @@ -6822,11 +6790,9 @@ def edit_and_wait( autotermination_minutes=autotermination_minutes, aws_attributes=aws_attributes, azure_attributes=azure_attributes, - clone_from=clone_from, cluster_id=cluster_id, cluster_log_conf=cluster_log_conf, cluster_name=cluster_name, - cluster_source=cluster_source, custom_tags=custom_tags, data_security_mode=data_security_mode, docker_image=docker_image, @@ -8127,19 +8093,20 @@ class LibrariesAPI: def __init__(self, api_client): self._api = api_client - def all_cluster_statuses(self) -> ListAllClusterLibraryStatusesResponse: + def all_cluster_statuses(self) -> Iterator[ClusterLibraryStatuses]: """Get all statuses. Get the status of all libraries on all clusters. A status is returned for all libraries installed on this cluster via the API or the libraries UI. - :returns: :class:`ListAllClusterLibraryStatusesResponse` + :returns: Iterator over :class:`ClusterLibraryStatuses` """ headers = {'Accept': 'application/json', } - res = self._api.do('GET', '/api/2.0/libraries/all-cluster-statuses', headers=headers) - return ListAllClusterLibraryStatusesResponse.from_dict(res) + json = self._api.do('GET', '/api/2.0/libraries/all-cluster-statuses', headers=headers) + parsed = ListAllClusterLibraryStatusesResponse.from_dict(json).statuses + return parsed if parsed is not None else [] def cluster_status(self, cluster_id: str) -> Iterator[LibraryFullStatus]: """Get status. 
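The Libraries service now yields dataclasses directly instead of the removed wrapper shapes (`ClusterStatusResponse` is gone, and `all_cluster_statuses()` no longer returns `ListAllClusterLibraryStatusesResponse` to the caller). A hedged sketch of both calls; the cluster ID is a placeholder:

```python
# Illustrative sketch; "<cluster-id>" is a placeholder.
from databricks.sdk import WorkspaceClient

w = WorkspaceClient()

# all_cluster_statuses() now yields ClusterLibraryStatuses objects directly.
for statuses in w.libraries.all_cluster_statuses():
    print(statuses.cluster_id, len(statuses.library_statuses or []))

# cluster_status() still yields LibraryFullStatus entries, but the response is
# parsed via ClusterLibraryStatuses rather than the removed ClusterStatusResponse.
for lib in w.libraries.cluster_status("<cluster-id>"):
    print(lib.status, lib.library)
```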
@@ -8161,7 +8128,7 @@ def cluster_status(self, cluster_id: str) -> Iterator[LibraryFullStatus]: headers = {'Accept': 'application/json', } json = self._api.do('GET', '/api/2.0/libraries/cluster-status', query=query, headers=headers) - parsed = ClusterStatusResponse.from_dict(json).library_statuses + parsed = ClusterLibraryStatuses.from_dict(json).library_statuses return parsed if parsed is not None else [] def install(self, cluster_id: str, libraries: List[Library]): diff --git a/databricks/sdk/service/jobs.py b/databricks/sdk/service/jobs.py index a11dc6c82..27cb330ff 100755 --- a/databricks/sdk/service/jobs.py +++ b/databricks/sdk/service/jobs.py @@ -2179,8 +2179,6 @@ class RepairRun: pipeline_params: Optional[PipelineParams] = None python_named_params: Optional[Dict[str, str]] = None - """A map from keys to values for jobs with Python wheel task, for example `"python_named_params": - {"name": "task", "data": "dbfs:/path/to/data.json"}`.""" python_params: Optional[List[str]] = None """A list of parameters for jobs with Python tasks, for example `"python_params": ["john doe", @@ -2450,6 +2448,7 @@ def from_dict(cls, d: Dict[str, any]) -> ResolvedStringParamsValues: @dataclass class ResolvedValues: + condition_task: Optional[ResolvedConditionTaskValues] = None dbt_task: Optional[ResolvedDbtTaskValues] = None @@ -2856,8 +2855,6 @@ class RunJobTask: pipeline_params: Optional[PipelineParams] = None python_named_params: Optional[Dict[str, str]] = None - """A map from keys to values for jobs with Python wheel task, for example `"python_named_params": - {"name": "task", "data": "dbfs:/path/to/data.json"}`.""" python_params: Optional[List[str]] = None """A list of parameters for jobs with Python tasks, for example `"python_params": ["john doe", @@ -3006,8 +3003,6 @@ class RunNow: pipeline_params: Optional[PipelineParams] = None python_named_params: Optional[Dict[str, str]] = None - """A map from keys to values for jobs with Python wheel task, for example `"python_named_params": - {"name": "task", "data": "dbfs:/path/to/data.json"}`.""" python_params: Optional[List[str]] = None """A list of parameters for jobs with Python tasks, for example `"python_params": ["john doe", @@ -3217,8 +3212,6 @@ class RunParameters: pipeline_params: Optional[PipelineParams] = None python_named_params: Optional[Dict[str, str]] = None - """A map from keys to values for jobs with Python wheel task, for example `"python_named_params": - {"name": "task", "data": "dbfs:/path/to/data.json"}`.""" python_params: Optional[List[str]] = None """A list of parameters for jobs with Python tasks, for example `"python_params": ["john doe", @@ -5586,8 +5579,6 @@ def repair_run(self, [dbutils.widgets.get]: https://docs.databricks.com/dev-tools/databricks-utils.html :param pipeline_params: :class:`PipelineParams` (optional) :param python_named_params: Dict[str,str] (optional) - A map from keys to values for jobs with Python wheel task, for example `"python_named_params": - {"name": "task", "data": "dbfs:/path/to/data.json"}`. :param python_params: List[str] (optional) A list of parameters for jobs with Python tasks, for example `"python_params": ["john doe", "35"]`. The parameters are passed to Python file as command-line parameters. 
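Since `all_cluster_statuses()` and `cluster_status()` now return iterators of dataclasses rather than wrapper responses, a small consumption sketch (the cluster ID is a placeholder):

.. code-block:: python

    from databricks.sdk import WorkspaceClient

    w = WorkspaceClient()

    # all_cluster_statuses() now yields ClusterLibraryStatuses objects directly
    for cluster in w.libraries.all_cluster_statuses():
        for lib in cluster.library_statuses or []:
            print(cluster.cluster_id, lib.status)

    # cluster_status() yields LibraryFullStatus entries for a single cluster
    for lib in w.libraries.cluster_status(cluster_id="0123-456789-abcdefgh"):
        print(lib.status)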
If specified upon `run-now`, it @@ -5777,8 +5768,6 @@ def run_now(self, [dbutils.widgets.get]: https://docs.databricks.com/dev-tools/databricks-utils.html :param pipeline_params: :class:`PipelineParams` (optional) :param python_named_params: Dict[str,str] (optional) - A map from keys to values for jobs with Python wheel task, for example `"python_named_params": - {"name": "task", "data": "dbfs:/path/to/data.json"}`. :param python_params: List[str] (optional) A list of parameters for jobs with Python tasks, for example `"python_params": ["john doe", "35"]`. The parameters are passed to Python file as command-line parameters. If specified upon `run-now`, it diff --git a/databricks/sdk/service/marketplace.py b/databricks/sdk/service/marketplace.py index d559d98f0..1a6bc3974 100755 --- a/databricks/sdk/service/marketplace.py +++ b/databricks/sdk/service/marketplace.py @@ -1908,34 +1908,6 @@ class SortBy(Enum): SORT_BY_UNSPECIFIED = 'SORT_BY_UNSPECIFIED' -@dataclass -class SortBySpec: - sort_by: SortBy - """The field on which to sort the listing.""" - - sort_order: SortOrder - """The order in which to sort the listing.""" - - def as_dict(self) -> dict: - """Serializes the SortBySpec into a dictionary suitable for use as a JSON request body.""" - body = {} - if self.sort_by is not None: body['sort_by'] = self.sort_by.value - if self.sort_order is not None: body['sort_order'] = self.sort_order.value - return body - - @classmethod - def from_dict(cls, d: Dict[str, any]) -> SortBySpec: - """Deserializes the SortBySpec from a dictionary.""" - return cls(sort_by=_enum(d, 'sort_by', SortBy), sort_order=_enum(d, 'sort_order', SortOrder)) - - -class SortOrder(Enum): - - SORT_ORDER_ASCENDING = 'SORT_ORDER_ASCENDING' - SORT_ORDER_DESCENDING = 'SORT_ORDER_DESCENDING' - SORT_ORDER_UNSPECIFIED = 'SORT_ORDER_UNSPECIFIED' - - @dataclass class TokenDetail: bearer_token: Optional[str] = None @@ -2579,13 +2551,14 @@ def list(self, *, assets: Optional[List[AssetType]] = None, categories: Optional[List[Category]] = None, + is_ascending: Optional[bool] = None, is_free: Optional[bool] = None, is_private_exchange: Optional[bool] = None, is_staff_pick: Optional[bool] = None, page_size: Optional[int] = None, page_token: Optional[str] = None, provider_ids: Optional[List[str]] = None, - sort_by_spec: Optional[SortBySpec] = None, + sort_by: Optional[SortBy] = None, tags: Optional[List[ListingTag]] = None) -> Iterator[Listing]: """List listings. @@ -2595,6 +2568,7 @@ def list(self, Matches any of the following asset types :param categories: List[:class:`Category`] (optional) Matches any of the following categories + :param is_ascending: bool (optional) :param is_free: bool (optional) Filters each listing based on if it is free. :param is_private_exchange: bool (optional) @@ -2605,7 +2579,7 @@ def list(self, :param page_token: str (optional) :param provider_ids: List[str] (optional) Matches any of the following provider ids - :param sort_by_spec: :class:`SortBySpec` (optional) + :param sort_by: :class:`SortBy` (optional) Criteria for sorting the resulting set of listings. 
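A hedged sketch of the new sorting arguments, assuming the listings service is exposed as `w.consumer_listings` and that `SortBy` includes a `SORT_BY_DATE` member (only `SORT_BY_UNSPECIFIED` is visible in this hunk):

.. code-block:: python

    from databricks.sdk import WorkspaceClient
    from databricks.sdk.service import marketplace

    w = WorkspaceClient()

    # sort_by_spec is gone; sorting is now expressed via sort_by + is_ascending
    for listing in w.consumer_listings.list(sort_by=marketplace.SortBy.SORT_BY_DATE,
                                            is_ascending=False):
        print(listing.id)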
:param tags: List[:class:`ListingTag`] (optional) Matches any of the following tags @@ -2616,13 +2590,14 @@ def list(self, query = {} if assets is not None: query['assets'] = [v.value for v in assets] if categories is not None: query['categories'] = [v.value for v in categories] + if is_ascending is not None: query['is_ascending'] = is_ascending if is_free is not None: query['is_free'] = is_free if is_private_exchange is not None: query['is_private_exchange'] = is_private_exchange if is_staff_pick is not None: query['is_staff_pick'] = is_staff_pick if page_size is not None: query['page_size'] = page_size if page_token is not None: query['page_token'] = page_token if provider_ids is not None: query['provider_ids'] = [v for v in provider_ids] - if sort_by_spec is not None: query['sort_by_spec'] = sort_by_spec.as_dict() + if sort_by is not None: query['sort_by'] = sort_by.value if tags is not None: query['tags'] = [v.as_dict() for v in tags] headers = {'Accept': 'application/json', } @@ -2640,6 +2615,7 @@ def search(self, *, assets: Optional[List[AssetType]] = None, categories: Optional[List[Category]] = None, + is_ascending: Optional[bool] = None, is_free: Optional[bool] = None, is_private_exchange: Optional[bool] = None, page_size: Optional[int] = None, @@ -2657,6 +2633,7 @@ def search(self, Matches any of the following asset types :param categories: List[:class:`Category`] (optional) Matches any of the following categories + :param is_ascending: bool (optional) :param is_free: bool (optional) :param is_private_exchange: bool (optional) :param page_size: int (optional) @@ -2671,6 +2648,7 @@ def search(self, query = {} if assets is not None: query['assets'] = [v.value for v in assets] if categories is not None: query['categories'] = [v.value for v in categories] + if is_ascending is not None: query['is_ascending'] = is_ascending if is_free is not None: query['is_free'] = is_free if is_private_exchange is not None: query['is_private_exchange'] = is_private_exchange if page_size is not None: query['page_size'] = page_size diff --git a/databricks/sdk/service/pipelines.py b/databricks/sdk/service/pipelines.py index 8e2a7187c..83b2991ad 100755 --- a/databricks/sdk/service/pipelines.py +++ b/databricks/sdk/service/pipelines.py @@ -57,6 +57,9 @@ class CreatePipeline: filters: Optional[Filters] = None """Filters on which Pipeline packages to include in the deployed graph.""" + gateway_definition: Optional[IngestionGatewayPipelineDefinition] = None + """The definition of a gateway pipeline to support CDC.""" + id: Optional[str] = None """Unique identifier for this pipeline.""" @@ -104,6 +107,7 @@ def as_dict(self) -> dict: if self.dry_run is not None: body['dry_run'] = self.dry_run if self.edition is not None: body['edition'] = self.edition if self.filters: body['filters'] = self.filters.as_dict() + if self.gateway_definition: body['gateway_definition'] = self.gateway_definition.as_dict() if self.id is not None: body['id'] = self.id if self.ingestion_definition: body['ingestion_definition'] = self.ingestion_definition.as_dict() if self.libraries: body['libraries'] = [v.as_dict() for v in self.libraries] @@ -130,6 +134,7 @@ def from_dict(cls, d: Dict[str, any]) -> CreatePipeline: dry_run=d.get('dry_run', None), edition=d.get('edition', None), filters=_from_dict(d, 'filters', Filters), + gateway_definition=_from_dict(d, 'gateway_definition', IngestionGatewayPipelineDefinition), id=d.get('id', None), ingestion_definition=_from_dict(d, 'ingestion_definition', ManagedIngestionPipelineDefinition), @@ 
-266,6 +271,9 @@ class EditPipeline: filters: Optional[Filters] = None """Filters on which Pipeline packages to include in the deployed graph.""" + gateway_definition: Optional[IngestionGatewayPipelineDefinition] = None + """The definition of a gateway pipeline to support CDC.""" + id: Optional[str] = None """Unique identifier for this pipeline.""" @@ -317,6 +325,7 @@ def as_dict(self) -> dict: if self.expected_last_modified is not None: body['expected_last_modified'] = self.expected_last_modified if self.filters: body['filters'] = self.filters.as_dict() + if self.gateway_definition: body['gateway_definition'] = self.gateway_definition.as_dict() if self.id is not None: body['id'] = self.id if self.ingestion_definition: body['ingestion_definition'] = self.ingestion_definition.as_dict() if self.libraries: body['libraries'] = [v.as_dict() for v in self.libraries] @@ -344,6 +353,7 @@ def from_dict(cls, d: Dict[str, any]) -> EditPipeline: edition=d.get('edition', None), expected_last_modified=d.get('expected_last_modified', None), filters=_from_dict(d, 'filters', Filters), + gateway_definition=_from_dict(d, 'gateway_definition', IngestionGatewayPipelineDefinition), id=d.get('id', None), ingestion_definition=_from_dict(d, 'ingestion_definition', ManagedIngestionPipelineDefinition), @@ -570,6 +580,43 @@ def from_dict(cls, d: Dict[str, any]) -> IngestionConfig: return cls(schema=_from_dict(d, 'schema', SchemaSpec), table=_from_dict(d, 'table', TableSpec)) +@dataclass +class IngestionGatewayPipelineDefinition: + connection_id: Optional[str] = None + """Immutable. The Unity Catalog connection this gateway pipeline uses to communicate with the + source.""" + + gateway_storage_catalog: Optional[str] = None + """Required, Immutable. The name of the catalog for the gateway pipeline's storage location.""" + + gateway_storage_name: Optional[str] = None + """Required. The Unity Catalog-compatible naming for the gateway storage location. This is the + destination to use for the data that is extracted by the gateway. Delta Live Tables system will + automatically create the storage location under the catalog and schema.""" + + gateway_storage_schema: Optional[str] = None + """Required, Immutable. The name of the schema for the gateway pipelines's storage location.""" + + def as_dict(self) -> dict: + """Serializes the IngestionGatewayPipelineDefinition into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.connection_id is not None: body['connection_id'] = self.connection_id + if self.gateway_storage_catalog is not None: + body['gateway_storage_catalog'] = self.gateway_storage_catalog + if self.gateway_storage_name is not None: body['gateway_storage_name'] = self.gateway_storage_name + if self.gateway_storage_schema is not None: + body['gateway_storage_schema'] = self.gateway_storage_schema + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> IngestionGatewayPipelineDefinition: + """Deserializes the IngestionGatewayPipelineDefinition from a dictionary.""" + return cls(connection_id=d.get('connection_id', None), + gateway_storage_catalog=d.get('gateway_storage_catalog', None), + gateway_storage_name=d.get('gateway_storage_name', None), + gateway_storage_schema=d.get('gateway_storage_schema', None)) + + @dataclass class ListPipelineEventsResponse: events: Optional[List[PipelineEvent]] = None @@ -659,12 +706,17 @@ class ManagedIngestionPipelineDefinition: objects: Optional[List[IngestionConfig]] = None """Required. 
Settings specifying tables to replicate and the destination for the replicated tables.""" + table_configuration: Optional[TableSpecificConfig] = None + """Configuration settings to control the ingestion of tables. These settings are applied to all + tables in the pipeline.""" + def as_dict(self) -> dict: """Serializes the ManagedIngestionPipelineDefinition into a dictionary suitable for use as a JSON request body.""" body = {} if self.connection_name is not None: body['connection_name'] = self.connection_name if self.ingestion_gateway_id is not None: body['ingestion_gateway_id'] = self.ingestion_gateway_id if self.objects: body['objects'] = [v.as_dict() for v in self.objects] + if self.table_configuration: body['table_configuration'] = self.table_configuration.as_dict() return body @classmethod @@ -672,7 +724,8 @@ def from_dict(cls, d: Dict[str, any]) -> ManagedIngestionPipelineDefinition: """Deserializes the ManagedIngestionPipelineDefinition from a dictionary.""" return cls(connection_name=d.get('connection_name', None), ingestion_gateway_id=d.get('ingestion_gateway_id', None), - objects=_repeated_dict(d, 'objects', IngestionConfig)) + objects=_repeated_dict(d, 'objects', IngestionConfig), + table_configuration=_from_dict(d, 'table_configuration', TableSpecificConfig)) @dataclass @@ -1344,6 +1397,9 @@ class PipelineSpec: filters: Optional[Filters] = None """Filters on which Pipeline packages to include in the deployed graph.""" + gateway_definition: Optional[IngestionGatewayPipelineDefinition] = None + """The definition of a gateway pipeline to support CDC.""" + id: Optional[str] = None """Unique identifier for this pipeline.""" @@ -1389,6 +1445,7 @@ def as_dict(self) -> dict: if self.development is not None: body['development'] = self.development if self.edition is not None: body['edition'] = self.edition if self.filters: body['filters'] = self.filters.as_dict() + if self.gateway_definition: body['gateway_definition'] = self.gateway_definition.as_dict() if self.id is not None: body['id'] = self.id if self.ingestion_definition: body['ingestion_definition'] = self.ingestion_definition.as_dict() if self.libraries: body['libraries'] = [v.as_dict() for v in self.libraries] @@ -1413,6 +1470,7 @@ def from_dict(cls, d: Dict[str, any]) -> PipelineSpec: development=d.get('development', None), edition=d.get('edition', None), filters=_from_dict(d, 'filters', Filters), + gateway_definition=_from_dict(d, 'gateway_definition', IngestionGatewayPipelineDefinition), id=d.get('id', None), ingestion_definition=_from_dict(d, 'ingestion_definition', ManagedIngestionPipelineDefinition), @@ -1523,6 +1581,11 @@ class SchemaSpec: source_schema: Optional[str] = None """Required. Schema name in the source database.""" + table_configuration: Optional[TableSpecificConfig] = None + """Configuration settings to control the ingestion of tables. 
These settings are applied to all + tables in this schema and override the table_configuration defined in the + ManagedIngestionPipelineDefinition object.""" + def as_dict(self) -> dict: """Serializes the SchemaSpec into a dictionary suitable for use as a JSON request body.""" body = {} @@ -1530,6 +1593,7 @@ def as_dict(self) -> dict: if self.destination_schema is not None: body['destination_schema'] = self.destination_schema if self.source_catalog is not None: body['source_catalog'] = self.source_catalog if self.source_schema is not None: body['source_schema'] = self.source_schema + if self.table_configuration: body['table_configuration'] = self.table_configuration.as_dict() return body @classmethod @@ -1538,7 +1602,8 @@ def from_dict(cls, d: Dict[str, any]) -> SchemaSpec: return cls(destination_catalog=d.get('destination_catalog', None), destination_schema=d.get('destination_schema', None), source_catalog=d.get('source_catalog', None), - source_schema=d.get('source_schema', None)) + source_schema=d.get('source_schema', None), + table_configuration=_from_dict(d, 'table_configuration', TableSpecificConfig)) @dataclass @@ -1729,6 +1794,10 @@ class TableSpec: source_table: Optional[str] = None """Required. Table name in the source database.""" + table_configuration: Optional[TableSpecificConfig] = None + """Configuration settings to control the ingestion of tables. These settings override the + table_configuration defined in the ManagedIngestionPipelineDefinition object and the SchemaSpec.""" + def as_dict(self) -> dict: """Serializes the TableSpec into a dictionary suitable for use as a JSON request body.""" body = {} @@ -1738,6 +1807,7 @@ def as_dict(self) -> dict: if self.source_catalog is not None: body['source_catalog'] = self.source_catalog if self.source_schema is not None: body['source_schema'] = self.source_schema if self.source_table is not None: body['source_table'] = self.source_table + if self.table_configuration: body['table_configuration'] = self.table_configuration.as_dict() return body @classmethod @@ -1748,7 +1818,44 @@ def from_dict(cls, d: Dict[str, any]) -> TableSpec: destination_table=d.get('destination_table', None), source_catalog=d.get('source_catalog', None), source_schema=d.get('source_schema', None), - source_table=d.get('source_table', None)) + source_table=d.get('source_table', None), + table_configuration=_from_dict(d, 'table_configuration', TableSpecificConfig)) + + +@dataclass +class TableSpecificConfig: + primary_keys: Optional[List[str]] = None + """The primary key of the table used to apply changes.""" + + salesforce_include_formula_fields: Optional[bool] = None + """If true, formula fields defined in the table are included in the ingestion. 
This setting is only + valid for the Salesforce connector""" + + scd_type: Optional[TableSpecificConfigScdType] = None + """The SCD type to use to ingest the table.""" + + def as_dict(self) -> dict: + """Serializes the TableSpecificConfig into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.primary_keys: body['primary_keys'] = [v for v in self.primary_keys] + if self.salesforce_include_formula_fields is not None: + body['salesforce_include_formula_fields'] = self.salesforce_include_formula_fields + if self.scd_type is not None: body['scd_type'] = self.scd_type.value + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> TableSpecificConfig: + """Deserializes the TableSpecificConfig from a dictionary.""" + return cls(primary_keys=d.get('primary_keys', None), + salesforce_include_formula_fields=d.get('salesforce_include_formula_fields', None), + scd_type=_enum(d, 'scd_type', TableSpecificConfigScdType)) + + +class TableSpecificConfigScdType(Enum): + """The SCD type to use to ingest the table.""" + + SCD_TYPE_1 = 'SCD_TYPE_1' + SCD_TYPE_2 = 'SCD_TYPE_2' @dataclass @@ -1981,6 +2088,7 @@ def create(self, dry_run: Optional[bool] = None, edition: Optional[str] = None, filters: Optional[Filters] = None, + gateway_definition: Optional[IngestionGatewayPipelineDefinition] = None, id: Optional[str] = None, ingestion_definition: Optional[ManagedIngestionPipelineDefinition] = None, libraries: Optional[List[PipelineLibrary]] = None, @@ -2019,6 +2127,8 @@ def create(self, Pipeline product edition. :param filters: :class:`Filters` (optional) Filters on which Pipeline packages to include in the deployed graph. + :param gateway_definition: :class:`IngestionGatewayPipelineDefinition` (optional) + The definition of a gateway pipeline to support CDC. :param id: str (optional) Unique identifier for this pipeline. :param ingestion_definition: :class:`ManagedIngestionPipelineDefinition` (optional) @@ -2056,6 +2166,7 @@ def create(self, if dry_run is not None: body['dry_run'] = dry_run if edition is not None: body['edition'] = edition if filters is not None: body['filters'] = filters.as_dict() + if gateway_definition is not None: body['gateway_definition'] = gateway_definition.as_dict() if id is not None: body['id'] = id if ingestion_definition is not None: body['ingestion_definition'] = ingestion_definition.as_dict() if libraries is not None: body['libraries'] = [v.as_dict() for v in libraries] @@ -2385,6 +2496,7 @@ def update(self, edition: Optional[str] = None, expected_last_modified: Optional[int] = None, filters: Optional[Filters] = None, + gateway_definition: Optional[IngestionGatewayPipelineDefinition] = None, id: Optional[str] = None, ingestion_definition: Optional[ManagedIngestionPipelineDefinition] = None, libraries: Optional[List[PipelineLibrary]] = None, @@ -2426,6 +2538,8 @@ def update(self, modified after that time, then the request will fail with a conflict. :param filters: :class:`Filters` (optional) Filters on which Pipeline packages to include in the deployed graph. + :param gateway_definition: :class:`IngestionGatewayPipelineDefinition` (optional) + The definition of a gateway pipeline to support CDC. :param id: str (optional) Unique identifier for this pipeline. 
:param ingestion_definition: :class:`ManagedIngestionPipelineDefinition` (optional) @@ -2463,6 +2577,7 @@ def update(self, if edition is not None: body['edition'] = edition if expected_last_modified is not None: body['expected_last_modified'] = expected_last_modified if filters is not None: body['filters'] = filters.as_dict() + if gateway_definition is not None: body['gateway_definition'] = gateway_definition.as_dict() if id is not None: body['id'] = id if ingestion_definition is not None: body['ingestion_definition'] = ingestion_definition.as_dict() if libraries is not None: body['libraries'] = [v.as_dict() for v in libraries] diff --git a/databricks/sdk/service/serving.py b/databricks/sdk/service/serving.py index 5c9a061f9..c6a32c826 100755 --- a/databricks/sdk/service/serving.py +++ b/databricks/sdk/service/serving.py @@ -8,7 +8,7 @@ from dataclasses import dataclass from datetime import timedelta from enum import Enum -from typing import Any, Callable, Dict, Iterator, List, Optional +from typing import Any, BinaryIO, Callable, Dict, Iterator, List, Optional from ..errors import OperationFailed from ._internal import Wait, _enum, _from_dict, _repeated_dict @@ -170,6 +170,9 @@ class AppDeployment: creator: Optional[str] = None """The email of the user creates the deployment.""" + deployment_artifacts: Optional[AppDeploymentArtifacts] = None + """The deployment artifacts for an app.""" + deployment_id: Optional[str] = None """The unique id of the deployment.""" @@ -184,6 +187,7 @@ def as_dict(self) -> dict: body = {} if self.create_time is not None: body['create_time'] = self.create_time if self.creator is not None: body['creator'] = self.creator + if self.deployment_artifacts: body['deployment_artifacts'] = self.deployment_artifacts.as_dict() if self.deployment_id is not None: body['deployment_id'] = self.deployment_id if self.source_code_path is not None: body['source_code_path'] = self.source_code_path if self.status: body['status'] = self.status.as_dict() @@ -195,18 +199,37 @@ def from_dict(cls, d: Dict[str, any]) -> AppDeployment: """Deserializes the AppDeployment from a dictionary.""" return cls(create_time=d.get('create_time', None), creator=d.get('creator', None), + deployment_artifacts=_from_dict(d, 'deployment_artifacts', AppDeploymentArtifacts), deployment_id=d.get('deployment_id', None), source_code_path=d.get('source_code_path', None), status=_from_dict(d, 'status', AppDeploymentStatus), update_time=d.get('update_time', None)) +@dataclass +class AppDeploymentArtifacts: + source_code_path: Optional[str] = None + """The source code of the deployment.""" + + def as_dict(self) -> dict: + """Serializes the AppDeploymentArtifacts into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.source_code_path is not None: body['source_code_path'] = self.source_code_path + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> AppDeploymentArtifacts: + """Deserializes the AppDeploymentArtifacts from a dictionary.""" + return cls(source_code_path=d.get('source_code_path', None)) + + class AppDeploymentState(Enum): CANCELLED = 'CANCELLED' FAILED = 'FAILED' IN_PROGRESS = 'IN_PROGRESS' STATE_UNSPECIFIED = 'STATE_UNSPECIFIED' + STOPPED = 'STOPPED' SUCCEEDED = 'SUCCEEDED' @@ -491,6 +514,9 @@ class CreateServingEndpoint: """Rate limits to be applied to the serving endpoint. 
NOTE: only external and foundation model endpoints are supported as of now.""" + route_optimized: Optional[bool] = None + """Enable route optimization for the serving endpoint.""" + tags: Optional[List[EndpointTag]] = None """Tags to be attached to the serving endpoint and automatically propagated to billing logs.""" @@ -500,6 +526,7 @@ def as_dict(self) -> dict: if self.config: body['config'] = self.config.as_dict() if self.name is not None: body['name'] = self.name if self.rate_limits: body['rate_limits'] = [v.as_dict() for v in self.rate_limits] + if self.route_optimized is not None: body['route_optimized'] = self.route_optimized if self.tags: body['tags'] = [v.as_dict() for v in self.tags] return body @@ -509,6 +536,7 @@ def from_dict(cls, d: Dict[str, any]) -> CreateServingEndpoint: return cls(config=_from_dict(d, 'config', EndpointCoreConfigInput), name=d.get('name', None), rate_limits=_repeated_dict(d, 'rate_limits', RateLimit), + route_optimized=d.get('route_optimized', None), tags=_repeated_dict(d, 'tags', EndpointTag)) @@ -844,16 +872,18 @@ def from_dict(cls, d: Dict[str, any]) -> EnvVariable: @dataclass class ExportMetricsResponse: + contents: Optional[BinaryIO] = None def as_dict(self) -> dict: """Serializes the ExportMetricsResponse into a dictionary suitable for use as a JSON request body.""" body = {} + if self.contents: body['contents'] = self.contents return body @classmethod def from_dict(cls, d: Dict[str, any]) -> ExportMetricsResponse: """Deserializes the ExportMetricsResponse from a dictionary.""" - return cls() + return cls(contents=d.get('contents', None)) @dataclass @@ -1090,14 +1120,24 @@ def from_dict(cls, d: Dict[str, any]) -> ListEndpointsResponse: @dataclass class OpenAiConfig: - openai_api_key: str - """The Databricks secret key reference for an OpenAI or Azure OpenAI API key.""" + microsoft_entra_client_id: Optional[str] = None + """This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.""" + + microsoft_entra_client_secret: Optional[str] = None + """The Databricks secret key reference for the Microsoft Entra Client Secret that is only required + for Azure AD OpenAI.""" + + microsoft_entra_tenant_id: Optional[str] = None + """This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID.""" openai_api_base: Optional[str] = None """This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service provided by Azure.""" + openai_api_key: Optional[str] = None + """The Databricks secret key reference for an OpenAI or Azure OpenAI API key.""" + openai_api_type: Optional[str] = None """This is an optional field to specify the type of OpenAI API to use. 
For Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security access validation @@ -1118,6 +1158,12 @@ class OpenAiConfig: def as_dict(self) -> dict: """Serializes the OpenAiConfig into a dictionary suitable for use as a JSON request body.""" body = {} + if self.microsoft_entra_client_id is not None: + body['microsoft_entra_client_id'] = self.microsoft_entra_client_id + if self.microsoft_entra_client_secret is not None: + body['microsoft_entra_client_secret'] = self.microsoft_entra_client_secret + if self.microsoft_entra_tenant_id is not None: + body['microsoft_entra_tenant_id'] = self.microsoft_entra_tenant_id if self.openai_api_base is not None: body['openai_api_base'] = self.openai_api_base if self.openai_api_key is not None: body['openai_api_key'] = self.openai_api_key if self.openai_api_type is not None: body['openai_api_type'] = self.openai_api_type @@ -1130,7 +1176,10 @@ def as_dict(self) -> dict: @classmethod def from_dict(cls, d: Dict[str, any]) -> OpenAiConfig: """Deserializes the OpenAiConfig from a dictionary.""" - return cls(openai_api_base=d.get('openai_api_base', None), + return cls(microsoft_entra_client_id=d.get('microsoft_entra_client_id', None), + microsoft_entra_client_secret=d.get('microsoft_entra_client_secret', None), + microsoft_entra_tenant_id=d.get('microsoft_entra_tenant_id', None), + openai_api_base=d.get('openai_api_base', None), openai_api_key=d.get('openai_api_key', None), openai_api_type=d.get('openai_api_type', None), openai_api_version=d.get('openai_api_version', None), @@ -2123,6 +2172,9 @@ class ServingEndpointDetailed: creator: Optional[str] = None """The email of the user who created the serving endpoint.""" + endpoint_url: Optional[str] = None + """Endpoint invocation url if route optimization is enabled for endpoint""" + id: Optional[str] = None """System-generated ID of the endpoint. 
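A sketch of an external-model OpenAI configuration that relies on the new Microsoft Entra fields instead of `openai_api_key` (which is no longer required). All identifiers, the secret reference, and the `azuread` API type value are illustrative placeholders.

.. code-block:: python

    from databricks.sdk.service import serving

    openai_config = serving.OpenAiConfig(
        openai_api_base="https://my-resource.openai.azure.com/",
        openai_api_type="azuread",  # assumed value for Entra ID auth
        openai_api_version="2023-05-15",
        microsoft_entra_client_id="<application-client-id>",
        microsoft_entra_client_secret="{{secrets/my-scope/entra-client-secret}}",
        microsoft_entra_tenant_id="<tenant-id>",
    )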
This is used to refer to the endpoint in the Permissions API""" @@ -2139,6 +2191,9 @@ class ServingEndpointDetailed: permission_level: Optional[ServingEndpointDetailedPermissionLevel] = None """The permission level of the principal making the request.""" + route_optimized: Optional[bool] = None + """Boolean representing if route optimization has been enabled for the endpoint""" + state: Optional[EndpointState] = None """Information corresponding to the state of the serving endpoint.""" @@ -2154,12 +2209,14 @@ def as_dict(self) -> dict: if self.config: body['config'] = self.config.as_dict() if self.creation_timestamp is not None: body['creation_timestamp'] = self.creation_timestamp if self.creator is not None: body['creator'] = self.creator + if self.endpoint_url is not None: body['endpoint_url'] = self.endpoint_url if self.id is not None: body['id'] = self.id if self.last_updated_timestamp is not None: body['last_updated_timestamp'] = self.last_updated_timestamp if self.name is not None: body['name'] = self.name if self.pending_config: body['pending_config'] = self.pending_config.as_dict() if self.permission_level is not None: body['permission_level'] = self.permission_level.value + if self.route_optimized is not None: body['route_optimized'] = self.route_optimized if self.state: body['state'] = self.state.as_dict() if self.tags: body['tags'] = [v.as_dict() for v in self.tags] if self.task is not None: body['task'] = self.task @@ -2171,11 +2228,13 @@ def from_dict(cls, d: Dict[str, any]) -> ServingEndpointDetailed: return cls(config=_from_dict(d, 'config', EndpointCoreConfigOutput), creation_timestamp=d.get('creation_timestamp', None), creator=d.get('creator', None), + endpoint_url=d.get('endpoint_url', None), id=d.get('id', None), last_updated_timestamp=d.get('last_updated_timestamp', None), name=d.get('name', None), pending_config=_from_dict(d, 'pending_config', EndpointPendingConfig), permission_level=_enum(d, 'permission_level', ServingEndpointDetailedPermissionLevel), + route_optimized=d.get('route_optimized', None), state=_from_dict(d, 'state', EndpointState), tags=_repeated_dict(d, 'tags', EndpointTag), task=d.get('task', None)) @@ -2760,6 +2819,7 @@ def create(self, config: EndpointCoreConfigInput, *, rate_limits: Optional[List[RateLimit]] = None, + route_optimized: Optional[bool] = None, tags: Optional[List[EndpointTag]] = None) -> Wait[ServingEndpointDetailed]: """Create a new serving endpoint. @@ -2771,6 +2831,8 @@ def create(self, :param rate_limits: List[:class:`RateLimit`] (optional) Rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now. + :param route_optimized: bool (optional) + Enable route optimization for the serving endpoint. :param tags: List[:class:`EndpointTag`] (optional) Tags to be attached to the serving endpoint and automatically propagated to billing logs. 
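A sketch of creating a route-optimized endpoint with the new flag, assuming a Unity Catalog model served via `ServedEntityInput`; the endpoint name and model reference are placeholders.

.. code-block:: python

    from databricks.sdk import WorkspaceClient
    from databricks.sdk.service import serving

    w = WorkspaceClient()

    endpoint = w.serving_endpoints.create_and_wait(
        name="my-endpoint",
        config=serving.EndpointCoreConfigInput(served_entities=[
            serving.ServedEntityInput(entity_name="main.default.my_model",
                                      entity_version="1",
                                      workload_size="Small",
                                      scale_to_zero_enabled=True)
        ]),
        route_optimized=True,
    )
    print(endpoint.endpoint_url, endpoint.route_optimized)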
@@ -2782,6 +2844,7 @@ def create(self, if config is not None: body['config'] = config.as_dict() if name is not None: body['name'] = name if rate_limits is not None: body['rate_limits'] = [v.as_dict() for v in rate_limits] + if route_optimized is not None: body['route_optimized'] = route_optimized if tags is not None: body['tags'] = [v.as_dict() for v in tags] headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } @@ -2796,9 +2859,13 @@ def create_and_wait( config: EndpointCoreConfigInput, *, rate_limits: Optional[List[RateLimit]] = None, + route_optimized: Optional[bool] = None, tags: Optional[List[EndpointTag]] = None, timeout=timedelta(minutes=20)) -> ServingEndpointDetailed: - return self.create(config=config, name=name, rate_limits=rate_limits, + return self.create(config=config, + name=name, + rate_limits=rate_limits, + route_optimized=route_optimized, tags=tags).result(timeout=timeout) def delete(self, name: str): @@ -2814,7 +2881,7 @@ def delete(self, name: str): self._api.do('DELETE', f'/api/2.0/serving-endpoints/{name}', headers=headers) - def export_metrics(self, name: str): + def export_metrics(self, name: str) -> ExportMetricsResponse: """Get metrics of a serving endpoint. Retrieves the metrics associated with the provided serving endpoint in either Prometheus or @@ -2823,12 +2890,13 @@ def export_metrics(self, name: str): :param name: str The name of the serving endpoint to retrieve metrics for. This field is required. - + :returns: :class:`ExportMetricsResponse` """ - headers = {} + headers = {'Accept': 'text/plain', } - self._api.do('GET', f'/api/2.0/serving-endpoints/{name}/metrics', headers=headers) + res = self._api.do('GET', f'/api/2.0/serving-endpoints/{name}/metrics', headers=headers, raw=True) + return ExportMetricsResponse.from_dict(res) def get(self, name: str) -> ServingEndpointDetailed: """Get a single serving endpoint. 
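Given that `export_metrics()` now returns an `ExportMetricsResponse` whose `contents` field holds the raw Prometheus/OpenMetrics payload, a small consumption sketch (the endpoint name is a placeholder):

.. code-block:: python

    from databricks.sdk import WorkspaceClient

    w = WorkspaceClient()

    resp = w.serving_endpoints.export_metrics("my-endpoint")
    if resp.contents is not None:
        print(resp.contents.read().decode("utf-8"))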
diff --git a/databricks/sdk/service/sharing.py b/databricks/sdk/service/sharing.py index d01c9e8bf..fd01ea563 100755 --- a/databricks/sdk/service/sharing.py +++ b/databricks/sdk/service/sharing.py @@ -531,17 +531,23 @@ class CreateShare: comment: Optional[str] = None """User-provided free-form text description.""" + storage_root: Optional[str] = None + """Storage root URL for the share.""" + def as_dict(self) -> dict: """Serializes the CreateShare into a dictionary suitable for use as a JSON request body.""" body = {} if self.comment is not None: body['comment'] = self.comment if self.name is not None: body['name'] = self.name + if self.storage_root is not None: body['storage_root'] = self.storage_root return body @classmethod def from_dict(cls, d: Dict[str, any]) -> CreateShare: """Deserializes the CreateShare from a dictionary.""" - return cls(comment=d.get('comment', None), name=d.get('name', None)) + return cls(comment=d.get('comment', None), + name=d.get('name', None), + storage_root=d.get('storage_root', None)) @dataclass @@ -790,6 +796,7 @@ class Privilege(Enum): REFRESH = 'REFRESH' SELECT = 'SELECT' SET_SHARE_PERMISSION = 'SET_SHARE_PERMISSION' + SINGLE_USER_ACCESS = 'SINGLE_USER_ACCESS' USAGE = 'USAGE' USE_CATALOG = 'USE_CATALOG' USE_CONNECTION = 'USE_CONNECTION' @@ -1215,6 +1222,12 @@ class ShareInfo: owner: Optional[str] = None """Username of current owner of share.""" + storage_location: Optional[str] = None + """Storage Location URL (full path) for the share.""" + + storage_root: Optional[str] = None + """Storage root URL for the share.""" + updated_at: Optional[int] = None """Time at which this share was updated, in epoch milliseconds.""" @@ -1230,6 +1243,8 @@ def as_dict(self) -> dict: if self.name is not None: body['name'] = self.name if self.objects: body['objects'] = [v.as_dict() for v in self.objects] if self.owner is not None: body['owner'] = self.owner + if self.storage_location is not None: body['storage_location'] = self.storage_location + if self.storage_root is not None: body['storage_root'] = self.storage_root if self.updated_at is not None: body['updated_at'] = self.updated_at if self.updated_by is not None: body['updated_by'] = self.updated_by return body @@ -1243,6 +1258,8 @@ def from_dict(cls, d: Dict[str, any]) -> ShareInfo: name=d.get('name', None), objects=_repeated_dict(d, 'objects', SharedDataObject), owner=d.get('owner', None), + storage_location=d.get('storage_location', None), + storage_root=d.get('storage_root', None), updated_at=d.get('updated_at', None), updated_by=d.get('updated_by', None)) @@ -1576,6 +1593,9 @@ class UpdateShare: owner: Optional[str] = None """Username of current owner of share.""" + storage_root: Optional[str] = None + """Storage root URL for the share.""" + updates: Optional[List[SharedDataObjectUpdate]] = None """Array of shared data object updates.""" @@ -1586,6 +1606,7 @@ def as_dict(self) -> dict: if self.name is not None: body['name'] = self.name if self.new_name is not None: body['new_name'] = self.new_name if self.owner is not None: body['owner'] = self.owner + if self.storage_root is not None: body['storage_root'] = self.storage_root if self.updates: body['updates'] = [v.as_dict() for v in self.updates] return body @@ -1596,6 +1617,7 @@ def from_dict(cls, d: Dict[str, any]) -> UpdateShare: name=d.get('name', None), new_name=d.get('new_name', None), owner=d.get('owner', None), + storage_root=d.get('storage_root', None), updates=_repeated_dict(d, 'updates', SharedDataObjectUpdate)) @@ -2193,7 +2215,11 @@ class SharesAPI: 
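A sketch of the new `storage_root` support on shares, using the `create()` signature introduced in the following hunk; the cloud path is a placeholder, and (as the updated `update()` docstring later in this file notes) the storage root cannot be changed once the share contains notebook files.

.. code-block:: python

    from databricks.sdk import WorkspaceClient

    w = WorkspaceClient()

    share = w.shares.create(
        name="quarterly-reports",
        comment="External reporting share",
        storage_root="s3://my-bucket/shares/quarterly-reports",
    )
    print(share.storage_location, share.storage_root)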
def __init__(self, api_client): self._api = api_client - def create(self, name: str, *, comment: Optional[str] = None) -> ShareInfo: + def create(self, + name: str, + *, + comment: Optional[str] = None, + storage_root: Optional[str] = None) -> ShareInfo: """Create a share. Creates a new share for data objects. Data objects can be added after creation with **update**. The @@ -2203,12 +2229,15 @@ def create(self, name: str, *, comment: Optional[str] = None) -> ShareInfo: Name of the share. :param comment: str (optional) User-provided free-form text description. + :param storage_root: str (optional) + Storage root URL for the share. :returns: :class:`ShareInfo` """ body = {} if comment is not None: body['comment'] = comment if name is not None: body['name'] = name + if storage_root is not None: body['storage_root'] = storage_root headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } res = self._api.do('POST', '/api/2.1/unity-catalog/shares', body=body, headers=headers) @@ -2288,6 +2317,7 @@ def update(self, comment: Optional[str] = None, new_name: Optional[str] = None, owner: Optional[str] = None, + storage_root: Optional[str] = None, updates: Optional[List[SharedDataObjectUpdate]] = None) -> ShareInfo: """Update a share. @@ -2299,6 +2329,8 @@ def update(self, In the case that the share name is changed, **updateShare** requires that the caller is both the share owner and a metastore admin. + If there are notebook files in the share, the __storage_root__ field cannot be updated. + For each table that is added through this method, the share owner must also have **SELECT** privilege on the table. This privilege must be maintained indefinitely for recipients to be able to access the table. Typically, you should use a group as the share owner. @@ -2313,6 +2345,8 @@ def update(self, New name for the share. :param owner: str (optional) Username of current owner of share. + :param storage_root: str (optional) + Storage root URL for the share. :param updates: List[:class:`SharedDataObjectUpdate`] (optional) Array of shared data object updates. @@ -2322,6 +2356,7 @@ def update(self, if comment is not None: body['comment'] = comment if new_name is not None: body['new_name'] = new_name if owner is not None: body['owner'] = owner + if storage_root is not None: body['storage_root'] = storage_root if updates is not None: body['updates'] = [v.as_dict() for v in updates] headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } diff --git a/databricks/sdk/service/sql.py b/databricks/sdk/service/sql.py index bfef44af3..557b53e37 100755 --- a/databricks/sdk/service/sql.py +++ b/databricks/sdk/service/sql.py @@ -360,7 +360,6 @@ def from_dict(cls, d: Dict[str, any]) -> ChannelInfo: class ChannelName(Enum): - """Name of the channel""" CHANNEL_NAME_CURRENT = 'CHANNEL_NAME_CURRENT' CHANNEL_NAME_CUSTOM = 'CHANNEL_NAME_CUSTOM' diff --git a/databricks/sdk/service/vectorsearch.py b/databricks/sdk/service/vectorsearch.py index 94c0125a8..a43ae5869 100755 --- a/databricks/sdk/service/vectorsearch.py +++ b/databricks/sdk/service/vectorsearch.py @@ -235,7 +235,11 @@ class DeltaSyncVectorIndexSpecRequest: """The columns that contain the embedding source.""" embedding_vector_columns: Optional[List[EmbeddingVectorColumn]] = None - """The columns that contain the embedding vectors.""" + """The columns that contain the embedding vectors. 
The format should be array[double].""" + + embedding_writeback_table: Optional[str] = None + """[Optional] Automatically sync the vector index contents and computed embeddings to the specified + Delta table. The only supported table name is the index name with the suffix `_writeback_table`.""" pipeline_type: Optional[PipelineType] = None """Pipeline execution mode. @@ -256,6 +260,8 @@ def as_dict(self) -> dict: body['embedding_source_columns'] = [v.as_dict() for v in self.embedding_source_columns] if self.embedding_vector_columns: body['embedding_vector_columns'] = [v.as_dict() for v in self.embedding_vector_columns] + if self.embedding_writeback_table is not None: + body['embedding_writeback_table'] = self.embedding_writeback_table if self.pipeline_type is not None: body['pipeline_type'] = self.pipeline_type.value if self.source_table is not None: body['source_table'] = self.source_table return body @@ -267,6 +273,7 @@ def from_dict(cls, d: Dict[str, any]) -> DeltaSyncVectorIndexSpecRequest: EmbeddingSourceColumn), embedding_vector_columns=_repeated_dict(d, 'embedding_vector_columns', EmbeddingVectorColumn), + embedding_writeback_table=d.get('embedding_writeback_table', None), pipeline_type=_enum(d, 'pipeline_type', PipelineType), source_table=d.get('source_table', None)) @@ -279,6 +286,9 @@ class DeltaSyncVectorIndexSpecResponse: embedding_vector_columns: Optional[List[EmbeddingVectorColumn]] = None """The columns that contain the embedding vectors.""" + embedding_writeback_table: Optional[str] = None + """[Optional] Name of the Delta table to sync the vector index contents and computed embeddings to.""" + pipeline_id: Optional[str] = None """The ID of the pipeline that is used to sync the index.""" @@ -301,6 +311,8 @@ def as_dict(self) -> dict: body['embedding_source_columns'] = [v.as_dict() for v in self.embedding_source_columns] if self.embedding_vector_columns: body['embedding_vector_columns'] = [v.as_dict() for v in self.embedding_vector_columns] + if self.embedding_writeback_table is not None: + body['embedding_writeback_table'] = self.embedding_writeback_table if self.pipeline_id is not None: body['pipeline_id'] = self.pipeline_id if self.pipeline_type is not None: body['pipeline_type'] = self.pipeline_type.value if self.source_table is not None: body['source_table'] = self.source_table @@ -313,6 +325,7 @@ def from_dict(cls, d: Dict[str, any]) -> DeltaSyncVectorIndexSpecResponse: EmbeddingSourceColumn), embedding_vector_columns=_repeated_dict(d, 'embedding_vector_columns', EmbeddingVectorColumn), + embedding_writeback_table=d.get('embedding_writeback_table', None), pipeline_id=d.get('pipeline_id', None), pipeline_type=_enum(d, 'pipeline_type', PipelineType), source_table=d.get('source_table', None)) @@ -515,6 +528,22 @@ def from_dict(cls, d: Dict[str, any]) -> ListEndpointResponse: next_page_token=d.get('next_page_token', None)) +@dataclass +class ListValue: + values: Optional[List[Value]] = None + + def as_dict(self) -> dict: + """Serializes the ListValue into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.values: body['values'] = [v.as_dict() for v in self.values] + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> ListValue: + """Deserializes the ListValue from a dictionary.""" + return cls(values=_repeated_dict(d, 'values', Value)) + + @dataclass class ListVectorIndexesResponse: next_page_token: Optional[str] = None @@ -537,6 +566,29 @@ def from_dict(cls, d: Dict[str, any]) -> ListVectorIndexesResponse: 
vector_indexes=_repeated_dict(d, 'vector_indexes', MiniVectorIndex)) +@dataclass +class MapStringValueEntry: + """Key-value pair.""" + + key: Optional[str] = None + """Column name.""" + + value: Optional[Value] = None + """Column value, nullable.""" + + def as_dict(self) -> dict: + """Serializes the MapStringValueEntry into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.key is not None: body['key'] = self.key + if self.value: body['value'] = self.value.as_dict() + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> MapStringValueEntry: + """Deserializes the MapStringValueEntry from a dictionary.""" + return cls(key=d.get('key', None), value=_from_dict(d, 'value', Value)) + + @dataclass class MiniVectorIndex: creator: Optional[str] = None @@ -712,6 +764,75 @@ def from_dict(cls, d: Dict[str, any]) -> ResultManifest: return cls(column_count=d.get('column_count', None), columns=_repeated_dict(d, 'columns', ColumnInfo)) +@dataclass +class ScanVectorIndexRequest: + """Request payload for scanning data from a vector index.""" + + index_name: Optional[str] = None + """Name of the vector index to scan.""" + + last_primary_key: Optional[str] = None + """Primary key of the last entry returned in the previous scan.""" + + num_results: Optional[int] = None + """Number of results to return. Defaults to 10.""" + + def as_dict(self) -> dict: + """Serializes the ScanVectorIndexRequest into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.index_name is not None: body['index_name'] = self.index_name + if self.last_primary_key is not None: body['last_primary_key'] = self.last_primary_key + if self.num_results is not None: body['num_results'] = self.num_results + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> ScanVectorIndexRequest: + """Deserializes the ScanVectorIndexRequest from a dictionary.""" + return cls(index_name=d.get('index_name', None), + last_primary_key=d.get('last_primary_key', None), + num_results=d.get('num_results', None)) + + +@dataclass +class ScanVectorIndexResponse: + """Response to a scan vector index request.""" + + data: Optional[List[Struct]] = None + """List of data entries""" + + last_primary_key: Optional[str] = None + """Primary key of the last entry.""" + + def as_dict(self) -> dict: + """Serializes the ScanVectorIndexResponse into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.data: body['data'] = [v.as_dict() for v in self.data] + if self.last_primary_key is not None: body['last_primary_key'] = self.last_primary_key + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> ScanVectorIndexResponse: + """Deserializes the ScanVectorIndexResponse from a dictionary.""" + return cls(data=_repeated_dict(d, 'data', Struct), last_primary_key=d.get('last_primary_key', None)) + + +@dataclass +class Struct: + fields: Optional[List[MapStringValueEntry]] = None + """Data entry, corresponding to a row in a vector index.""" + + def as_dict(self) -> dict: + """Serializes the Struct into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.fields: body['fields'] = [v.as_dict() for v in self.fields] + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> Struct: + """Deserializes the Struct from a dictionary.""" + return cls(fields=_repeated_dict(d, 'fields', MapStringValueEntry)) + + @dataclass class SyncIndexResponse: @@ -805,6 +926,42 @@ def from_dict(cls, d: Dict[str, any]) -> 
UpsertDataVectorIndexResponse: status=_enum(d, 'status', UpsertDataStatus)) +@dataclass +class Value: + bool_value: Optional[bool] = None + + list_value: Optional[ListValue] = None + + null_value: Optional[str] = None + + number_value: Optional[float] = None + + string_value: Optional[str] = None + + struct_value: Optional[Struct] = None + + def as_dict(self) -> dict: + """Serializes the Value into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.bool_value is not None: body['bool_value'] = self.bool_value + if self.list_value: body['list_value'] = self.list_value.as_dict() + if self.null_value is not None: body['null_value'] = self.null_value + if self.number_value is not None: body['number_value'] = self.number_value + if self.string_value is not None: body['string_value'] = self.string_value + if self.struct_value: body['struct_value'] = self.struct_value.as_dict() + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> Value: + """Deserializes the Value from a dictionary.""" + return cls(bool_value=d.get('bool_value', None), + list_value=_from_dict(d, 'list_value', ListValue), + null_value=d.get('null_value', None), + number_value=d.get('number_value', None), + string_value=d.get('string_value', None), + struct_value=_from_dict(d, 'struct_value', Struct)) + + @dataclass class VectorIndex: creator: Optional[str] = None @@ -1216,6 +1373,36 @@ def query_index(self, headers=headers) return QueryVectorIndexResponse.from_dict(res) + def scan_index(self, + index_name: str, + *, + last_primary_key: Optional[str] = None, + num_results: Optional[int] = None) -> ScanVectorIndexResponse: + """Scan an index. + + Scan the specified vector index and return the first `num_results` entries after the exclusive + `primary_key`. + + :param index_name: str + Name of the vector index to scan. + :param last_primary_key: str (optional) + Primary key of the last entry returned in the previous scan. + :param num_results: int (optional) + Number of results to return. Defaults to 10. + + :returns: :class:`ScanVectorIndexResponse` + """ + body = {} + if last_primary_key is not None: body['last_primary_key'] = last_primary_key + if num_results is not None: body['num_results'] = num_results + headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } + + res = self._api.do('POST', + f'/api/2.0/vector-search/indexes/{index_name}/scan', + body=body, + headers=headers) + return ScanVectorIndexResponse.from_dict(res) + def sync_index(self, index_name: str): """Synchronize an index. diff --git a/databricks/sdk/service/workspace.py b/databricks/sdk/service/workspace.py index 0bf9e3c00..7be221323 100755 --- a/databricks/sdk/service/workspace.py +++ b/databricks/sdk/service/workspace.py @@ -144,7 +144,8 @@ class CreateRepo: gitLabEnterpriseEdition and awsCodeCommit.""" path: Optional[str] = None - """Desired path for the repo in the workspace. Must be in the format /Repos/{folder}/{repo-name}.""" + """Desired path for the repo in the workspace. Almost any path in the workspace can be chosen. If + repo is created in /Repos, path must be in the format /Repos/{folder}/{repo-name}.""" sparse_checkout: Optional[SparseCheckout] = None """If specified, the repo will be created with sparse checkout enabled. You cannot enable/disable @@ -949,7 +950,8 @@ class RepoInfo: """ID of the repo object in the workspace.""" path: Optional[str] = None - """Desired path for the repo in the workspace. 
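A sketch of paging through a vector index with the new `scan_index()` method; the index name is a placeholder, and `last_primary_key` acts as an exclusive cursor for fetching the next page.

.. code-block:: python

    from databricks.sdk import WorkspaceClient

    w = WorkspaceClient()

    page = w.vector_search_indexes.scan_index(index_name="main.default.my_index",
                                              num_results=50)
    for entry in page.data or []:
        # each entry is a Struct of key/value fields
        print({f.key: f.value for f in entry.fields or []})

    # pass page.last_primary_key as last_primary_key to request the next page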
Must be in the format /Repos/{folder}/{repo-name}.""" + """Desired path for the repo in the workspace. Almost any path in the workspace can be chosen. If + repo is created in /Repos, path must be in the format /Repos/{folder}/{repo-name}.""" provider: Optional[str] = None """Git provider. This field is case-insensitive. The available Git providers are gitHub, @@ -1613,7 +1615,8 @@ def create(self, bitbucketCloud, gitLab, azureDevOpsServices, gitHubEnterprise, bitbucketServer, gitLabEnterpriseEdition and awsCodeCommit. :param path: str (optional) - Desired path for the repo in the workspace. Must be in the format /Repos/{folder}/{repo-name}. + Desired path for the repo in the workspace. Almost any path in the workspace can be chosen. If repo + is created in /Repos, path must be in the format /Repos/{folder}/{repo-name}. :param sparse_checkout: :class:`SparseCheckout` (optional) If specified, the repo will be created with sparse checkout enabled. You cannot enable/disable sparse checkout after the repo is created. @@ -1706,7 +1709,8 @@ def list(self, Token used to get the next page of results. If not specified, returns the first page of results as well as a next page token if there are more results. :param path_prefix: str (optional) - Filters repos that have paths starting with the given path prefix. + Filters repos that have paths starting with the given path prefix. If not provided repos from /Repos + will be served. :returns: Iterator over :class:`RepoInfo` """ diff --git a/databricks/sdk/version.py b/databricks/sdk/version.py index 07f3dd706..1bf36757c 100644 --- a/databricks/sdk/version.py +++ b/databricks/sdk/version.py @@ -1 +1 @@ -__version__ = '0.27.1' +__version__ = '0.28.0' diff --git a/docs/dbdataclasses/catalog.rst b/docs/dbdataclasses/catalog.rst index 1e9191464..9e1e26b1a 100644 --- a/docs/dbdataclasses/catalog.rst +++ b/docs/dbdataclasses/catalog.rst @@ -472,20 +472,6 @@ These dataclasses are used in the SDK to represent API requests and responses fo :members: :undoc-members: -.. py:class:: DisableSchemaName - - .. py:attribute:: ACCESS - :value: "ACCESS" - - .. py:attribute:: BILLING - :value: "BILLING" - - .. py:attribute:: LINEAGE - :value: "LINEAGE" - - .. py:attribute:: OPERATIONAL_DATA - :value: "OPERATIONAL_DATA" - .. autoclass:: EffectivePermissionsList :members: :undoc-members: @@ -529,20 +515,6 @@ These dataclasses are used in the SDK to represent API requests and responses fo :members: :undoc-members: -.. py:class:: EnableSchemaName - - .. py:attribute:: ACCESS - :value: "ACCESS" - - .. py:attribute:: BILLING - :value: "BILLING" - - .. py:attribute:: LINEAGE - :value: "LINEAGE" - - .. py:attribute:: OPERATIONAL_DATA - :value: "OPERATIONAL_DATA" - .. autoclass:: EncryptionDetails :members: :undoc-members: @@ -864,6 +836,10 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: SCHEDULE :value: "SCHEDULE" +.. autoclass:: MonitorRefreshListResponse + :members: + :undoc-members: + .. autoclass:: MonitorSnapshot :members: :undoc-members: @@ -1050,6 +1026,9 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: SET_SHARE_PERMISSION :value: "SET_SHARE_PERMISSION" + .. py:attribute:: SINGLE_USER_ACCESS + :value: "SINGLE_USER_ACCESS" + .. 
py:attribute:: USAGE :value: "USAGE" diff --git a/docs/dbdataclasses/compute.rst b/docs/dbdataclasses/compute.rst index 0341fbf75..6fad3e4ef 100644 --- a/docs/dbdataclasses/compute.rst +++ b/docs/dbdataclasses/compute.rst @@ -212,10 +212,6 @@ These dataclasses are used in the SDK to represent API requests and responses fo :members: :undoc-members: -.. autoclass:: ClusterStatusResponse - :members: - :undoc-members: - .. autoclass:: Command :members: :undoc-members: @@ -312,7 +308,9 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: DataSecurityMode Data security mode decides what data governance model to use when accessing data from a cluster. - * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode. * `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages and cluster features might be limited. * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters. + * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode. * `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages and cluster features might be limited. + The following modes are deprecated starting with Databricks Runtime 15.0 and will be removed for future Databricks Runtime versions: + * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters. .. py:attribute:: LEGACY_PASSTHROUGH :value: "LEGACY_PASSTHROUGH" diff --git a/docs/dbdataclasses/marketplace.rst b/docs/dbdataclasses/marketplace.rst index 50226a5d5..0c5cb3072 100644 --- a/docs/dbdataclasses/marketplace.rst +++ b/docs/dbdataclasses/marketplace.rst @@ -532,21 +532,6 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: SORT_BY_UNSPECIFIED :value: "SORT_BY_UNSPECIFIED" -.. autoclass:: SortBySpec - :members: - :undoc-members: - -.. py:class:: SortOrder - - .. py:attribute:: SORT_ORDER_ASCENDING - :value: "SORT_ORDER_ASCENDING" - - .. py:attribute:: SORT_ORDER_DESCENDING - :value: "SORT_ORDER_DESCENDING" - - .. py:attribute:: SORT_ORDER_UNSPECIFIED - :value: "SORT_ORDER_UNSPECIFIED" - .. 
autoclass:: TokenDetail :members: :undoc-members: diff --git a/docs/dbdataclasses/pipelines.rst b/docs/dbdataclasses/pipelines.rst index 993a5fba0..385bf2021 100644 --- a/docs/dbdataclasses/pipelines.rst +++ b/docs/dbdataclasses/pipelines.rst @@ -93,6 +93,10 @@ These dataclasses are used in the SDK to represent API requests and responses fo :members: :undoc-members: +.. autoclass:: IngestionGatewayPipelineDefinition + :members: + :undoc-members: + .. autoclass:: ListPipelineEventsResponse :members: :undoc-members: @@ -303,6 +307,20 @@ These dataclasses are used in the SDK to represent API requests and responses fo :members: :undoc-members: +.. autoclass:: TableSpecificConfig + :members: + :undoc-members: + +.. py:class:: TableSpecificConfigScdType + + The SCD type to use to ingest the table. + + .. py:attribute:: SCD_TYPE_1 + :value: "SCD_TYPE_1" + + .. py:attribute:: SCD_TYPE_2 + :value: "SCD_TYPE_2" + .. autoclass:: UpdateInfo :members: :undoc-members: diff --git a/docs/dbdataclasses/serving.rst b/docs/dbdataclasses/serving.rst index 9f2a1743a..89b0e2558 100644 --- a/docs/dbdataclasses/serving.rst +++ b/docs/dbdataclasses/serving.rst @@ -40,6 +40,10 @@ These dataclasses are used in the SDK to represent API requests and responses fo :members: :undoc-members: +.. autoclass:: AppDeploymentArtifacts + :members: + :undoc-members: + .. py:class:: AppDeploymentState .. py:attribute:: CANCELLED @@ -54,6 +58,9 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: STATE_UNSPECIFIED :value: "STATE_UNSPECIFIED" + .. py:attribute:: STOPPED + :value: "STOPPED" + .. py:attribute:: SUCCEEDED :value: "SUCCEEDED" diff --git a/docs/dbdataclasses/sharing.rst b/docs/dbdataclasses/sharing.rst index f25f3f575..ff48c9774 100644 --- a/docs/dbdataclasses/sharing.rst +++ b/docs/dbdataclasses/sharing.rst @@ -289,6 +289,9 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: SET_SHARE_PERMISSION :value: "SET_SHARE_PERMISSION" + .. py:attribute:: SINGLE_USER_ACCESS + :value: "SINGLE_USER_ACCESS" + .. py:attribute:: USAGE :value: "USAGE" diff --git a/docs/dbdataclasses/sql.rst b/docs/dbdataclasses/sql.rst index fe1469a30..adf3ced56 100644 --- a/docs/dbdataclasses/sql.rst +++ b/docs/dbdataclasses/sql.rst @@ -64,8 +64,6 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: ChannelName - Name of the channel - .. py:attribute:: CHANNEL_NAME_CURRENT :value: "CHANNEL_NAME_CURRENT" diff --git a/docs/dbdataclasses/vectorsearch.rst b/docs/dbdataclasses/vectorsearch.rst index ff7cc285b..1395ecb0b 100644 --- a/docs/dbdataclasses/vectorsearch.rst +++ b/docs/dbdataclasses/vectorsearch.rst @@ -105,10 +105,18 @@ These dataclasses are used in the SDK to represent API requests and responses fo :members: :undoc-members: +.. autoclass:: ListValue + :members: + :undoc-members: + .. autoclass:: ListVectorIndexesResponse :members: :undoc-members: +.. autoclass:: MapStringValueEntry + :members: + :undoc-members: + .. autoclass:: MiniVectorIndex :members: :undoc-members: @@ -140,6 +148,18 @@ These dataclasses are used in the SDK to represent API requests and responses fo :members: :undoc-members: +.. autoclass:: ScanVectorIndexRequest + :members: + :undoc-members: + +.. autoclass:: ScanVectorIndexResponse + :members: + :undoc-members: + +.. autoclass:: Struct + :members: + :undoc-members: + .. 
autoclass:: SyncIndexResponse :members: :undoc-members: @@ -169,6 +189,10 @@ These dataclasses are used in the SDK to represent API requests and responses fo :members: :undoc-members: +.. autoclass:: Value + :members: + :undoc-members: + .. autoclass:: VectorIndex :members: :undoc-members: diff --git a/docs/workspace/catalog/connections.rst b/docs/workspace/catalog/connections.rst index ba38188b4..b2637c2d0 100644 --- a/docs/workspace/catalog/connections.rst +++ b/docs/workspace/catalog/connections.rst @@ -128,7 +128,7 @@ :returns: :class:`ConnectionInfo` - .. py:method:: list() -> Iterator[ConnectionInfo] + .. py:method:: list( [, max_results: Optional[int], page_token: Optional[str]]) -> Iterator[ConnectionInfo] Usage: @@ -136,15 +136,24 @@ .. code-block:: from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog w = WorkspaceClient() - conn_list = w.connections.list() + conn_list = w.connections.list(catalog.ListConnectionsRequest()) List connections. List all connections. + :param max_results: int (optional) + Maximum number of connections to return. - If not set, all connections are returned (not + recommended). - when set to a value greater than 0, the page length is the minimum of this value and + a server configured value; - when set to 0, the page length is set to a server configured value + (recommended); - when set to a value less than 0, an invalid parameter error is returned; + :param page_token: str (optional) + Opaque pagination token to go to next page based on previous query. + :returns: Iterator over :class:`ConnectionInfo` diff --git a/docs/workspace/catalog/index.rst b/docs/workspace/catalog/index.rst index d61c95cf6..935804016 100644 --- a/docs/workspace/catalog/index.rst +++ b/docs/workspace/catalog/index.rst @@ -13,10 +13,10 @@ Configure data governance with Unity Catalog for metastores, catalogs, schemas, external_locations functions grants - lakehouse_monitors metastores model_versions online_tables + quality_monitors registered_models schemas storage_credentials diff --git a/docs/workspace/catalog/quality_monitors.rst b/docs/workspace/catalog/quality_monitors.rst new file mode 100644 index 000000000..030094049 --- /dev/null +++ b/docs/workspace/catalog/quality_monitors.rst @@ -0,0 +1,236 @@ +``w.quality_monitors``: Quality Monitors +======================================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: QualityMonitorsAPI + + A monitor computes and monitors data or model quality metrics for a table over time. It generates metrics + tables and a dashboard that you can use to monitor table health and set alerts. + + Most write operations require the user to be the owner of the table (or its parent schema or parent + catalog). Viewing the dashboard, computed metrics, or monitor configuration only requires the user to have + **SELECT** privileges on the table (along with **USE_SCHEMA** and **USE_CATALOG**). + + .. py:method:: cancel_refresh(table_name: str, refresh_id: str) + + Cancel refresh. + + Cancel an active monitor refresh for the given refresh ID. + + The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the + table's parent catalog and be an owner of the table's parent schema 3. have the following permissions: + - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - be an + owner of the table + + Additionally, the call must be made from the workspace where the monitor was created. 
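As a hedged illustration of the `cancel_refresh` call described above, a minimal sketch; the table name and refresh ID are placeholders, not values taken from this patch:

.. code-block:: python

    from databricks.sdk import WorkspaceClient

    w = WorkspaceClient()

    # Cancel an in-progress metric refresh for a monitored table.
    # Both identifiers below are placeholders for illustration only.
    w.quality_monitors.cancel_refresh(table_name="main.sales.orders",
                                      refresh_id="12345")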
+ + :param table_name: str + Full name of the table. + :param refresh_id: str + ID of the refresh. + + + + + .. py:method:: create(table_name: str, assets_dir: str, output_schema_name: str [, baseline_table_name: Optional[str], custom_metrics: Optional[List[MonitorMetric]], data_classification_config: Optional[MonitorDataClassificationConfig], inference_log: Optional[MonitorInferenceLog], notifications: Optional[MonitorNotifications], schedule: Optional[MonitorCronSchedule], skip_builtin_dashboard: Optional[bool], slicing_exprs: Optional[List[str]], snapshot: Optional[MonitorSnapshot], time_series: Optional[MonitorTimeSeries], warehouse_id: Optional[str]]) -> MonitorInfo + + Create a table monitor. + + Creates a new monitor for the specified table. + + The caller must either: 1. be an owner of the table's parent catalog, have **USE_SCHEMA** on the + table's parent schema, and have **SELECT** access on the table 2. have **USE_CATALOG** on the table's + parent catalog, be an owner of the table's parent schema, and have **SELECT** access on the table. 3. + have the following permissions: - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on + the table's parent schema - be an owner of the table. + + Workspace assets, such as the dashboard, will be created in the workspace where this call was made. + + :param table_name: str + Full name of the table. + :param assets_dir: str + The directory to store monitoring assets (e.g. dashboard, metric tables). + :param output_schema_name: str + Schema where output metric tables are created. + :param baseline_table_name: str (optional) + Name of the baseline table from which drift metrics are computed from. Columns in the monitored + table should also be present in the baseline table. + :param custom_metrics: List[:class:`MonitorMetric`] (optional) + Custom metrics to compute on the monitored table. These can be aggregate metrics, derived metrics + (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows). + :param data_classification_config: :class:`MonitorDataClassificationConfig` (optional) + The data classification config for the monitor. + :param inference_log: :class:`MonitorInferenceLog` (optional) + Configuration for monitoring inference logs. + :param notifications: :class:`MonitorNotifications` (optional) + The notification settings for the monitor. + :param schedule: :class:`MonitorCronSchedule` (optional) + The schedule for automatically updating and refreshing metric tables. + :param skip_builtin_dashboard: bool (optional) + Whether to skip creating a default dashboard summarizing data quality metrics. + :param slicing_exprs: List[str] (optional) + List of column expressions to slice data with for targeted analysis. The data is grouped by each + expression independently, resulting in a separate slice for each predicate and its complements. For + high-cardinality columns, only the top 100 unique values by frequency will generate slices. + :param snapshot: :class:`MonitorSnapshot` (optional) + Configuration for monitoring snapshot tables. + :param time_series: :class:`MonitorTimeSeries` (optional) + Configuration for monitoring time series tables. + :param warehouse_id: str (optional) + Optional argument to specify the warehouse for dashboard creation. If not specified, the first + running warehouse will be used. + + :returns: :class:`MonitorInfo` + + + .. py:method:: delete(table_name: str) + + Delete a table monitor. + + Deletes a monitor for the specified table. 
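A minimal sketch of the `create()` call documented above, assuming a snapshot-style monitor; the table, assets directory, and schema names are placeholders, and an argument-free `MonitorSnapshot()` is an assumption about that config dataclass:

.. code-block:: python

    from databricks.sdk import WorkspaceClient
    from databricks.sdk.service.catalog import MonitorSnapshot

    w = WorkspaceClient()

    # Create a snapshot monitor; all names below are placeholders.
    info = w.quality_monitors.create(
        table_name="main.sales.orders",
        assets_dir="/Workspace/Users/someone@example.com/monitoring",
        output_schema_name="main.monitoring",
        snapshot=MonitorSnapshot(),
    )
    print(info.status)  # MonitorInfo returned by the service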
+ + The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the + table's parent catalog and be an owner of the table's parent schema 3. have the following permissions: + - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - be an + owner of the table. + + Additionally, the call must be made from the workspace where the monitor was created. + + Note that the metric tables and dashboard will not be deleted as part of this call; those assets must + be manually cleaned up (if desired). + + :param table_name: str + Full name of the table. + + + + + .. py:method:: get(table_name: str) -> MonitorInfo + + Get a table monitor. + + Gets a monitor for the specified table. + + The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the + table's parent catalog and be an owner of the table's parent schema. 3. have the following + permissions: - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent + schema - **SELECT** privilege on the table. + + The returned information includes configuration values, as well as information on assets created by + the monitor. Some information (e.g., dashboard) may be filtered out if the caller is in a different + workspace than where the monitor was created. + + :param table_name: str + Full name of the table. + + :returns: :class:`MonitorInfo` + + + .. py:method:: get_refresh(table_name: str, refresh_id: str) -> MonitorRefreshInfo + + Get refresh. + + Gets info about a specific monitor refresh using the given refresh ID. + + The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the + table's parent catalog and be an owner of the table's parent schema 3. have the following permissions: + - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - + **SELECT** privilege on the table. + + Additionally, the call must be made from the workspace where the monitor was created. + + :param table_name: str + Full name of the table. + :param refresh_id: str + ID of the refresh. + + :returns: :class:`MonitorRefreshInfo` + + + .. py:method:: list_refreshes(table_name: str) -> MonitorRefreshListResponse + + List refreshes. + + Gets an array containing the history of the most recent refreshes (up to 25) for this table. + + The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the + table's parent catalog and be an owner of the table's parent schema 3. have the following permissions: + - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - + **SELECT** privilege on the table. + + Additionally, the call must be made from the workspace where the monitor was created. + + :param table_name: str + Full name of the table. + + :returns: :class:`MonitorRefreshListResponse` + + + .. py:method:: run_refresh(table_name: str) -> MonitorRefreshInfo + + Queue a metric refresh for a monitor. + + Queues a metric refresh on the monitor for the specified table. The refresh will execute in the + background. + + The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the + table's parent catalog and be an owner of the table's parent schema 3. 
have the following permissions: + - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - be an + owner of the table + + Additionally, the call must be made from the workspace where the monitor was created. + + :param table_name: str + Full name of the table. + + :returns: :class:`MonitorRefreshInfo` + + + .. py:method:: update(table_name: str, output_schema_name: str [, baseline_table_name: Optional[str], custom_metrics: Optional[List[MonitorMetric]], dashboard_id: Optional[str], data_classification_config: Optional[MonitorDataClassificationConfig], inference_log: Optional[MonitorInferenceLog], notifications: Optional[MonitorNotifications], schedule: Optional[MonitorCronSchedule], slicing_exprs: Optional[List[str]], snapshot: Optional[MonitorSnapshot], time_series: Optional[MonitorTimeSeries]]) -> MonitorInfo + + Update a table monitor. + + Updates a monitor for the specified table. + + The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the + table's parent catalog and be an owner of the table's parent schema 3. have the following permissions: + - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - be an + owner of the table. + + Additionally, the call must be made from the workspace where the monitor was created, and the caller + must be the original creator of the monitor. + + Certain configuration fields, such as output asset identifiers, cannot be updated. + + :param table_name: str + Full name of the table. + :param output_schema_name: str + Schema where output metric tables are created. + :param baseline_table_name: str (optional) + Name of the baseline table from which drift metrics are computed from. Columns in the monitored + table should also be present in the baseline table. + :param custom_metrics: List[:class:`MonitorMetric`] (optional) + Custom metrics to compute on the monitored table. These can be aggregate metrics, derived metrics + (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows). + :param dashboard_id: str (optional) + Id of dashboard that visualizes the computed metrics. This can be empty if the monitor is in PENDING + state. + :param data_classification_config: :class:`MonitorDataClassificationConfig` (optional) + The data classification config for the monitor. + :param inference_log: :class:`MonitorInferenceLog` (optional) + Configuration for monitoring inference logs. + :param notifications: :class:`MonitorNotifications` (optional) + The notification settings for the monitor. + :param schedule: :class:`MonitorCronSchedule` (optional) + The schedule for automatically updating and refreshing metric tables. + :param slicing_exprs: List[str] (optional) + List of column expressions to slice data with for targeted analysis. The data is grouped by each + expression independently, resulting in a separate slice for each predicate and its complements. For + high-cardinality columns, only the top 100 unique values by frequency will generate slices. + :param snapshot: :class:`MonitorSnapshot` (optional) + Configuration for monitoring snapshot tables. + :param time_series: :class:`MonitorTimeSeries` (optional) + Configuration for monitoring time series tables. 
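A sketch of the refresh workflow described above, assuming a placeholder table name; the `refreshes` field on `MonitorRefreshListResponse` is an assumption rather than a detail stated on this page:

.. code-block:: python

    from databricks.sdk import WorkspaceClient

    w = WorkspaceClient()

    table = "main.sales.orders"  # placeholder table name

    # Queue a refresh in the background, then poll it by ID.
    refresh = w.quality_monitors.run_refresh(table_name=table)
    state = w.quality_monitors.get_refresh(table_name=table,
                                           refresh_id=str(refresh.refresh_id))
    print(state.state)

    # Recent refresh history (up to 25 entries); the `refreshes` field
    # name is assumed, not documented on this page.
    history = w.quality_monitors.list_refreshes(table_name=table)
    for r in history.refreshes or []:
        print(r.refresh_id, r.state)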
+ + :returns: :class:`MonitorInfo` + \ No newline at end of file diff --git a/docs/workspace/catalog/system_schemas.rst b/docs/workspace/catalog/system_schemas.rst index 13c4a56df..b9ab3b0f9 100644 --- a/docs/workspace/catalog/system_schemas.rst +++ b/docs/workspace/catalog/system_schemas.rst @@ -7,7 +7,7 @@ A system schema is a schema that lives within the system catalog. A system schema may contain information about customer usage of Unity Catalog such as audit-logs, billing-logs, lineage information, etc. - .. py:method:: disable(metastore_id: str, schema_name: DisableSchemaName) + .. py:method:: disable(metastore_id: str, schema_name: str) Disable a system schema. @@ -16,13 +16,13 @@ :param metastore_id: str The metastore ID under which the system schema lives. - :param schema_name: :class:`DisableSchemaName` + :param schema_name: str Full name of the system schema. - .. py:method:: enable(metastore_id: str, schema_name: EnableSchemaName) + .. py:method:: enable(metastore_id: str, schema_name: str) Enable a system schema. @@ -31,7 +31,7 @@ :param metastore_id: str The metastore ID under which the system schema lives. - :param schema_name: :class:`EnableSchemaName` + :param schema_name: str Full name of the system schema. diff --git a/docs/workspace/compute/clusters.rst b/docs/workspace/compute/clusters.rst index 437dc9cf6..2135afbfd 100644 --- a/docs/workspace/compute/clusters.rst +++ b/docs/workspace/compute/clusters.rst @@ -72,7 +72,7 @@ - .. py:method:: create(spark_version: str [, apply_policy_default_values: Optional[bool], autoscale: Optional[AutoScale], autotermination_minutes: Optional[int], aws_attributes: Optional[AwsAttributes], azure_attributes: Optional[AzureAttributes], clone_from: Optional[CloneCluster], cluster_log_conf: Optional[ClusterLogConf], cluster_name: Optional[str], cluster_source: Optional[ClusterSource], custom_tags: Optional[Dict[str, str]], data_security_mode: Optional[DataSecurityMode], docker_image: Optional[DockerImage], driver_instance_pool_id: Optional[str], driver_node_type_id: Optional[str], enable_elastic_disk: Optional[bool], enable_local_disk_encryption: Optional[bool], gcp_attributes: Optional[GcpAttributes], init_scripts: Optional[List[InitScriptInfo]], instance_pool_id: Optional[str], node_type_id: Optional[str], num_workers: Optional[int], policy_id: Optional[str], runtime_engine: Optional[RuntimeEngine], single_user_name: Optional[str], spark_conf: Optional[Dict[str, str]], spark_env_vars: Optional[Dict[str, str]], ssh_public_keys: Optional[List[str]], workload_type: Optional[WorkloadType]]) -> Wait[ClusterDetails] + .. 
py:method:: create(spark_version: str [, apply_policy_default_values: Optional[bool], autoscale: Optional[AutoScale], autotermination_minutes: Optional[int], aws_attributes: Optional[AwsAttributes], azure_attributes: Optional[AzureAttributes], clone_from: Optional[CloneCluster], cluster_log_conf: Optional[ClusterLogConf], cluster_name: Optional[str], custom_tags: Optional[Dict[str, str]], data_security_mode: Optional[DataSecurityMode], docker_image: Optional[DockerImage], driver_instance_pool_id: Optional[str], driver_node_type_id: Optional[str], enable_elastic_disk: Optional[bool], enable_local_disk_encryption: Optional[bool], gcp_attributes: Optional[GcpAttributes], init_scripts: Optional[List[InitScriptInfo]], instance_pool_id: Optional[str], node_type_id: Optional[str], num_workers: Optional[int], policy_id: Optional[str], runtime_engine: Optional[RuntimeEngine], single_user_name: Optional[str], spark_conf: Optional[Dict[str, str]], spark_env_vars: Optional[Dict[str, str]], ssh_public_keys: Optional[List[str]], workload_type: Optional[WorkloadType]]) -> Wait[ClusterDetails] Usage: @@ -112,6 +112,8 @@ The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. :param apply_policy_default_values: bool (optional) + When set to true, fixed and default values from the policy will be used for fields that are omitted. + When set to false, only fixed values from the policy will be applied. :param autoscale: :class:`AutoScale` (optional) Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. @@ -136,9 +138,6 @@ :param cluster_name: str (optional) Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. - :param cluster_source: :class:`ClusterSource` (optional) - Determines whether the cluster was created by a user through the UI, created by the Databricks Jobs - Scheduler, or through an API request. This is the same as cluster_creator, but read only. :param custom_tags: Dict[str,str] (optional) Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: @@ -155,10 +154,15 @@ governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages - and cluster features might be limited. * `LEGACY_TABLE_ACL`: This mode is for users migrating from - legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy - Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating - from legacy Passthrough on standard clusters. + and cluster features might be limited. + + The following modes are deprecated starting with Databricks Runtime 15.0 and will be removed for + future Databricks Runtime versions: + + * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * + `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency + clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on + standard clusters. 
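Since `cluster_source` is no longer a `create()` argument and the legacy security modes are deprecated from Databricks Runtime 15.0, a minimal sketch of cluster creation with a non-deprecated mode; the cluster name is a placeholder, and `apply_policy_default_values` only has an effect when a `policy_id` is also supplied:

.. code-block:: python

    from databricks.sdk import WorkspaceClient
    from databricks.sdk.service.compute import DataSecurityMode

    w = WorkspaceClient()

    # Create a single-user cluster; the name is a placeholder and
    # apply_policy_default_values only matters when a policy_id is set.
    cluster = w.clusters.create_and_wait(
        spark_version=w.clusters.select_spark_version(latest=True),
        node_type_id=w.clusters.select_node_type(local_disk=True),
        cluster_name="sdk-v0-28-docs-example",
        num_workers=1,
        autotermination_minutes=30,
        data_security_mode=DataSecurityMode.SINGLE_USER,
        single_user_name=w.current_user.me().user_name,
        apply_policy_default_values=True,
    )
    print(cluster.cluster_id)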
:param docker_image: :class:`DockerImage` (optional) :param driver_instance_pool_id: str (optional) The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses @@ -229,7 +233,7 @@ See :method:wait_get_cluster_running for more details. - .. py:method:: create_and_wait(spark_version: str [, apply_policy_default_values: Optional[bool], autoscale: Optional[AutoScale], autotermination_minutes: Optional[int], aws_attributes: Optional[AwsAttributes], azure_attributes: Optional[AzureAttributes], clone_from: Optional[CloneCluster], cluster_log_conf: Optional[ClusterLogConf], cluster_name: Optional[str], cluster_source: Optional[ClusterSource], custom_tags: Optional[Dict[str, str]], data_security_mode: Optional[DataSecurityMode], docker_image: Optional[DockerImage], driver_instance_pool_id: Optional[str], driver_node_type_id: Optional[str], enable_elastic_disk: Optional[bool], enable_local_disk_encryption: Optional[bool], gcp_attributes: Optional[GcpAttributes], init_scripts: Optional[List[InitScriptInfo]], instance_pool_id: Optional[str], node_type_id: Optional[str], num_workers: Optional[int], policy_id: Optional[str], runtime_engine: Optional[RuntimeEngine], single_user_name: Optional[str], spark_conf: Optional[Dict[str, str]], spark_env_vars: Optional[Dict[str, str]], ssh_public_keys: Optional[List[str]], workload_type: Optional[WorkloadType], timeout: datetime.timedelta = 0:20:00]) -> ClusterDetails + .. py:method:: create_and_wait(spark_version: str [, apply_policy_default_values: Optional[bool], autoscale: Optional[AutoScale], autotermination_minutes: Optional[int], aws_attributes: Optional[AwsAttributes], azure_attributes: Optional[AzureAttributes], clone_from: Optional[CloneCluster], cluster_log_conf: Optional[ClusterLogConf], cluster_name: Optional[str], custom_tags: Optional[Dict[str, str]], data_security_mode: Optional[DataSecurityMode], docker_image: Optional[DockerImage], driver_instance_pool_id: Optional[str], driver_node_type_id: Optional[str], enable_elastic_disk: Optional[bool], enable_local_disk_encryption: Optional[bool], gcp_attributes: Optional[GcpAttributes], init_scripts: Optional[List[InitScriptInfo]], instance_pool_id: Optional[str], node_type_id: Optional[str], num_workers: Optional[int], policy_id: Optional[str], runtime_engine: Optional[RuntimeEngine], single_user_name: Optional[str], spark_conf: Optional[Dict[str, str]], spark_env_vars: Optional[Dict[str, str]], ssh_public_keys: Optional[List[str]], workload_type: Optional[WorkloadType], timeout: datetime.timedelta = 0:20:00]) -> ClusterDetails .. py:method:: delete(cluster_id: str) -> Wait[ClusterDetails] @@ -278,7 +282,7 @@ .. py:method:: delete_and_wait(cluster_id: str, timeout: datetime.timedelta = 0:20:00) -> ClusterDetails - .. 
py:method:: edit(cluster_id: str, spark_version: str [, apply_policy_default_values: Optional[bool], autoscale: Optional[AutoScale], autotermination_minutes: Optional[int], aws_attributes: Optional[AwsAttributes], azure_attributes: Optional[AzureAttributes], clone_from: Optional[CloneCluster], cluster_log_conf: Optional[ClusterLogConf], cluster_name: Optional[str], cluster_source: Optional[ClusterSource], custom_tags: Optional[Dict[str, str]], data_security_mode: Optional[DataSecurityMode], docker_image: Optional[DockerImage], driver_instance_pool_id: Optional[str], driver_node_type_id: Optional[str], enable_elastic_disk: Optional[bool], enable_local_disk_encryption: Optional[bool], gcp_attributes: Optional[GcpAttributes], init_scripts: Optional[List[InitScriptInfo]], instance_pool_id: Optional[str], node_type_id: Optional[str], num_workers: Optional[int], policy_id: Optional[str], runtime_engine: Optional[RuntimeEngine], single_user_name: Optional[str], spark_conf: Optional[Dict[str, str]], spark_env_vars: Optional[Dict[str, str]], ssh_public_keys: Optional[List[str]], workload_type: Optional[WorkloadType]]) -> Wait[ClusterDetails] + .. py:method:: edit(cluster_id: str, spark_version: str [, apply_policy_default_values: Optional[bool], autoscale: Optional[AutoScale], autotermination_minutes: Optional[int], aws_attributes: Optional[AwsAttributes], azure_attributes: Optional[AzureAttributes], cluster_log_conf: Optional[ClusterLogConf], cluster_name: Optional[str], custom_tags: Optional[Dict[str, str]], data_security_mode: Optional[DataSecurityMode], docker_image: Optional[DockerImage], driver_instance_pool_id: Optional[str], driver_node_type_id: Optional[str], enable_elastic_disk: Optional[bool], enable_local_disk_encryption: Optional[bool], gcp_attributes: Optional[GcpAttributes], init_scripts: Optional[List[InitScriptInfo]], instance_pool_id: Optional[str], node_type_id: Optional[str], num_workers: Optional[int], policy_id: Optional[str], runtime_engine: Optional[RuntimeEngine], single_user_name: Optional[str], spark_conf: Optional[Dict[str, str]], spark_env_vars: Optional[Dict[str, str]], ssh_public_keys: Optional[List[str]], workload_type: Optional[WorkloadType]]) -> Wait[ClusterDetails] Usage: @@ -332,6 +336,8 @@ The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. :param apply_policy_default_values: bool (optional) + When set to true, fixed and default values from the policy will be used for fields that are omitted. + When set to false, only fixed values from the policy will be applied. :param autoscale: :class:`AutoScale` (optional) Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. @@ -345,8 +351,6 @@ :param azure_attributes: :class:`AzureAttributes` (optional) Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. - :param clone_from: :class:`CloneCluster` (optional) - When specified, this clones libraries from a source cluster during the creation of a new cluster. :param cluster_log_conf: :class:`ClusterLogConf` (optional) The configuration for delivering spark logs to a long-term storage destination. Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified for one cluster. 
If @@ -356,9 +360,6 @@ :param cluster_name: str (optional) Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. - :param cluster_source: :class:`ClusterSource` (optional) - Determines whether the cluster was created by a user through the UI, created by the Databricks Jobs - Scheduler, or through an API request. This is the same as cluster_creator, but read only. :param custom_tags: Dict[str,str] (optional) Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: @@ -375,10 +376,15 @@ governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages - and cluster features might be limited. * `LEGACY_TABLE_ACL`: This mode is for users migrating from - legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy - Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating - from legacy Passthrough on standard clusters. + and cluster features might be limited. + + The following modes are deprecated starting with Databricks Runtime 15.0 and will be removed for + future Databricks Runtime versions: + + * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * + `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency + clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on + standard clusters. :param docker_image: :class:`DockerImage` (optional) :param driver_instance_pool_id: str (optional) The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses @@ -449,7 +455,7 @@ See :method:wait_get_cluster_running for more details. - .. py:method:: edit_and_wait(cluster_id: str, spark_version: str [, apply_policy_default_values: Optional[bool], autoscale: Optional[AutoScale], autotermination_minutes: Optional[int], aws_attributes: Optional[AwsAttributes], azure_attributes: Optional[AzureAttributes], clone_from: Optional[CloneCluster], cluster_log_conf: Optional[ClusterLogConf], cluster_name: Optional[str], cluster_source: Optional[ClusterSource], custom_tags: Optional[Dict[str, str]], data_security_mode: Optional[DataSecurityMode], docker_image: Optional[DockerImage], driver_instance_pool_id: Optional[str], driver_node_type_id: Optional[str], enable_elastic_disk: Optional[bool], enable_local_disk_encryption: Optional[bool], gcp_attributes: Optional[GcpAttributes], init_scripts: Optional[List[InitScriptInfo]], instance_pool_id: Optional[str], node_type_id: Optional[str], num_workers: Optional[int], policy_id: Optional[str], runtime_engine: Optional[RuntimeEngine], single_user_name: Optional[str], spark_conf: Optional[Dict[str, str]], spark_env_vars: Optional[Dict[str, str]], ssh_public_keys: Optional[List[str]], workload_type: Optional[WorkloadType], timeout: datetime.timedelta = 0:20:00]) -> ClusterDetails + .. 
py:method:: edit_and_wait(cluster_id: str, spark_version: str [, apply_policy_default_values: Optional[bool], autoscale: Optional[AutoScale], autotermination_minutes: Optional[int], aws_attributes: Optional[AwsAttributes], azure_attributes: Optional[AzureAttributes], cluster_log_conf: Optional[ClusterLogConf], cluster_name: Optional[str], custom_tags: Optional[Dict[str, str]], data_security_mode: Optional[DataSecurityMode], docker_image: Optional[DockerImage], driver_instance_pool_id: Optional[str], driver_node_type_id: Optional[str], enable_elastic_disk: Optional[bool], enable_local_disk_encryption: Optional[bool], gcp_attributes: Optional[GcpAttributes], init_scripts: Optional[List[InitScriptInfo]], instance_pool_id: Optional[str], node_type_id: Optional[str], num_workers: Optional[int], policy_id: Optional[str], runtime_engine: Optional[RuntimeEngine], single_user_name: Optional[str], spark_conf: Optional[Dict[str, str]], spark_env_vars: Optional[Dict[str, str]], ssh_public_keys: Optional[List[str]], workload_type: Optional[WorkloadType], timeout: datetime.timedelta = 0:20:00]) -> ClusterDetails .. py:method:: ensure_cluster_is_running(cluster_id: str) diff --git a/docs/workspace/compute/libraries.rst b/docs/workspace/compute/libraries.rst index e295df064..64f688fdc 100644 --- a/docs/workspace/compute/libraries.rst +++ b/docs/workspace/compute/libraries.rst @@ -18,14 +18,14 @@ When you uninstall a library from a cluster, the library is removed only when you restart the cluster. Until you restart the cluster, the status of the uninstalled library appears as Uninstall pending restart. - .. py:method:: all_cluster_statuses() -> ListAllClusterLibraryStatusesResponse + .. py:method:: all_cluster_statuses() -> Iterator[ClusterLibraryStatuses] Get all statuses. Get the status of all libraries on all clusters. A status is returned for all libraries installed on this cluster via the API or the libraries UI. - :returns: :class:`ListAllClusterLibraryStatusesResponse` + :returns: Iterator over :class:`ClusterLibraryStatuses` .. py:method:: cluster_status(cluster_id: str) -> Iterator[LibraryFullStatus] diff --git a/docs/workspace/jobs/jobs.rst b/docs/workspace/jobs/jobs.rst index 4c99a4664..32cfd55c4 100644 --- a/docs/workspace/jobs/jobs.rst +++ b/docs/workspace/jobs/jobs.rst @@ -677,8 +677,6 @@ [dbutils.widgets.get]: https://docs.databricks.com/dev-tools/databricks-utils.html :param pipeline_params: :class:`PipelineParams` (optional) :param python_named_params: Dict[str,str] (optional) - A map from keys to values for jobs with Python wheel task, for example `"python_named_params": - {"name": "task", "data": "dbfs:/path/to/data.json"}`. :param python_params: List[str] (optional) A list of parameters for jobs with Python tasks, for example `"python_params": ["john doe", "35"]`. The parameters are passed to Python file as command-line parameters. If specified upon `run-now`, it @@ -868,8 +866,6 @@ [dbutils.widgets.get]: https://docs.databricks.com/dev-tools/databricks-utils.html :param pipeline_params: :class:`PipelineParams` (optional) :param python_named_params: Dict[str,str] (optional) - A map from keys to values for jobs with Python wheel task, for example `"python_named_params": - {"name": "task", "data": "dbfs:/path/to/data.json"}`. :param python_params: List[str] (optional) A list of parameters for jobs with Python tasks, for example `"python_params": ["john doe", "35"]`. The parameters are passed to Python file as command-line parameters. 
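Relating to the libraries change above, where `all_cluster_statuses()` now returns an iterator of `ClusterLibraryStatuses` instead of a single wrapper response, a minimal sketch; the field names follow the existing compute dataclasses:

.. code-block:: python

    from databricks.sdk import WorkspaceClient

    w = WorkspaceClient()

    # all_cluster_statuses() now yields ClusterLibraryStatuses objects
    # directly instead of returning one wrapper response.
    for statuses in w.libraries.all_cluster_statuses():
        for lib in statuses.library_statuses or []:
            print(statuses.cluster_id, lib.status)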
If specified upon `run-now`, it diff --git a/docs/workspace/marketplace/consumer_listings.rst b/docs/workspace/marketplace/consumer_listings.rst index 4bef0319d..95113161e 100644 --- a/docs/workspace/marketplace/consumer_listings.rst +++ b/docs/workspace/marketplace/consumer_listings.rst @@ -18,7 +18,7 @@ :returns: :class:`GetListingResponse` - .. py:method:: list( [, assets: Optional[List[AssetType]], categories: Optional[List[Category]], is_free: Optional[bool], is_private_exchange: Optional[bool], is_staff_pick: Optional[bool], page_size: Optional[int], page_token: Optional[str], provider_ids: Optional[List[str]], sort_by_spec: Optional[SortBySpec], tags: Optional[List[ListingTag]]]) -> Iterator[Listing] + .. py:method:: list( [, assets: Optional[List[AssetType]], categories: Optional[List[Category]], is_ascending: Optional[bool], is_free: Optional[bool], is_private_exchange: Optional[bool], is_staff_pick: Optional[bool], page_size: Optional[int], page_token: Optional[str], provider_ids: Optional[List[str]], sort_by: Optional[SortBy], tags: Optional[List[ListingTag]]]) -> Iterator[Listing] List listings. @@ -28,6 +28,7 @@ Matches any of the following asset types :param categories: List[:class:`Category`] (optional) Matches any of the following categories + :param is_ascending: bool (optional) :param is_free: bool (optional) Filters each listing based on if it is free. :param is_private_exchange: bool (optional) @@ -38,7 +39,7 @@ :param page_token: str (optional) :param provider_ids: List[str] (optional) Matches any of the following provider ids - :param sort_by_spec: :class:`SortBySpec` (optional) + :param sort_by: :class:`SortBy` (optional) Criteria for sorting the resulting set of listings. :param tags: List[:class:`ListingTag`] (optional) Matches any of the following tags @@ -46,7 +47,7 @@ :returns: Iterator over :class:`Listing` - .. py:method:: search(query: str [, assets: Optional[List[AssetType]], categories: Optional[List[Category]], is_free: Optional[bool], is_private_exchange: Optional[bool], page_size: Optional[int], page_token: Optional[str], provider_ids: Optional[List[str]], sort_by: Optional[SortBy]]) -> Iterator[Listing] + .. py:method:: search(query: str [, assets: Optional[List[AssetType]], categories: Optional[List[Category]], is_ascending: Optional[bool], is_free: Optional[bool], is_private_exchange: Optional[bool], page_size: Optional[int], page_token: Optional[str], provider_ids: Optional[List[str]], sort_by: Optional[SortBy]]) -> Iterator[Listing] Search listings. @@ -59,6 +60,7 @@ Matches any of the following asset types :param categories: List[:class:`Category`] (optional) Matches any of the following categories + :param is_ascending: bool (optional) :param is_free: bool (optional) :param is_private_exchange: bool (optional) :param page_size: int (optional) diff --git a/docs/workspace/pipelines/pipelines.rst b/docs/workspace/pipelines/pipelines.rst index 1237c2be9..a80e7c799 100644 --- a/docs/workspace/pipelines/pipelines.rst +++ b/docs/workspace/pipelines/pipelines.rst @@ -15,7 +15,7 @@ also enforce data quality with Delta Live Tables expectations. Expectations allow you to define expected data quality and specify how to handle records that fail those expectations. - .. 
py:method:: create( [, allow_duplicate_names: Optional[bool], catalog: Optional[str], channel: Optional[str], clusters: Optional[List[PipelineCluster]], configuration: Optional[Dict[str, str]], continuous: Optional[bool], deployment: Optional[PipelineDeployment], development: Optional[bool], dry_run: Optional[bool], edition: Optional[str], filters: Optional[Filters], id: Optional[str], ingestion_definition: Optional[ManagedIngestionPipelineDefinition], libraries: Optional[List[PipelineLibrary]], name: Optional[str], notifications: Optional[List[Notifications]], photon: Optional[bool], serverless: Optional[bool], storage: Optional[str], target: Optional[str], trigger: Optional[PipelineTrigger]]) -> CreatePipelineResponse + .. py:method:: create( [, allow_duplicate_names: Optional[bool], catalog: Optional[str], channel: Optional[str], clusters: Optional[List[PipelineCluster]], configuration: Optional[Dict[str, str]], continuous: Optional[bool], deployment: Optional[PipelineDeployment], development: Optional[bool], dry_run: Optional[bool], edition: Optional[str], filters: Optional[Filters], gateway_definition: Optional[IngestionGatewayPipelineDefinition], id: Optional[str], ingestion_definition: Optional[ManagedIngestionPipelineDefinition], libraries: Optional[List[PipelineLibrary]], name: Optional[str], notifications: Optional[List[Notifications]], photon: Optional[bool], serverless: Optional[bool], storage: Optional[str], target: Optional[str], trigger: Optional[PipelineTrigger]]) -> CreatePipelineResponse Usage: @@ -76,6 +76,8 @@ Pipeline product edition. :param filters: :class:`Filters` (optional) Filters on which Pipeline packages to include in the deployed graph. + :param gateway_definition: :class:`IngestionGatewayPipelineDefinition` (optional) + The definition of a gateway pipeline to support CDC. :param id: str (optional) Unique identifier for this pipeline. :param ingestion_definition: :class:`ManagedIngestionPipelineDefinition` (optional) @@ -369,7 +371,7 @@ .. py:method:: stop_and_wait(pipeline_id: str, timeout: datetime.timedelta = 0:20:00) -> GetPipelineResponse - .. py:method:: update(pipeline_id: str [, allow_duplicate_names: Optional[bool], catalog: Optional[str], channel: Optional[str], clusters: Optional[List[PipelineCluster]], configuration: Optional[Dict[str, str]], continuous: Optional[bool], deployment: Optional[PipelineDeployment], development: Optional[bool], edition: Optional[str], expected_last_modified: Optional[int], filters: Optional[Filters], id: Optional[str], ingestion_definition: Optional[ManagedIngestionPipelineDefinition], libraries: Optional[List[PipelineLibrary]], name: Optional[str], notifications: Optional[List[Notifications]], photon: Optional[bool], serverless: Optional[bool], storage: Optional[str], target: Optional[str], trigger: Optional[PipelineTrigger]]) + .. 
py:method:: update(pipeline_id: str [, allow_duplicate_names: Optional[bool], catalog: Optional[str], channel: Optional[str], clusters: Optional[List[PipelineCluster]], configuration: Optional[Dict[str, str]], continuous: Optional[bool], deployment: Optional[PipelineDeployment], development: Optional[bool], edition: Optional[str], expected_last_modified: Optional[int], filters: Optional[Filters], gateway_definition: Optional[IngestionGatewayPipelineDefinition], id: Optional[str], ingestion_definition: Optional[ManagedIngestionPipelineDefinition], libraries: Optional[List[PipelineLibrary]], name: Optional[str], notifications: Optional[List[Notifications]], photon: Optional[bool], serverless: Optional[bool], storage: Optional[str], target: Optional[str], trigger: Optional[PipelineTrigger]]) Usage: @@ -446,6 +448,8 @@ modified after that time, then the request will fail with a conflict. :param filters: :class:`Filters` (optional) Filters on which Pipeline packages to include in the deployed graph. + :param gateway_definition: :class:`IngestionGatewayPipelineDefinition` (optional) + The definition of a gateway pipeline to support CDC. :param id: str (optional) Unique identifier for this pipeline. :param ingestion_definition: :class:`ManagedIngestionPipelineDefinition` (optional) diff --git a/docs/workspace/serving/serving_endpoints.rst b/docs/workspace/serving/serving_endpoints.rst index 4a90c78ea..9244f333a 100644 --- a/docs/workspace/serving/serving_endpoints.rst +++ b/docs/workspace/serving/serving_endpoints.rst @@ -29,7 +29,7 @@ :returns: :class:`BuildLogsResponse` - .. py:method:: create(name: str, config: EndpointCoreConfigInput [, rate_limits: Optional[List[RateLimit]], tags: Optional[List[EndpointTag]]]) -> Wait[ServingEndpointDetailed] + .. py:method:: create(name: str, config: EndpointCoreConfigInput [, rate_limits: Optional[List[RateLimit]], route_optimized: Optional[bool], tags: Optional[List[EndpointTag]]]) -> Wait[ServingEndpointDetailed] Create a new serving endpoint. @@ -41,6 +41,8 @@ :param rate_limits: List[:class:`RateLimit`] (optional) Rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now. + :param route_optimized: bool (optional) + Enable route optimization for the serving endpoint. :param tags: List[:class:`EndpointTag`] (optional) Tags to be attached to the serving endpoint and automatically propagated to billing logs. @@ -49,7 +51,7 @@ See :method:wait_get_serving_endpoint_not_updating for more details. - .. py:method:: create_and_wait(name: str, config: EndpointCoreConfigInput [, rate_limits: Optional[List[RateLimit]], tags: Optional[List[EndpointTag]], timeout: datetime.timedelta = 0:20:00]) -> ServingEndpointDetailed + .. py:method:: create_and_wait(name: str, config: EndpointCoreConfigInput [, rate_limits: Optional[List[RateLimit]], route_optimized: Optional[bool], tags: Optional[List[EndpointTag]], timeout: datetime.timedelta = 0:20:00]) -> ServingEndpointDetailed .. py:method:: delete(name: str) @@ -62,7 +64,7 @@ - .. py:method:: export_metrics(name: str) + .. py:method:: export_metrics(name: str) -> ExportMetricsResponse Get metrics of a serving endpoint. @@ -72,7 +74,7 @@ :param name: str The name of the serving endpoint to retrieve metrics for. This field is required. - + :returns: :class:`ExportMetricsResponse` .. 
py:method:: get(name: str) -> ServingEndpointDetailed diff --git a/docs/workspace/sharing/shares.rst b/docs/workspace/sharing/shares.rst index 7c7eef49d..82cdd4e6f 100644 --- a/docs/workspace/sharing/shares.rst +++ b/docs/workspace/sharing/shares.rst @@ -9,7 +9,7 @@ register data assets under their original name, qualified by their original schema, or provide alternate exposed names. - .. py:method:: create(name: str [, comment: Optional[str]]) -> ShareInfo + .. py:method:: create(name: str [, comment: Optional[str], storage_root: Optional[str]]) -> ShareInfo Usage: @@ -36,6 +36,8 @@ Name of the share. :param comment: str (optional) User-provided free-form text description. + :param storage_root: str (optional) + Storage root URL for the share. :returns: :class:`ShareInfo` @@ -119,7 +121,7 @@ :returns: :class:`PermissionsList` - .. py:method:: update(name: str [, comment: Optional[str], new_name: Optional[str], owner: Optional[str], updates: Optional[List[SharedDataObjectUpdate]]]) -> ShareInfo + .. py:method:: update(name: str [, comment: Optional[str], new_name: Optional[str], owner: Optional[str], storage_root: Optional[str], updates: Optional[List[SharedDataObjectUpdate]]]) -> ShareInfo Usage: @@ -174,6 +176,8 @@ In the case that the share name is changed, **updateShare** requires that the caller is both the share owner and a metastore admin. + If there are notebook files in the share, the __storage_root__ field cannot be updated. + For each table that is added through this method, the share owner must also have **SELECT** privilege on the table. This privilege must be maintained indefinitely for recipients to be able to access the table. Typically, you should use a group as the share owner. @@ -188,6 +192,8 @@ New name for the share. :param owner: str (optional) Username of current owner of share. + :param storage_root: str (optional) + Storage root URL for the share. :param updates: List[:class:`SharedDataObjectUpdate`] (optional) Array of shared data object updates. diff --git a/docs/workspace/vectorsearch/vector_search_indexes.rst b/docs/workspace/vectorsearch/vector_search_indexes.rst index 04486763b..5c2f5f456 100644 --- a/docs/workspace/vectorsearch/vector_search_indexes.rst +++ b/docs/workspace/vectorsearch/vector_search_indexes.rst @@ -120,6 +120,23 @@ :returns: :class:`QueryVectorIndexResponse` + .. py:method:: scan_index(index_name: str [, last_primary_key: Optional[str], num_results: Optional[int]]) -> ScanVectorIndexResponse + + Scan an index. + + Scan the specified vector index and return the first `num_results` entries after the exclusive + `primary_key`. + + :param index_name: str + Name of the vector index to scan. + :param last_primary_key: str (optional) + Primary key of the last entry returned in the previous scan. + :param num_results: int (optional) + Number of results to return. Defaults to 10. + + :returns: :class:`ScanVectorIndexResponse` + + .. py:method:: sync_index(index_name: str) Synchronize an index. diff --git a/docs/workspace/workspace/repos.rst b/docs/workspace/workspace/repos.rst index 584ad70b3..a5c602a3a 100644 --- a/docs/workspace/workspace/repos.rst +++ b/docs/workspace/workspace/repos.rst @@ -46,7 +46,8 @@ bitbucketCloud, gitLab, azureDevOpsServices, gitHubEnterprise, bitbucketServer, gitLabEnterpriseEdition and awsCodeCommit. :param path: str (optional) - Desired path for the repo in the workspace. Must be in the format /Repos/{folder}/{repo-name}. + Desired path for the repo in the workspace. Almost any path in the workspace can be chosen. 
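A sketch of a pagination loop over the new `scan_index()` method; the index name is a placeholder, and the `data` and `last_primary_key` response fields are assumptions about `ScanVectorIndexResponse` rather than details stated on this page:

.. code-block:: python

    from databricks.sdk import WorkspaceClient

    w = WorkspaceClient()

    # Page through an index 50 entries at a time; the index name is a
    # placeholder and the response field names are assumptions.
    last_key = None
    while True:
        resp = w.vector_search_indexes.scan_index(
            index_name="main.default.my_index",
            last_primary_key=last_key,
            num_results=50,
        )
        if not resp.data:
            break
        for entry in resp.data:
            print(entry)
        last_key = resp.last_primary_key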
If repo + is created in /Repos, path must be in the format /Repos/{folder}/{repo-name}. :param sparse_checkout: :class:`SparseCheckout` (optional) If specified, the repo will be created with sparse checkout enabled. You cannot enable/disable sparse checkout after the repo is created. @@ -145,7 +146,8 @@ Token used to get the next page of results. If not specified, returns the first page of results as well as a next page token if there are more results. :param path_prefix: str (optional) - Filters repos that have paths starting with the given path prefix. + Filters repos that have paths starting with the given path prefix. If not provided repos from /Repos + will be served. :returns: Iterator over :class:`RepoInfo` diff --git a/examples/connections/list_connections.py b/examples/connections/list_connections.py index 1ffe401a5..9da47eaf0 100755 --- a/examples/connections/list_connections.py +++ b/examples/connections/list_connections.py @@ -1,5 +1,6 @@ from databricks.sdk import WorkspaceClient +from databricks.sdk.service import catalog w = WorkspaceClient() -conn_list = w.connections.list() +conn_list = w.connections.list(catalog.ListConnectionsRequest())
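Alongside the regenerated example above, the keyword form documented earlier in this patch can be used for explicit pagination. A minimal sketch, where `max_results=0` requests the server-configured page length (the recommended setting):

.. code-block:: python

    from databricks.sdk import WorkspaceClient

    w = WorkspaceClient()

    # max_results=0 asks for the server-configured page length; the
    # iterator is expected to follow next-page tokens automatically.
    for conn in w.connections.list(max_results=0):
        print(conn.name, conn.connection_type)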