diff --git a/.codegen/__init__.py.tmpl b/.codegen/__init__.py.tmpl index 40d06293e..df854970a 100644 --- a/.codegen/__init__.py.tmpl +++ b/.codegen/__init__.py.tmpl @@ -1,5 +1,6 @@ import databricks.sdk.core as client import databricks.sdk.dbutils as dbutils +from databricks.sdk.credentials_provider import CredentialsProvider from databricks.sdk.mixins.files import DbfsExt from databricks.sdk.mixins.compute import ClustersExt @@ -43,7 +44,7 @@ class WorkspaceClient: debug_headers: bool = None, product="unknown", product_version="0.0.0", - credentials_provider: client.CredentialsProvider = None, + credentials_provider: CredentialsProvider = None, config: client.Config = None): if not config: config = client.Config({{range $args}}{{.}}={{.}}, {{end}} @@ -91,7 +92,7 @@ class AccountClient: debug_headers: bool = None, product="unknown", product_version="0.0.0", - credentials_provider: client.CredentialsProvider = None, + credentials_provider: CredentialsProvider = None, config: client.Config = None): if not config: config = client.Config({{range $args}}{{.}}={{.}}, {{end}} diff --git a/.codegen/_openapi_sha b/.codegen/_openapi_sha index 2c3fb6e13..f705ffea6 100644 --- a/.codegen/_openapi_sha +++ b/.codegen/_openapi_sha @@ -1 +1 @@ -d3853c8dee5806d04da2ae8910f273ffb35719a5 \ No newline at end of file +e05401ed5dd4974c5333d737ec308a7d451f749f \ No newline at end of file diff --git a/.gitattributes b/.gitattributes index c28d21d3c..dd1547c93 100755 --- a/.gitattributes +++ b/.gitattributes @@ -3,6 +3,7 @@ databricks/sdk/errors/mapping.py linguist-generated=true databricks/sdk/service/billing.py linguist-generated=true databricks/sdk/service/catalog.py linguist-generated=true databricks/sdk/service/compute.py linguist-generated=true +databricks/sdk/service/dashboards.py linguist-generated=true databricks/sdk/service/files.py linguist-generated=true databricks/sdk/service/iam.py linguist-generated=true databricks/sdk/service/jobs.py linguist-generated=true @@ -14,6 +15,7 @@ databricks/sdk/service/serving.py linguist-generated=true databricks/sdk/service/settings.py linguist-generated=true databricks/sdk/service/sharing.py linguist-generated=true databricks/sdk/service/sql.py linguist-generated=true +databricks/sdk/service/vectorsearch.py linguist-generated=true databricks/sdk/service/workspace.py linguist-generated=true examples/alerts/create_alerts.py linguist-generated=true examples/alerts/get_alerts.py linguist-generated=true diff --git a/CHANGELOG.md b/CHANGELOG.md index 18408cfee..007eca6c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,76 @@ # Version changelog +## 0.18.0 + +Bugfixes: + +* Fix Databricks OAuth M2M on Azure ([#513](https://github.com/databricks/databricks-sdk-py/pull/513)). + +Other noteworthy changes: + +* Use `[]` instead of `None` as default list value for deserialising responses ([#361](https://github.com/databricks/databricks-sdk-py/pull/361)). +* Support dev and staging workspaces ([#514](https://github.com/databricks/databricks-sdk-py/pull/514)). + +API Changes: + + * Added `exists()` method for [w.tables](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/tables.html) workspace-level service. + * Added [w.lakehouse_monitors](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/lakehouse_monitors.html) workspace-level service. 
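For orientation, a minimal sketch of calling the new table-existence check from a `WorkspaceClient`; the three-level table name is a placeholder, and the `full_name` argument plus the `table_exists` response field are assumptions inferred from the `ExistsRequest`/`TableExistsResponse` dataclasses listed below, not verified signatures.

```python
from databricks.sdk import WorkspaceClient

w = WorkspaceClient()

# Placeholder table name; exists() is assumed to take the table's full name
# and to return a TableExistsResponse-style object with a boolean field.
resp = w.tables.exists(full_name="main.default.my_table")
print(resp.table_exists)
```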
+ * Added the following dataclasses: + `databricks.sdk.service.catalog.CreateMonitor`, + `databricks.sdk.service.catalog.DeleteLakehouseMonitorRequest`, + `databricks.sdk.service.catalog.ExistsRequest`, + `databricks.sdk.service.catalog.GetLakehouseMonitorRequest`, + `databricks.sdk.service.catalog.MonitorCronSchedule`, + `databricks.sdk.service.catalog.MonitorCronSchedulePauseStatus`, + `databricks.sdk.service.catalog.MonitorCustomMetric`, + `databricks.sdk.service.catalog.MonitorCustomMetricType`, + `databricks.sdk.service.catalog.MonitorDataClassificationConfig`, + `databricks.sdk.service.catalog.MonitorDestinations`, + `databricks.sdk.service.catalog.MonitorInferenceLogProfileType`, + `databricks.sdk.service.catalog.MonitorInferenceLogProfileTypeProblemType`, + `databricks.sdk.service.catalog.MonitorInfo`, + `databricks.sdk.service.catalog.MonitorInfoStatus`, + `databricks.sdk.service.catalog.MonitorNotificationsConfig`, + `databricks.sdk.service.catalog.MonitorTimeSeriesProfileType`, + `databricks.sdk.service.catalog.TableExistsResponse` and + `databricks.sdk.service.catalog.UpdateMonitor`. + * Changed `create_obo_token()` method for [w.token_management](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/token_management.html) workspace-level service with new required argument order. + * Changed `get()` method for [w.token_management](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/token_management.html) workspace-level service to return `databricks.sdk.service.settings.GetTokenResponse` dataclass. + * Changed `lifetime_seconds` field for `databricks.sdk.service.settings.CreateOboTokenRequest` to no longer be required. + * Added `databricks.sdk.service.settings.GetTokenResponse` dataclass. + +OpenAPI SHA: e05401ed5dd4974c5333d737ec308a7d451f749f, Date: 2024-01-23 + +## 0.17.0 + +* Use covariant type for `@retried(on=[...])` ([#486](https://github.com/databricks/databricks-sdk-py/pull/486)). +* Configure request timeout using existing parameter from Config ([#489](https://github.com/databricks/databricks-sdk-py/pull/489)). +* Make contents of `__init__.py` equal across projects ([#488](https://github.com/databricks/databricks-sdk-py/pull/488)). +* Update SDK to Latest OpenAPI Specification ([#501](https://github.com/databricks/databricks-sdk-py/pull/501)). + +Note: This release contains breaking changes, please see below for more details. + +API Changes: + + * [Breaking] Changed `list()` method for [w.tokens](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/tokens.html) workspace-level service to return `databricks.sdk.service.settings.ListPublicTokensResponse` dataclass. + * Changed `list()` method for [w.external_locations](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/external_locations.html) workspace-level service to require request of `databricks.sdk.service.catalog.ListExternalLocationsRequest` dataclass and [w.storage_credentials](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/storage_credentials.html) workspace-level service to require request of `databricks.sdk.service.catalog.ListStorageCredentialsRequest` dataclass. + * Added `next_page_token` field for `databricks.sdk.service.catalog.ListExternalLocationsResponse`, `databricks.sdk.service.catalog.ListFunctionsResponse`, `databricks.sdk.service.catalog.ListSchemasResponse` and `databricks.sdk.service.catalog.ListStorageCredentialsResponse`. 
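A hedged sketch of how these pagination additions surface through the generated `list()` wrappers; the catalog name and page size are placeholders, and it is assumed the wrapper forwards `max_results` on each request and follows `next_page_token` across pages.

```python
from databricks.sdk import WorkspaceClient

w = WorkspaceClient()

# Placeholder catalog; max_results caps each underlying page, while the
# returned iterator is assumed to chain pages via next_page_token.
for schema in w.schemas.list(catalog_name="main", max_results=50):
    print(schema.full_name)
```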
+ * Added `max_results` field for `databricks.sdk.service.catalog.ListFunctionsRequest` and `databricks.sdk.service.catalog.ListSchemasRequest`. + * Added `page_token` field for `databricks.sdk.service.catalog.ListFunctionsRequest` and `databricks.sdk.service.catalog.ListSchemasRequest`. + * Added `omit_columns` field for `databricks.sdk.service.catalog.ListTablesRequest`. + * Added `omit_properties` field for `databricks.sdk.service.catalog.ListTablesRequest`. + * Added `init_scripts` field for `databricks.sdk.service.pipelines.PipelineCluster`. + * Added `validate_only` field for `databricks.sdk.service.pipelines.StartUpdate` and `databricks.sdk.service.pipelines.UpdateInfo`. + * Changed `create()` method for [w.dashboards](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/dashboards.html) workspace-level service . New request type is `databricks.sdk.service.sql.DashboardPostContent` dataclass. + * Added `update()` method for [w.dashboards](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/dashboards.html) workspace-level service. + * Added `http_headers` field for `databricks.sdk.service.sql.ExternalLink`. + * Added `run_as_role` field for `databricks.sdk.service.sql.QueryEditContent`. + * Added package: `databricks.sdk.service.dashboards` and `databricks.sdk.service.vectorsearch`. + * Added dataclass: `databricks.sdk.service.catalog.ListExternalLocationsRequest`, `databricks.sdk.service.catalog.ListStorageCredentialsRequest`, `databricks.sdk.service.settings.ListPublicTokensResponse`, `databricks.sdk.service.sql.DashboardEditContent` and `databricks.sdk.service.sql.DashboardPostContent`. + * Removed dataclass: `databricks.sdk.service.catalog.TableConstraintList` and `databricks.sdk.service.sql.CreateDashboardRequest`. + +OpenAPI SHA: 0e0d4cbe87193e36c73b8b2be3b0dd0f1b013e00, Date: 2024-01-10 + ## 0.16.0 * Sort imports in service template ([#479](https://github.com/databricks/databricks-sdk-py/pull/479)). diff --git a/README.md b/README.md index 2fbb0d391..aae1fe884 100644 --- a/README.md +++ b/README.md @@ -151,12 +151,12 @@ The Databricks SDK for Python picks up an Azure CLI token, if you've previously To authenticate as an Azure Active Directory (Azure AD) service principal, you must provide one of the following. See also [Add a service principal to your Azure Databricks account](https://learn.microsoft.com/azure/databricks/administration-guide/users-groups/service-principals#add-sp-account): -- `azure_resource_id`, `azure_client_secret`, `azure_client_id`, and `azure_tenant_id`; or their environment variable or `.databrickscfg` file field equivalents. -- `azure_resource_id` and `azure_use_msi`; or their environment variable or `.databrickscfg` file field equivalents. +- `azure_workspace_resource_id`, `azure_client_secret`, `azure_client_id`, and `azure_tenant_id`; or their environment variable or `.databrickscfg` file field equivalents. +- `azure_workspace_resource_id` and `azure_use_msi`; or their environment variable or `.databrickscfg` file field equivalents. | Argument | Description | Environment variable | |-----------------------|-------------|----------------------| -| `azure_resource_id` | _(String)_ The Azure Resource Manager ID for the Azure Databricks workspace, which is exchanged for a Databricks host URL. | `DATABRICKS_AZURE_RESOURCE_ID` | +| `azure_workspace_resource_id` | _(String)_ The Azure Resource Manager ID for the Azure Databricks workspace, which is exchanged for a Databricks host URL. 
| `DATABRICKS_AZURE_RESOURCE_ID` | | `azure_use_msi` | _(Boolean)_ `true` to use Azure Managed Service Identity passwordless authentication flow for service principals. _This feature is not yet implemented in the Databricks SDK for Python._ | `ARM_USE_MSI` | | `azure_client_secret` | _(String)_ The Azure AD service principal's client secret. | `ARM_CLIENT_SECRET` | | `azure_client_id` | _(String)_ The Azure AD service principal's application ID. | `ARM_CLIENT_ID` | diff --git a/databricks/sdk/__init__.py b/databricks/sdk/__init__.py index 1f380ba00..13a97087c 100755 --- a/databricks/sdk/__init__.py +++ b/databricks/sdk/__init__.py @@ -1,5 +1,6 @@ import databricks.sdk.core as client import databricks.sdk.dbutils as dbutils +from databricks.sdk.credentials_provider import CredentialsProvider from databricks.sdk.mixins.compute import ClustersExt from databricks.sdk.mixins.files import DbfsExt from databricks.sdk.mixins.workspace import WorkspaceExt @@ -11,8 +12,8 @@ ArtifactAllowlistsAPI, CatalogsAPI, ConnectionsAPI, ExternalLocationsAPI, FunctionsAPI, - GrantsAPI, MetastoresAPI, - ModelVersionsAPI, + GrantsAPI, LakehouseMonitorsAPI, + MetastoresAPI, ModelVersionsAPI, RegisteredModelsAPI, SchemasAPI, StorageCredentialsAPI, SystemSchemasAPI, @@ -24,6 +25,7 @@ InstancePoolsAPI, InstanceProfilesAPI, LibrariesAPI, PolicyFamiliesAPI) +from databricks.sdk.service.dashboards import LakeviewAPI from databricks.sdk.service.files import DbfsAPI, FilesAPI from databricks.sdk.service.iam import (AccountAccessControlAPI, AccountAccessControlProxyAPI, @@ -62,6 +64,8 @@ QueryHistoryAPI, QueryVisualizationsAPI, StatementExecutionAPI, WarehousesAPI) +from databricks.sdk.service.vectorsearch import (VectorSearchEndpointsAPI, + VectorSearchIndexesAPI) from databricks.sdk.service.workspace import (GitCredentialsAPI, ReposAPI, SecretsAPI, WorkspaceAPI) @@ -111,7 +115,7 @@ def __init__(self, debug_headers: bool = None, product="unknown", product_version="0.0.0", - credentials_provider: client.CredentialsProvider = None, + credentials_provider: CredentialsProvider = None, config: client.Config = None): if not config: config = client.Config(host=host, @@ -169,6 +173,8 @@ def __init__(self, self._instance_profiles = InstanceProfilesAPI(self._api_client) self._ip_access_lists = IpAccessListsAPI(self._api_client) self._jobs = JobsAPI(self._api_client) + self._lakehouse_monitors = LakehouseMonitorsAPI(self._api_client) + self._lakeview = LakeviewAPI(self._api_client) self._libraries = LibrariesAPI(self._api_client) self._metastores = MetastoresAPI(self._api_client) self._model_registry = ModelRegistryAPI(self._api_client) @@ -198,6 +204,8 @@ def __init__(self, self._token_management = TokenManagementAPI(self._api_client) self._tokens = TokensAPI(self._api_client) self._users = UsersAPI(self._api_client) + self._vector_search_endpoints = VectorSearchEndpointsAPI(self._api_client) + self._vector_search_indexes = VectorSearchIndexesAPI(self._api_client) self._volumes = VolumesAPI(self._api_client) self._warehouses = WarehousesAPI(self._api_client) self._workspace = WorkspaceExt(self._api_client) @@ -361,6 +369,16 @@ def jobs(self) -> JobsAPI: """The Jobs API allows you to create, edit, and delete jobs.""" return self._jobs + @property + def lakehouse_monitors(self) -> LakehouseMonitorsAPI: + """A monitor computes and monitors data or model quality metrics for a table over time.""" + return self._lakehouse_monitors + + @property + def lakeview(self) -> LakeviewAPI: + """These APIs provide specific management operations for 
Lakeview dashboards.""" + return self._lakeview + @property def libraries(self) -> LibrariesAPI: """The Libraries API allows you to install and uninstall libraries and get the status of libraries on a cluster.""" @@ -506,6 +524,16 @@ def users(self) -> UsersAPI: """User identities recognized by Databricks and represented by email addresses.""" return self._users + @property + def vector_search_endpoints(self) -> VectorSearchEndpointsAPI: + """**Endpoint**: Represents the compute resources to host vector search indexes.""" + return self._vector_search_endpoints + + @property + def vector_search_indexes(self) -> VectorSearchIndexesAPI: + """**Index**: An efficient representation of your embedding vectors that supports real-time and efficient approximate nearest neighbor (ANN) search queries.""" + return self._vector_search_indexes + @property def volumes(self) -> VolumesAPI: """Volumes are a Unity Catalog (UC) capability for accessing, storing, governing, organizing and processing files.""" @@ -564,7 +592,7 @@ def __init__(self, debug_headers: bool = None, product="unknown", product_version="0.0.0", - credentials_provider: client.CredentialsProvider = None, + credentials_provider: CredentialsProvider = None, config: client.Config = None): if not config: config = client.Config(host=host, diff --git a/databricks/sdk/azure.py b/databricks/sdk/azure.py index e079a8c94..3e008d8c2 100644 --- a/databricks/sdk/azure.py +++ b/databricks/sdk/azure.py @@ -15,19 +15,15 @@ class AzureEnvironment: ARM_DATABRICKS_RESOURCE_ID = "2ff814a6-3304-4ab8-85cb-cd0e6f879c1d" ENVIRONMENTS = dict( - PUBLIC=AzureEnvironment(name="AzurePublicCloud", + PUBLIC=AzureEnvironment(name="PUBLIC", service_management_endpoint="https://management.core.windows.net/", resource_manager_endpoint="https://management.azure.com/", active_directory_endpoint="https://login.microsoftonline.com/"), - GERMAN=AzureEnvironment(name="AzureGermanCloud", - service_management_endpoint="https://management.core.cloudapi.de/", - resource_manager_endpoint="https://management.microsoftazure.de/", - active_directory_endpoint="https://login.microsoftonline.de/"), - USGOVERNMENT=AzureEnvironment(name="AzureUSGovernmentCloud", + USGOVERNMENT=AzureEnvironment(name="USGOVERNMENT", service_management_endpoint="https://management.core.usgovcloudapi.net/", resource_manager_endpoint="https://management.usgovcloudapi.net/", active_directory_endpoint="https://login.microsoftonline.us/"), - CHINA=AzureEnvironment(name="AzureChinaCloud", + CHINA=AzureEnvironment(name="CHINA", service_management_endpoint="https://management.core.chinacloudapi.cn/", resource_manager_endpoint="https://management.chinacloudapi.cn/", active_directory_endpoint="https://login.chinacloudapi.cn/"), diff --git a/databricks/sdk/config.py b/databricks/sdk/config.py new file mode 100644 index 000000000..0dadfc927 --- /dev/null +++ b/databricks/sdk/config.py @@ -0,0 +1,452 @@ +import configparser +import copy +import logging +import os +import pathlib +import platform +import sys +import urllib.parse +from typing import Dict, Iterable, Optional + +import requests + +from .azure import AzureEnvironment +from .credentials_provider import CredentialsProvider, DefaultCredentials +from .environments import (ALL_ENVS, DEFAULT_ENVIRONMENT, Cloud, + DatabricksEnvironment) +from .oauth import OidcEndpoints +from .version import __version__ + +logger = logging.getLogger('databricks.sdk') + + +class ConfigAttribute: + """ Configuration attribute metadata and descriptor protocols. 
""" + + # name and transform are discovered from Config.__new__ + name: str = None + transform: type = str + + def __init__(self, env: str = None, auth: str = None, sensitive: bool = False): + self.env = env + self.auth = auth + self.sensitive = sensitive + + def __get__(self, cfg: 'Config', owner): + if not cfg: + return None + return cfg._inner.get(self.name, None) + + def __set__(self, cfg: 'Config', value: any): + cfg._inner[self.name] = self.transform(value) + + def __repr__(self) -> str: + return f"" + + +class Config: + host: str = ConfigAttribute(env='DATABRICKS_HOST') + account_id: str = ConfigAttribute(env='DATABRICKS_ACCOUNT_ID') + token: str = ConfigAttribute(env='DATABRICKS_TOKEN', auth='pat', sensitive=True) + username: str = ConfigAttribute(env='DATABRICKS_USERNAME', auth='basic') + password: str = ConfigAttribute(env='DATABRICKS_PASSWORD', auth='basic', sensitive=True) + client_id: str = ConfigAttribute(env='DATABRICKS_CLIENT_ID', auth='oauth') + client_secret: str = ConfigAttribute(env='DATABRICKS_CLIENT_SECRET', auth='oauth', sensitive=True) + profile: str = ConfigAttribute(env='DATABRICKS_CONFIG_PROFILE') + config_file: str = ConfigAttribute(env='DATABRICKS_CONFIG_FILE') + google_service_account: str = ConfigAttribute(env='DATABRICKS_GOOGLE_SERVICE_ACCOUNT', auth='google') + google_credentials: str = ConfigAttribute(env='GOOGLE_CREDENTIALS', auth='google', sensitive=True) + azure_workspace_resource_id: str = ConfigAttribute(env='DATABRICKS_AZURE_RESOURCE_ID', auth='azure') + azure_use_msi: bool = ConfigAttribute(env='ARM_USE_MSI', auth='azure') + azure_client_secret: str = ConfigAttribute(env='ARM_CLIENT_SECRET', auth='azure', sensitive=True) + azure_client_id: str = ConfigAttribute(env='ARM_CLIENT_ID', auth='azure') + azure_tenant_id: str = ConfigAttribute(env='ARM_TENANT_ID', auth='azure') + azure_environment: str = ConfigAttribute(env='ARM_ENVIRONMENT') + databricks_cli_path: str = ConfigAttribute(env='DATABRICKS_CLI_PATH') + auth_type: str = ConfigAttribute(env='DATABRICKS_AUTH_TYPE') + cluster_id: str = ConfigAttribute(env='DATABRICKS_CLUSTER_ID') + warehouse_id: str = ConfigAttribute(env='DATABRICKS_WAREHOUSE_ID') + skip_verify: bool = ConfigAttribute() + http_timeout_seconds: float = ConfigAttribute() + debug_truncate_bytes: int = ConfigAttribute(env='DATABRICKS_DEBUG_TRUNCATE_BYTES') + debug_headers: bool = ConfigAttribute(env='DATABRICKS_DEBUG_HEADERS') + rate_limit: int = ConfigAttribute(env='DATABRICKS_RATE_LIMIT') + retry_timeout_seconds: int = ConfigAttribute() + metadata_service_url = ConfigAttribute(env='DATABRICKS_METADATA_SERVICE_URL', + auth='metadata-service', + sensitive=True) + max_connection_pools: int = ConfigAttribute() + max_connections_per_pool: int = ConfigAttribute() + databricks_environment: Optional[DatabricksEnvironment] = None + + def __init__(self, + *, + credentials_provider: CredentialsProvider = None, + product="unknown", + product_version="0.0.0", + **kwargs): + self._inner = {} + self._user_agent_other_info = [] + self._credentials_provider = credentials_provider if credentials_provider else DefaultCredentials() + if 'databricks_environment' in kwargs: + self.databricks_environment = kwargs['databricks_environment'] + del kwargs['databricks_environment'] + try: + self._set_inner_config(kwargs) + self._load_from_env() + self._known_file_config_loader() + self._fix_host_if_needed() + self._validate() + self._init_auth() + self._product = product + self._product_version = product_version + except ValueError as e: + message = 
self.wrap_debug_info(str(e)) + raise ValueError(message) from e + + def wrap_debug_info(self, message: str) -> str: + debug_string = self.debug_string() + if debug_string: + message = f'{message.rstrip(".")}. {debug_string}' + return message + + @staticmethod + def parse_dsn(dsn: str) -> 'Config': + uri = urllib.parse.urlparse(dsn) + if uri.scheme != 'databricks': + raise ValueError(f'Expected databricks:// scheme, got {uri.scheme}://') + kwargs = {'host': f'https://{uri.hostname}'} + if uri.username: + kwargs['username'] = uri.username + if uri.password: + kwargs['password'] = uri.password + query = dict(urllib.parse.parse_qsl(uri.query)) + for attr in Config.attributes(): + if attr.name not in query: + continue + kwargs[attr.name] = query[attr.name] + return Config(**kwargs) + + def authenticate(self) -> Dict[str, str]: + """ Returns a list of fresh authentication headers """ + return self._header_factory() + + def as_dict(self) -> dict: + return self._inner + + def _get_azure_environment_name(self) -> str: + if not self.azure_environment: + return "PUBLIC" + env = self.azure_environment.upper() + # Compatibility with older versions of the SDK that allowed users to specify AzurePublicCloud or AzureChinaCloud + if env.startswith("AZURE"): + env = env[len("AZURE"):] + if env.endswith("CLOUD"): + env = env[:-len("CLOUD")] + return env + + @property + def environment(self) -> DatabricksEnvironment: + """Returns the environment based on configuration.""" + if self.databricks_environment: + return self.databricks_environment + if self.host: + for environment in ALL_ENVS: + if self.host.endswith(environment.dns_zone): + return environment + if self.azure_workspace_resource_id: + azure_env = self._get_azure_environment_name() + for environment in ALL_ENVS: + if environment.cloud != Cloud.AZURE: + continue + if environment.azure_environment.name != azure_env: + continue + if environment.dns_zone.startswith(".dev") or environment.dns_zone.startswith(".staging"): + continue + return environment + return DEFAULT_ENVIRONMENT + + @property + def is_azure(self) -> bool: + return self.environment.cloud == Cloud.AZURE + + @property + def is_gcp(self) -> bool: + return self.environment.cloud == Cloud.GCP + + @property + def is_aws(self) -> bool: + return self.environment.cloud == Cloud.AWS + + @property + def is_account_client(self) -> bool: + if not self.host: + return False + return self.host.startswith("https://accounts.") or self.host.startswith("https://accounts-dod.") + + @property + def arm_environment(self) -> AzureEnvironment: + return self.environment.azure_environment + + @property + def effective_azure_login_app_id(self): + return self.environment.azure_application_id + + @property + def hostname(self) -> str: + url = urllib.parse.urlparse(self.host) + return url.netloc + + @property + def is_any_auth_configured(self) -> bool: + for attr in Config.attributes(): + if not attr.auth: + continue + value = self._inner.get(attr.name, None) + if value: + return True + return False + + @property + def user_agent(self): + """ Returns User-Agent header used by this SDK """ + py_version = platform.python_version() + os_name = platform.uname().system.lower() + + ua = [ + f"{self._product}/{self._product_version}", f"databricks-sdk-py/{__version__}", + f"python/{py_version}", f"os/{os_name}", f"auth/{self.auth_type}", + ] + if len(self._user_agent_other_info) > 0: + ua.append(' '.join(self._user_agent_other_info)) + if len(self._upstream_user_agent) > 0: + ua.append(self._upstream_user_agent) + if 
'DATABRICKS_RUNTIME_VERSION' in os.environ: + runtime_version = os.environ['DATABRICKS_RUNTIME_VERSION'] + if runtime_version != '': + runtime_version = self._sanitize_header_value(runtime_version) + ua.append(f'runtime/{runtime_version}') + + return ' '.join(ua) + + @staticmethod + def _sanitize_header_value(value: str) -> str: + value = value.replace(' ', '-') + value = value.replace('/', '-') + return value + + @property + def _upstream_user_agent(self) -> str: + product = os.environ.get('DATABRICKS_SDK_UPSTREAM', None) + product_version = os.environ.get('DATABRICKS_SDK_UPSTREAM_VERSION', None) + if product is not None and product_version is not None: + return f"upstream/{product} upstream-version/{product_version}" + return "" + + def with_user_agent_extra(self, key: str, value: str) -> 'Config': + self._user_agent_other_info.append(f"{key}/{value}") + return self + + @property + def oidc_endpoints(self) -> Optional[OidcEndpoints]: + self._fix_host_if_needed() + if not self.host: + return None + if self.is_azure and self.azure_client_id: + # Retrieve authorize endpoint to retrieve token endpoint after + res = requests.get(f'{self.host}/oidc/oauth2/v2.0/authorize', allow_redirects=False) + real_auth_url = res.headers.get('location') + if not real_auth_url: + return None + return OidcEndpoints(authorization_endpoint=real_auth_url, + token_endpoint=real_auth_url.replace('/authorize', '/token')) + if self.is_account_client and self.account_id: + prefix = f'{self.host}/oidc/accounts/{self.account_id}' + return OidcEndpoints(authorization_endpoint=f'{prefix}/v1/authorize', + token_endpoint=f'{prefix}/v1/token') + oidc = f'{self.host}/oidc/.well-known/oauth-authorization-server' + res = requests.get(oidc) + if res.status_code != 200: + return None + auth_metadata = res.json() + return OidcEndpoints(authorization_endpoint=auth_metadata.get('authorization_endpoint'), + token_endpoint=auth_metadata.get('token_endpoint')) + + def debug_string(self) -> str: + """ Returns log-friendly representation of configured attributes """ + buf = [] + attrs_used = [] + envs_used = [] + for attr in Config.attributes(): + if attr.env and os.environ.get(attr.env): + envs_used.append(attr.env) + value = getattr(self, attr.name) + if not value: + continue + safe = '***' if attr.sensitive else f'{value}' + attrs_used.append(f'{attr.name}={safe}') + if attrs_used: + buf.append(f'Config: {", ".join(attrs_used)}') + if envs_used: + buf.append(f'Env: {", ".join(envs_used)}') + return '. '.join(buf) + + def to_dict(self) -> Dict[str, any]: + return self._inner + + @property + def sql_http_path(self) -> Optional[str]: + """(Experimental) Return HTTP path for SQL Drivers. + + If `cluster_id` or `warehouse_id` are configured, return a valid HTTP Path argument + used in construction of JDBC/ODBC DSN string. 
+ + See https://docs.databricks.com/integrations/jdbc-odbc-bi.html + """ + if (not self.cluster_id) and (not self.warehouse_id): + return None + if self.cluster_id and self.warehouse_id: + raise ValueError('cannot have both cluster_id and warehouse_id') + headers = self.authenticate() + headers['User-Agent'] = f'{self.user_agent} sdk-feature/sql-http-path' + if self.cluster_id: + response = requests.get(f"{self.host}/api/2.0/preview/scim/v2/Me", headers=headers) + # get workspace ID from the response header + workspace_id = response.headers.get('x-databricks-org-id') + return f'sql/protocolv1/o/{workspace_id}/{self.cluster_id}' + if self.warehouse_id: + return f'/sql/1.0/warehouses/{self.warehouse_id}' + + @classmethod + def attributes(cls) -> Iterable[ConfigAttribute]: + """ Returns a list of Databricks SDK configuration metadata """ + if hasattr(cls, '_attributes'): + return cls._attributes + if sys.version_info[1] >= 10: + import inspect + anno = inspect.get_annotations(cls) + else: + # Python 3.7 compatibility: getting type hints require extra hop, as described in + # "Accessing The Annotations Dict Of An Object In Python 3.9 And Older" section of + # https://docs.python.org/3/howto/annotations.html + anno = cls.__dict__['__annotations__'] + attrs = [] + for name, v in cls.__dict__.items(): + if type(v) != ConfigAttribute: + continue + v.name = name + v.transform = anno.get(name, str) + attrs.append(v) + cls._attributes = attrs + return cls._attributes + + def _fix_host_if_needed(self): + if not self.host: + return + # fix url to remove trailing slash + o = urllib.parse.urlparse(self.host) + if not o.hostname: + # only hostname is specified + self.host = f"https://{self.host}" + else: + self.host = f"{o.scheme}://{o.netloc}" + + def _set_inner_config(self, keyword_args: Dict[str, any]): + for attr in self.attributes(): + if attr.name not in keyword_args: + continue + if keyword_args.get(attr.name, None) is None: + continue + self.__setattr__(attr.name, keyword_args[attr.name]) + + def _load_from_env(self): + found = False + for attr in self.attributes(): + if not attr.env: + continue + if attr.name in self._inner: + continue + value = os.environ.get(attr.env) + if not value: + continue + self.__setattr__(attr.name, value) + found = True + if found: + logger.debug('Loaded from environment') + + def _known_file_config_loader(self): + if not self.profile and (self.is_any_auth_configured or self.host + or self.azure_workspace_resource_id): + # skip loading configuration file if there's any auth configured + # directly as part of the Config() constructor. + return + config_file = self.config_file + if not config_file: + config_file = "~/.databrickscfg" + config_path = pathlib.Path(config_file).expanduser() + if not config_path.exists(): + logger.debug("%s does not exist", config_path) + return + ini_file = configparser.ConfigParser() + ini_file.read(config_path) + profile = self.profile + has_explicit_profile = self.profile is not None + # In Go SDK, we skip merging the profile with DEFAULT section, though Python's ConfigParser.items() + # is returning profile key-value pairs _including those from DEFAULT_. This is not what we expect + # from Unified Auth test suite at the moment. Hence, the private variable access. 
+ # See: https://docs.python.org/3/library/configparser.html#mapping-protocol-access + if not has_explicit_profile and not ini_file.defaults(): + logger.debug(f'{config_path} has no DEFAULT profile configured') + return + if not has_explicit_profile: + profile = "DEFAULT" + profiles = ini_file._sections + if ini_file.defaults(): + profiles['DEFAULT'] = ini_file.defaults() + if profile not in profiles: + raise ValueError(f'resolve: {config_path} has no {profile} profile configured') + raw_config = profiles[profile] + logger.info(f'loading {profile} profile from {config_file}: {", ".join(raw_config.keys())}') + for k, v in raw_config.items(): + if k in self._inner: + # don't overwrite a value previously set + continue + self.__setattr__(k, v) + + def _validate(self): + auths_used = set() + for attr in Config.attributes(): + if attr.name not in self._inner: + continue + if not attr.auth: + continue + auths_used.add(attr.auth) + if len(auths_used) <= 1: + return + if self.auth_type: + # client has auth preference set + return + names = " and ".join(sorted(auths_used)) + raise ValueError(f'validate: more than one authorization method configured: {names}') + + def _init_auth(self): + try: + self._header_factory = self._credentials_provider(self) + self.auth_type = self._credentials_provider.auth_type() + if not self._header_factory: + raise ValueError('not configured') + except ValueError as e: + raise ValueError(f'{self._credentials_provider.auth_type()} auth: {e}') from e + + def __repr__(self): + return f'<{self.debug_string()}>' + + def copy(self): + """Creates a copy of the config object. + All the copies share most of their internal state (ie, shared reference to fields such as credential_provider). + Copies have their own instances of the following fields + - `_user_agent_other_info` + """ + cpy: Config = copy.copy(self) + cpy._user_agent_other_info = copy.deepcopy(self._user_agent_other_info) + return cpy diff --git a/databricks/sdk/core.py b/databricks/sdk/core.py index 3d7cbef14..2b7442708 100644 --- a/databricks/sdk/core.py +++ b/databricks/sdk/core.py @@ -1,1042 +1,22 @@ -import abc -import base64 -import configparser -import copy -import functools -import io -import json -import logging -import os -import pathlib -import platform import re -import subprocess -import sys import urllib.parse -from datetime import datetime, timedelta +from datetime import timedelta from json import JSONDecodeError from types import TracebackType -from typing import (Any, BinaryIO, Callable, Dict, Iterable, Iterator, List, - Optional, Type, Union) +from typing import Any, BinaryIO, Iterator, Type -import google.auth -import requests -from google.auth import impersonated_credentials -from google.auth.transport.requests import Request -from google.oauth2 import service_account from requests.adapters import HTTPAdapter -from .azure import (ARM_DATABRICKS_RESOURCE_ID, ENVIRONMENTS, AzureEnvironment, - add_sp_management_token, add_workspace_id_header) +from .config import * +# To preserve backwards compatibility (as these definitions were previously in this module) +from .credentials_provider import * from .errors import DatabricksError, error_mapper -from .oauth import (ClientCredentials, OAuthClient, OidcEndpoints, Refreshable, - Token, TokenCache, TokenSource) from .retries import retried -from .version import __version__ __all__ = ['Config', 'DatabricksError'] logger = logging.getLogger('databricks.sdk') -HeaderFactory = Callable[[], Dict[str, str]] - -GcpScopes = 
["https://www.googleapis.com/auth/cloud-platform", "https://www.googleapis.com/auth/compute"] - - -class CredentialsProvider(abc.ABC): - """ CredentialsProvider is the protocol (call-side interface) - for authenticating requests to Databricks REST APIs""" - - @abc.abstractmethod - def auth_type(self) -> str: - ... - - @abc.abstractmethod - def __call__(self, cfg: 'Config') -> HeaderFactory: - ... - - -def credentials_provider(name: str, require: List[str]): - """ Given the function that receives a Config and returns RequestVisitor, - create CredentialsProvider with a given name and required configuration - attribute names to be present for this function to be called. """ - - def inner(func: Callable[['Config'], HeaderFactory]) -> CredentialsProvider: - - @functools.wraps(func) - def wrapper(cfg: 'Config') -> Optional[HeaderFactory]: - for attr in require: - if not getattr(cfg, attr): - return None - return func(cfg) - - wrapper.auth_type = lambda: name - return wrapper - - return inner - - -@credentials_provider('basic', ['host', 'username', 'password']) -def basic_auth(cfg: 'Config') -> HeaderFactory: - """ Given username and password, add base64-encoded Basic credentials """ - encoded = base64.b64encode(f'{cfg.username}:{cfg.password}'.encode()).decode() - static_credentials = {'Authorization': f'Basic {encoded}'} - - def inner() -> Dict[str, str]: - return static_credentials - - return inner - - -@credentials_provider('pat', ['host', 'token']) -def pat_auth(cfg: 'Config') -> HeaderFactory: - """ Adds Databricks Personal Access Token to every request """ - static_credentials = {'Authorization': f'Bearer {cfg.token}'} - - def inner() -> Dict[str, str]: - return static_credentials - - return inner - - -@credentials_provider('runtime', []) -def runtime_native_auth(cfg: 'Config') -> Optional[HeaderFactory]: - if 'DATABRICKS_RUNTIME_VERSION' not in os.environ: - return None - - # This import MUST be after the "DATABRICKS_RUNTIME_VERSION" check - # above, so that we are not throwing import errors when not in - # runtime and no config variables are set. - from databricks.sdk.runtime import (init_runtime_legacy_auth, - init_runtime_native_auth, - init_runtime_repl_auth) - for init in [init_runtime_native_auth, init_runtime_repl_auth, init_runtime_legacy_auth]: - if init is None: - continue - host, inner = init() - if host is None: - logger.debug(f'[{init.__name__}] no host detected') - continue - cfg.host = host - logger.debug(f'[{init.__name__}] runtime native auth configured') - return inner - return None - - -@credentials_provider('oauth-m2m', ['is_aws', 'host', 'client_id', 'client_secret']) -def oauth_service_principal(cfg: 'Config') -> Optional[HeaderFactory]: - """ Adds refreshed Databricks machine-to-machine OAuth Bearer token to every request, - if /oidc/.well-known/oauth-authorization-server is available on the given host. 
""" - # TODO: Azure returns 404 for UC workspace after redirecting to - # https://login.microsoftonline.com/{cfg.azure_tenant_id}/.well-known/oauth-authorization-server - oidc = cfg.oidc_endpoints - if oidc is None: - return None - token_source = ClientCredentials(client_id=cfg.client_id, - client_secret=cfg.client_secret, - token_url=oidc.token_endpoint, - scopes=["all-apis"], - use_header=True) - - def inner() -> Dict[str, str]: - token = token_source.token() - return {'Authorization': f'{token.token_type} {token.access_token}'} - - return inner - - -@credentials_provider('external-browser', ['host', 'auth_type']) -def external_browser(cfg: 'Config') -> Optional[HeaderFactory]: - if cfg.auth_type != 'external-browser': - return None - if cfg.client_id: - client_id = cfg.client_id - elif cfg.is_aws: - client_id = 'databricks-cli' - elif cfg.is_azure: - # Use Azure AD app for cases when Azure CLI is not available on the machine. - # App has to be registered as Single-page multi-tenant to support PKCE - # TODO: temporary app ID, change it later. - client_id = '6128a518-99a9-425b-8333-4cc94f04cacd' - else: - raise ValueError(f'local browser SSO is not supported') - oauth_client = OAuthClient(host=cfg.host, - client_id=client_id, - redirect_url='http://localhost:8020', - client_secret=cfg.client_secret) - - # Load cached credentials from disk if they exist. - # Note that these are local to the Python SDK and not reused by other SDKs. - token_cache = TokenCache(oauth_client) - credentials = token_cache.load() - if credentials: - # Force a refresh in case the loaded credentials are expired. - credentials.token() - else: - consent = oauth_client.initiate_consent() - if not consent: - return None - credentials = consent.launch_external_browser() - token_cache.save(credentials) - return credentials(cfg) - - -def _ensure_host_present(cfg: 'Config', token_source_for: Callable[[str], TokenSource]): - """ Resolves Azure Databricks workspace URL from ARM Resource ID """ - if cfg.host: - return - if not cfg.azure_workspace_resource_id: - return - arm = cfg.arm_environment.resource_manager_endpoint - token = token_source_for(arm).token() - resp = requests.get(f"{arm}{cfg.azure_workspace_resource_id}?api-version=2018-04-01", - headers={"Authorization": f"Bearer {token.access_token}"}) - if not resp.ok: - raise ValueError(f"Cannot resolve Azure Databricks workspace: {resp.content}") - cfg.host = f"https://{resp.json()['properties']['workspaceUrl']}" - - -@credentials_provider('azure-client-secret', - ['is_azure', 'azure_client_id', 'azure_client_secret', 'azure_tenant_id']) -def azure_service_principal(cfg: 'Config') -> HeaderFactory: - """ Adds refreshed Azure Active Directory (AAD) Service Principal OAuth tokens - to every request, while automatically resolving different Azure environment endpoints. 
""" - - def token_source_for(resource: str) -> TokenSource: - aad_endpoint = cfg.arm_environment.active_directory_endpoint - return ClientCredentials(client_id=cfg.azure_client_id, - client_secret=cfg.azure_client_secret, - token_url=f"{aad_endpoint}{cfg.azure_tenant_id}/oauth2/token", - endpoint_params={"resource": resource}, - use_params=True) - - _ensure_host_present(cfg, token_source_for) - logger.info("Configured AAD token for Service Principal (%s)", cfg.azure_client_id) - inner = token_source_for(cfg.effective_azure_login_app_id) - cloud = token_source_for(cfg.arm_environment.service_management_endpoint) - - def refreshed_headers() -> Dict[str, str]: - headers = {'Authorization': f"Bearer {inner.token().access_token}", } - add_workspace_id_header(cfg, headers) - add_sp_management_token(cloud, headers) - return headers - - return refreshed_headers - - -@credentials_provider('github-oidc-azure', ['host', 'azure_client_id']) -def github_oidc_azure(cfg: 'Config') -> Optional[HeaderFactory]: - if 'ACTIONS_ID_TOKEN_REQUEST_TOKEN' not in os.environ: - # not in GitHub actions - return None - - # Client ID is the minimal thing we need, as otherwise we get AADSTS700016: Application with - # identifier 'https://token.actions.githubusercontent.com' was not found in the directory '...'. - if not cfg.is_azure: - return None - - # See https://docs.github.com/en/actions/deployment/security-hardening-your-deployments/configuring-openid-connect-in-cloud-providers - headers = {'Authorization': f"Bearer {os.environ['ACTIONS_ID_TOKEN_REQUEST_TOKEN']}"} - endpoint = f"{os.environ['ACTIONS_ID_TOKEN_REQUEST_URL']}&audience=api://AzureADTokenExchange" - response = requests.get(endpoint, headers=headers) - if not response.ok: - return None - - # get the ID Token with aud=api://AzureADTokenExchange sub=repo:org/repo:environment:name - response_json = response.json() - if 'value' not in response_json: - return None - - logger.info("Configured AAD token for GitHub Actions OIDC (%s)", cfg.azure_client_id) - params = { - 'client_assertion_type': 'urn:ietf:params:oauth:client-assertion-type:jwt-bearer', - 'resource': cfg.effective_azure_login_app_id, - 'client_assertion': response_json['value'], - } - aad_endpoint = cfg.arm_environment.active_directory_endpoint - if not cfg.azure_tenant_id: - # detect Azure AD Tenant ID if it's not specified directly - token_endpoint = cfg.oidc_endpoints.token_endpoint - cfg.azure_tenant_id = token_endpoint.replace(aad_endpoint, '').split('/')[0] - inner = ClientCredentials(client_id=cfg.azure_client_id, - client_secret="", # we have no (rotatable) secrets in OIDC flow - token_url=f"{aad_endpoint}{cfg.azure_tenant_id}/oauth2/token", - endpoint_params=params, - use_params=True) - - def refreshed_headers() -> Dict[str, str]: - token = inner.token() - return {'Authorization': f'{token.token_type} {token.access_token}'} - - return refreshed_headers - - -@credentials_provider('google-credentials', ['host', 'google_credentials']) -def google_credentials(cfg: 'Config') -> Optional[HeaderFactory]: - if not cfg.is_gcp: - return None - # Reads credentials as JSON. Credentials can be either a path to JSON file, or actual JSON string. - # Obtain the id token by providing the json file path and target audience. - if (os.path.isfile(cfg.google_credentials)): - with io.open(cfg.google_credentials, "r", encoding="utf-8") as json_file: - account_info = json.load(json_file) - else: - # If the file doesn't exist, assume that the config is the actual JSON content. 
- account_info = json.loads(cfg.google_credentials) - - credentials = service_account.IDTokenCredentials.from_service_account_info(info=account_info, - target_audience=cfg.host) - - request = Request() - - gcp_credentials = service_account.Credentials.from_service_account_info(info=account_info, - scopes=GcpScopes) - - def refreshed_headers() -> Dict[str, str]: - credentials.refresh(request) - headers = {'Authorization': f'Bearer {credentials.token}'} - if cfg.is_account_client: - gcp_credentials.refresh(request) - headers["X-Databricks-GCP-SA-Access-Token"] = gcp_credentials.token - return headers - - return refreshed_headers - - -@credentials_provider('google-id', ['host', 'google_service_account']) -def google_id(cfg: 'Config') -> Optional[HeaderFactory]: - if not cfg.is_gcp: - return None - credentials, _project_id = google.auth.default() - - # Create the impersonated credential. - target_credentials = impersonated_credentials.Credentials(source_credentials=credentials, - target_principal=cfg.google_service_account, - target_scopes=[]) - - # Set the impersonated credential, target audience and token options. - id_creds = impersonated_credentials.IDTokenCredentials(target_credentials, - target_audience=cfg.host, - include_email=True) - - gcp_impersonated_credentials = impersonated_credentials.Credentials( - source_credentials=credentials, target_principal=cfg.google_service_account, target_scopes=GcpScopes) - - request = Request() - - def refreshed_headers() -> Dict[str, str]: - id_creds.refresh(request) - headers = {'Authorization': f'Bearer {id_creds.token}'} - if cfg.is_account_client: - gcp_impersonated_credentials.refresh(request) - headers["X-Databricks-GCP-SA-Access-Token"] = gcp_impersonated_credentials.token - return headers - - return refreshed_headers - - -class CliTokenSource(Refreshable): - - def __init__(self, cmd: List[str], token_type_field: str, access_token_field: str, expiry_field: str): - super().__init__() - self._cmd = cmd - self._token_type_field = token_type_field - self._access_token_field = access_token_field - self._expiry_field = expiry_field - - @staticmethod - def _parse_expiry(expiry: str) -> datetime: - expiry = expiry.rstrip("Z").split(".")[0] - for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S"): - try: - return datetime.strptime(expiry, fmt) - except ValueError as e: - last_e = e - if last_e: - raise last_e - - def refresh(self) -> Token: - try: - is_windows = sys.platform.startswith('win') - # windows requires shell=True to be able to execute 'az login' or other commands - # cannot use shell=True all the time, as it breaks macOS - out = subprocess.run(self._cmd, capture_output=True, check=True, shell=is_windows) - it = json.loads(out.stdout.decode()) - expires_on = self._parse_expiry(it[self._expiry_field]) - return Token(access_token=it[self._access_token_field], - token_type=it[self._token_type_field], - expiry=expires_on) - except ValueError as e: - raise ValueError(f"cannot unmarshal CLI result: {e}") - except subprocess.CalledProcessError as e: - stdout = e.stdout.decode().strip() - stderr = e.stderr.decode().strip() - message = stdout or stderr - raise IOError(f'cannot get access token: {message}') from e - - -class AzureCliTokenSource(CliTokenSource): - """ Obtain the token granted by `az login` CLI command """ - - def __init__(self, resource: str, subscription: str = ""): - cmd = ["az", "account", "get-access-token", "--resource", resource, "--output", "json"] - if subscription != "": - cmd.append("--subscription") - cmd.append(subscription) 
- super().__init__(cmd=cmd, - token_type_field='tokenType', - access_token_field='accessToken', - expiry_field='expiresOn') - - def is_human_user(self) -> bool: - """The UPN claim is the username of the user, but not the Service Principal. - - Azure CLI can be authenticated by both human users (`az login`) and service principals. In case of service - principals, it can be either OIDC from GitHub or login with a password: - - ~ $ az login --service-principal --user $clientID --password $clientSecret --tenant $tenantID - - Human users get more claims: - - 'amr' - how the subject of the token was authenticated - - 'name', 'family_name', 'given_name' - human-readable values that identifies the subject of the token - - 'scp' with `user_impersonation` value, that shows the set of scopes exposed by your application for which - the client application has requested (and received) consent - - 'unique_name' - a human-readable value that identifies the subject of the token. This value is not - guaranteed to be unique within a tenant and should be used only for display purposes. - - 'upn' - The username of the user. - """ - return 'upn' in self.token().jwt_claims() - - @staticmethod - def for_resource(cfg: 'Config', resource: str) -> 'AzureCliTokenSource': - subscription = AzureCliTokenSource.get_subscription(cfg) - if subscription != "": - token_source = AzureCliTokenSource(resource, subscription) - try: - # This will fail if the user has access to the workspace, but not to the subscription - # itself. - # In such case, we fall back to not using the subscription. - token_source.token() - return token_source - except OSError: - logger.warning("Failed to get token for subscription. Using resource only token.") - - token_source = AzureCliTokenSource(resource) - token_source.token() - return token_source - - @staticmethod - def get_subscription(cfg: 'Config') -> str: - resource = cfg.azure_workspace_resource_id - if resource is None or resource == "": - return "" - components = resource.split('/') - if len(components) < 3: - logger.warning("Invalid azure workspace resource ID") - return "" - return components[2] - - -@credentials_provider('azure-cli', ['is_azure']) -def azure_cli(cfg: 'Config') -> Optional[HeaderFactory]: - """ Adds refreshed OAuth token granted by `az login` command to every request. """ - token_source = None - mgmt_token_source = None - try: - token_source = AzureCliTokenSource.for_resource(cfg, cfg.effective_azure_login_app_id) - except FileNotFoundError: - doc = 'https://docs.microsoft.com/en-us/cli/azure/?view=azure-cli-latest' - logger.debug(f'Most likely Azure CLI is not installed. See {doc} for details') - return None - if not token_source.is_human_user(): - try: - management_endpoint = cfg.arm_environment.service_management_endpoint - mgmt_token_source = AzureCliTokenSource.for_resource(cfg, management_endpoint) - except Exception as e: - logger.debug(f'Not including service management token in headers', exc_info=e) - mgmt_token_source = None - - _ensure_host_present(cfg, lambda resource: AzureCliTokenSource.for_resource(cfg, resource)) - logger.info("Using Azure CLI authentication with AAD tokens") - if not cfg.is_account_client and AzureCliTokenSource.get_subscription(cfg) == "": - logger.warning( - "azure_workspace_resource_id field not provided. " - "It is recommended to specify this field in the Databricks configuration to avoid authentication errors." 
- ) - - def inner() -> Dict[str, str]: - token = token_source.token() - headers = {'Authorization': f'{token.token_type} {token.access_token}'} - add_workspace_id_header(cfg, headers) - if mgmt_token_source: - add_sp_management_token(mgmt_token_source, headers) - return headers - - return inner - - -class DatabricksCliTokenSource(CliTokenSource): - """ Obtain the token granted by `databricks auth login` CLI command """ - - def __init__(self, cfg: 'Config'): - args = ['auth', 'token', '--host', cfg.host] - if cfg.is_account_client: - args += ['--account-id', cfg.account_id] - - cli_path = cfg.databricks_cli_path - if not cli_path: - cli_path = 'databricks' - - # If the path is unqualified, look it up in PATH. - if cli_path.count("/") == 0: - cli_path = self.__class__._find_executable(cli_path) - - super().__init__(cmd=[cli_path, *args], - token_type_field='token_type', - access_token_field='access_token', - expiry_field='expiry') - - @staticmethod - def _find_executable(name) -> str: - err = FileNotFoundError("Most likely the Databricks CLI is not installed") - for dir in os.getenv("PATH", default="").split(os.path.pathsep): - path = pathlib.Path(dir).joinpath(name).resolve() - if not path.is_file(): - continue - - # The new Databricks CLI is a single binary with size > 1MB. - # We use the size as a signal to determine which Databricks CLI is installed. - stat = path.stat() - if stat.st_size < (1024 * 1024): - err = FileNotFoundError("Databricks CLI version <0.100.0 detected") - continue - - return str(path) - - raise err - - -@credentials_provider('databricks-cli', ['host', 'is_aws']) -def databricks_cli(cfg: 'Config') -> Optional[HeaderFactory]: - try: - token_source = DatabricksCliTokenSource(cfg) - except FileNotFoundError as e: - logger.debug(e) - return None - - try: - token_source.token() - except IOError as e: - if 'databricks OAuth is not' in str(e): - logger.debug(f'OAuth not configured or not available: {e}') - return None - raise e - - logger.info("Using Databricks CLI authentication") - - def inner() -> Dict[str, str]: - token = token_source.token() - return {'Authorization': f'{token.token_type} {token.access_token}'} - - return inner - - -class MetadataServiceTokenSource(Refreshable): - """ Obtain the token granted by Databricks Metadata Service """ - METADATA_SERVICE_VERSION = "1" - METADATA_SERVICE_VERSION_HEADER = "X-Databricks-Metadata-Version" - METADATA_SERVICE_HOST_HEADER = "X-Databricks-Host" - _metadata_service_timeout = 10 # seconds - - def __init__(self, cfg: 'Config'): - super().__init__() - self.url = cfg.metadata_service_url - self.host = cfg.host - - def refresh(self) -> Token: - resp = requests.get(self.url, - timeout=self._metadata_service_timeout, - headers={ - self.METADATA_SERVICE_VERSION_HEADER: self.METADATA_SERVICE_VERSION, - self.METADATA_SERVICE_HOST_HEADER: self.host - }) - json_resp: dict[str, Union[str, float]] = resp.json() - access_token = json_resp.get("access_token", None) - if access_token is None: - raise ValueError("Metadata Service returned empty token") - token_type = json_resp.get("token_type", None) - if token_type is None: - raise ValueError("Metadata Service returned empty token type") - if json_resp["expires_on"] in ["", None]: - raise ValueError("Metadata Service returned invalid expiry") - try: - expiry = datetime.fromtimestamp(json_resp["expires_on"]) - except: - raise ValueError("Metadata Service returned invalid expiry") - - return Token(access_token=access_token, token_type=token_type, expiry=expiry) - - 
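As a brief aside on how these provider functions (now re-exported from `databricks.sdk.credentials_provider`) are consumed: the selected provider returns a header factory that `Config.authenticate()` invokes for every request. A small sketch with placeholder values, assuming a personal access token so the `pat` provider wins the default chain:

```python
from databricks.sdk.core import Config

# Placeholder host and token; with both set, DefaultCredentials picks the
# 'pat' provider and authenticate() returns its static bearer header.
cfg = Config(host="https://example.cloud.databricks.com", token="dapi-example")
print(cfg.auth_type)       # expected: 'pat'
print(cfg.authenticate())  # {'Authorization': 'Bearer dapi-example'}
```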
-@credentials_provider('metadata-service', ['host', 'metadata_service_url']) -def metadata_service(cfg: 'Config') -> Optional[HeaderFactory]: - """ Adds refreshed token granted by Databricks Metadata Service to every request. """ - - token_source = MetadataServiceTokenSource(cfg) - token_source.token() - logger.info("Using Databricks Metadata Service authentication") - - def inner() -> Dict[str, str]: - token = token_source.token() - return {'Authorization': f'{token.token_type} {token.access_token}'} - - return inner - - -class DefaultCredentials: - """ Select the first applicable credential provider from the chain """ - - def __init__(self) -> None: - self._auth_type = 'default' - - def auth_type(self) -> str: - return self._auth_type - - def __call__(self, cfg: 'Config') -> HeaderFactory: - auth_providers = [ - pat_auth, basic_auth, metadata_service, oauth_service_principal, azure_service_principal, - github_oidc_azure, azure_cli, external_browser, databricks_cli, runtime_native_auth, - google_credentials, google_id - ] - for provider in auth_providers: - auth_type = provider.auth_type() - if cfg.auth_type and auth_type != cfg.auth_type: - # ignore other auth types if one is explicitly enforced - logger.debug(f"Ignoring {auth_type} auth, because {cfg.auth_type} is preferred") - continue - logger.debug(f'Attempting to configure auth: {auth_type}') - try: - header_factory = provider(cfg) - if not header_factory: - continue - self._auth_type = auth_type - return header_factory - except Exception as e: - raise ValueError(f'{auth_type}: {e}') from e - auth_flow_url = "https://docs.databricks.com/en/dev-tools/auth.html#databricks-client-unified-authentication" - raise ValueError( - f'cannot configure default credentials, please check {auth_flow_url} to configure credentials for your preferred authentication method.' - ) - - -class ConfigAttribute: - """ Configuration attribute metadata and descriptor protocols. 
""" - - # name and transform are discovered from Config.__new__ - name: str = None - transform: type = str - - def __init__(self, env: str = None, auth: str = None, sensitive: bool = False): - self.env = env - self.auth = auth - self.sensitive = sensitive - - def __get__(self, cfg: 'Config', owner): - if not cfg: - return None - return cfg._inner.get(self.name, None) - - def __set__(self, cfg: 'Config', value: any): - cfg._inner[self.name] = self.transform(value) - - def __repr__(self) -> str: - return f"" - - -class Config: - host: str = ConfigAttribute(env='DATABRICKS_HOST') - account_id: str = ConfigAttribute(env='DATABRICKS_ACCOUNT_ID') - token: str = ConfigAttribute(env='DATABRICKS_TOKEN', auth='pat', sensitive=True) - username: str = ConfigAttribute(env='DATABRICKS_USERNAME', auth='basic') - password: str = ConfigAttribute(env='DATABRICKS_PASSWORD', auth='basic', sensitive=True) - client_id: str = ConfigAttribute(env='DATABRICKS_CLIENT_ID', auth='oauth') - client_secret: str = ConfigAttribute(env='DATABRICKS_CLIENT_SECRET', auth='oauth', sensitive=True) - profile: str = ConfigAttribute(env='DATABRICKS_CONFIG_PROFILE') - config_file: str = ConfigAttribute(env='DATABRICKS_CONFIG_FILE') - google_service_account: str = ConfigAttribute(env='DATABRICKS_GOOGLE_SERVICE_ACCOUNT', auth='google') - google_credentials: str = ConfigAttribute(env='GOOGLE_CREDENTIALS', auth='google', sensitive=True) - azure_workspace_resource_id: str = ConfigAttribute(env='DATABRICKS_AZURE_RESOURCE_ID', auth='azure') - azure_use_msi: bool = ConfigAttribute(env='ARM_USE_MSI', auth='azure') - azure_client_secret: str = ConfigAttribute(env='ARM_CLIENT_SECRET', auth='azure', sensitive=True) - azure_client_id: str = ConfigAttribute(env='ARM_CLIENT_ID', auth='azure') - azure_tenant_id: str = ConfigAttribute(env='ARM_TENANT_ID', auth='azure') - azure_environment: str = ConfigAttribute(env='ARM_ENVIRONMENT') - azure_login_app_id: str = ConfigAttribute(env='DATABRICKS_AZURE_LOGIN_APP_ID', auth='azure') - databricks_cli_path: str = ConfigAttribute(env='DATABRICKS_CLI_PATH') - auth_type: str = ConfigAttribute(env='DATABRICKS_AUTH_TYPE') - cluster_id: str = ConfigAttribute(env='DATABRICKS_CLUSTER_ID') - warehouse_id: str = ConfigAttribute(env='DATABRICKS_WAREHOUSE_ID') - skip_verify: bool = ConfigAttribute() - http_timeout_seconds: float = ConfigAttribute() - debug_truncate_bytes: int = ConfigAttribute(env='DATABRICKS_DEBUG_TRUNCATE_BYTES') - debug_headers: bool = ConfigAttribute(env='DATABRICKS_DEBUG_HEADERS') - rate_limit: int = ConfigAttribute(env='DATABRICKS_RATE_LIMIT') - retry_timeout_seconds: int = ConfigAttribute() - metadata_service_url = ConfigAttribute(env='DATABRICKS_METADATA_SERVICE_URL', - auth='metadata-service', - sensitive=True) - max_connection_pools: int = ConfigAttribute() - max_connections_per_pool: int = ConfigAttribute() - - def __init__(self, - *, - credentials_provider: CredentialsProvider = None, - product="unknown", - product_version="0.0.0", - **kwargs): - self._inner = {} - self._user_agent_other_info = [] - self._credentials_provider = credentials_provider if credentials_provider else DefaultCredentials() - try: - self._set_inner_config(kwargs) - self._load_from_env() - self._known_file_config_loader() - self._fix_host_if_needed() - self._validate() - self._init_auth() - self._product = product - self._product_version = product_version - except ValueError as e: - message = self.wrap_debug_info(str(e)) - raise ValueError(message) from e - - def wrap_debug_info(self, message: str) -> str: - 
debug_string = self.debug_string() - if debug_string: - message = f'{message.rstrip(".")}. {debug_string}' - return message - - @staticmethod - def parse_dsn(dsn: str) -> 'Config': - uri = urllib.parse.urlparse(dsn) - if uri.scheme != 'databricks': - raise ValueError(f'Expected databricks:// scheme, got {uri.scheme}://') - kwargs = {'host': f'https://{uri.hostname}'} - if uri.username: - kwargs['username'] = uri.username - if uri.password: - kwargs['password'] = uri.password - query = dict(urllib.parse.parse_qsl(uri.query)) - for attr in Config.attributes(): - if attr.name not in query: - continue - kwargs[attr.name] = query[attr.name] - return Config(**kwargs) - - def authenticate(self) -> Dict[str, str]: - """ Returns a list of fresh authentication headers """ - return self._header_factory() - - def as_dict(self) -> dict: - return self._inner - - @property - def is_azure(self) -> bool: - has_resource_id = self.azure_workspace_resource_id is not None - has_host = self.host is not None - is_public_cloud = has_host and ".azuredatabricks.net" in self.host - is_china_cloud = has_host and ".databricks.azure.cn" in self.host - is_gov_cloud = has_host and ".databricks.azure.us" in self.host - is_valid_cloud = is_public_cloud or is_china_cloud or is_gov_cloud - return has_resource_id or (has_host and is_valid_cloud) - - @property - def is_gcp(self) -> bool: - return self.host and ".gcp.databricks.com" in self.host - - @property - def is_aws(self) -> bool: - return not self.is_azure and not self.is_gcp - - @property - def is_account_client(self) -> bool: - if not self.host: - return False - return self.host.startswith("https://accounts.") or self.host.startswith("https://accounts-dod.") - - @property - def arm_environment(self) -> AzureEnvironment: - env = self.azure_environment if self.azure_environment else "PUBLIC" - try: - return ENVIRONMENTS[env] - except KeyError: - raise ValueError(f"Cannot find Azure {env} Environment") - - @property - def effective_azure_login_app_id(self): - app_id = self.azure_login_app_id - if app_id: - return app_id - return ARM_DATABRICKS_RESOURCE_ID - - @property - def hostname(self) -> str: - url = urllib.parse.urlparse(self.host) - return url.netloc - - @property - def is_any_auth_configured(self) -> bool: - for attr in Config.attributes(): - if not attr.auth: - continue - value = self._inner.get(attr.name, None) - if value: - return True - return False - - @property - def user_agent(self): - """ Returns User-Agent header used by this SDK """ - py_version = platform.python_version() - os_name = platform.uname().system.lower() - - ua = [ - f"{self._product}/{self._product_version}", f"databricks-sdk-py/{__version__}", - f"python/{py_version}", f"os/{os_name}", f"auth/{self.auth_type}", - ] - if len(self._user_agent_other_info) > 0: - ua.append(' '.join(self._user_agent_other_info)) - if len(self._upstream_user_agent) > 0: - ua.append(self._upstream_user_agent) - if 'DATABRICKS_RUNTIME_VERSION' in os.environ: - runtime_version = os.environ['DATABRICKS_RUNTIME_VERSION'] - if runtime_version != '': - runtime_version = self._sanitize_header_value(runtime_version) - ua.append(f'runtime/{runtime_version}') - - return ' '.join(ua) - - @staticmethod - def _sanitize_header_value(value: str) -> str: - value = value.replace(' ', '-') - value = value.replace('/', '-') - return value - - @property - def _upstream_user_agent(self) -> str: - product = os.environ.get('DATABRICKS_SDK_UPSTREAM', None) - product_version = os.environ.get('DATABRICKS_SDK_UPSTREAM_VERSION', None) - if 
product is not None and product_version is not None: - return f"upstream/{product} upstream-version/{product_version}" - return "" - - def with_user_agent_extra(self, key: str, value: str) -> 'Config': - self._user_agent_other_info.append(f"{key}/{value}") - return self - - @property - def oidc_endpoints(self) -> Optional[OidcEndpoints]: - self._fix_host_if_needed() - if not self.host: - return None - if self.is_azure: - # Retrieve authorize endpoint to retrieve token endpoint after - res = requests.get(f'{self.host}/oidc/oauth2/v2.0/authorize', allow_redirects=False) - real_auth_url = res.headers.get('location') - if not real_auth_url: - return None - return OidcEndpoints(authorization_endpoint=real_auth_url, - token_endpoint=real_auth_url.replace('/authorize', '/token')) - if self.is_account_client and self.account_id: - prefix = f'{self.host}/oidc/accounts/{self.account_id}' - return OidcEndpoints(authorization_endpoint=f'{prefix}/v1/authorize', - token_endpoint=f'{prefix}/v1/token') - oidc = f'{self.host}/oidc/.well-known/oauth-authorization-server' - res = requests.get(oidc) - if res.status_code != 200: - return None - auth_metadata = res.json() - return OidcEndpoints(authorization_endpoint=auth_metadata.get('authorization_endpoint'), - token_endpoint=auth_metadata.get('token_endpoint')) - - def debug_string(self) -> str: - """ Returns log-friendly representation of configured attributes """ - buf = [] - attrs_used = [] - envs_used = [] - for attr in Config.attributes(): - if attr.env and os.environ.get(attr.env): - envs_used.append(attr.env) - value = getattr(self, attr.name) - if not value: - continue - safe = '***' if attr.sensitive else f'{value}' - attrs_used.append(f'{attr.name}={safe}') - if attrs_used: - buf.append(f'Config: {", ".join(attrs_used)}') - if envs_used: - buf.append(f'Env: {", ".join(envs_used)}') - return '. '.join(buf) - - def to_dict(self) -> Dict[str, any]: - return self._inner - - @property - def sql_http_path(self) -> Optional[str]: - """(Experimental) Return HTTP path for SQL Drivers. - - If `cluster_id` or `warehouse_id` are configured, return a valid HTTP Path argument - used in construction of JDBC/ODBC DSN string. 
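For illustration, a minimal sketch of how `sql_http_path` and `authenticate()` combine when building a JDBC/ODBC DSN. The workspace URL, token and warehouse ID below are placeholders rather than values from this change, and PAT auth is assumed so that no network call is needed.

from databricks.sdk.core import Config

# Placeholder host, personal access token and warehouse ID.
cfg = Config(host='https://example.cloud.databricks.com',
             token='dapi-example-token',
             warehouse_id='abc123')

print(cfg.authenticate())  # {'Authorization': 'Bearer dapi-example-token'}
print(cfg.sql_http_path)   # /sql/1.0/warehouses/abc123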
- - See https://docs.databricks.com/integrations/jdbc-odbc-bi.html - """ - if (not self.cluster_id) and (not self.warehouse_id): - return None - if self.cluster_id and self.warehouse_id: - raise ValueError('cannot have both cluster_id and warehouse_id') - headers = self.authenticate() - headers['User-Agent'] = f'{self.user_agent} sdk-feature/sql-http-path' - if self.cluster_id: - response = requests.get(f"{self.host}/api/2.0/preview/scim/v2/Me", headers=headers) - # get workspace ID from the response header - workspace_id = response.headers.get('x-databricks-org-id') - return f'sql/protocolv1/o/{workspace_id}/{self.cluster_id}' - if self.warehouse_id: - return f'/sql/1.0/warehouses/{self.warehouse_id}' - - @classmethod - def attributes(cls) -> Iterable[ConfigAttribute]: - """ Returns a list of Databricks SDK configuration metadata """ - if hasattr(cls, '_attributes'): - return cls._attributes - if sys.version_info[1] >= 10: - import inspect - anno = inspect.get_annotations(cls) - else: - # Python 3.7 compatibility: getting type hints require extra hop, as described in - # "Accessing The Annotations Dict Of An Object In Python 3.9 And Older" section of - # https://docs.python.org/3/howto/annotations.html - anno = cls.__dict__['__annotations__'] - attrs = [] - for name, v in cls.__dict__.items(): - if type(v) != ConfigAttribute: - continue - v.name = name - v.transform = anno.get(name, str) - attrs.append(v) - cls._attributes = attrs - return cls._attributes - - def _fix_host_if_needed(self): - if not self.host: - return - # fix url to remove trailing slash - o = urllib.parse.urlparse(self.host) - if not o.hostname: - # only hostname is specified - self.host = f"https://{self.host}" - else: - self.host = f"{o.scheme}://{o.netloc}" - - def _set_inner_config(self, keyword_args: Dict[str, any]): - for attr in self.attributes(): - if attr.name not in keyword_args: - continue - if keyword_args.get(attr.name, None) is None: - continue - self.__setattr__(attr.name, keyword_args[attr.name]) - - def _load_from_env(self): - found = False - for attr in self.attributes(): - if not attr.env: - continue - if attr.name in self._inner: - continue - value = os.environ.get(attr.env) - if not value: - continue - self.__setattr__(attr.name, value) - found = True - if found: - logger.debug('Loaded from environment') - - def _known_file_config_loader(self): - if not self.profile and (self.is_any_auth_configured or self.host - or self.azure_workspace_resource_id): - # skip loading configuration file if there's any auth configured - # directly as part of the Config() constructor. - return - config_file = self.config_file - if not config_file: - config_file = "~/.databrickscfg" - config_path = pathlib.Path(config_file).expanduser() - if not config_path.exists(): - logger.debug("%s does not exist", config_path) - return - ini_file = configparser.ConfigParser() - ini_file.read(config_path) - profile = self.profile - has_explicit_profile = self.profile is not None - # In Go SDK, we skip merging the profile with DEFAULT section, though Python's ConfigParser.items() - # is returning profile key-value pairs _including those from DEFAULT_. This is not what we expect - # from Unified Auth test suite at the moment. Hence, the private variable access. 
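A small, self-contained illustration of the `ConfigParser` behaviour the comment above refers to: `items(profile)` merges the DEFAULT section into every profile, while the private `_sections` mapping and `defaults()` keep them separate. The profile name and values are made up.

import configparser

ini_file = configparser.ConfigParser()
ini_file.read_string("[DEFAULT]\n"
                     "host = https://example.cloud.databricks.com\n"
                     "\n"
                     "[dev]\n"
                     "token = dapi-example-token\n")

print(dict(ini_file.items('dev')))      # host *and* token: DEFAULT is merged in
print(dict(ini_file._sections['dev']))  # token only: DEFAULT is not merged
print(dict(ini_file.defaults()))        # the DEFAULT section by itself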
- # See: https://docs.python.org/3/library/configparser.html#mapping-protocol-access - if not has_explicit_profile and not ini_file.defaults(): - logger.debug(f'{config_path} has no DEFAULT profile configured') - return - if not has_explicit_profile: - profile = "DEFAULT" - profiles = ini_file._sections - if ini_file.defaults(): - profiles['DEFAULT'] = ini_file.defaults() - if profile not in profiles: - raise ValueError(f'resolve: {config_path} has no {profile} profile configured') - raw_config = profiles[profile] - logger.info(f'loading {profile} profile from {config_file}: {", ".join(raw_config.keys())}') - for k, v in raw_config.items(): - if k in self._inner: - # don't overwrite a value previously set - continue - self.__setattr__(k, v) - - def _validate(self): - auths_used = set() - for attr in Config.attributes(): - if attr.name not in self._inner: - continue - if not attr.auth: - continue - auths_used.add(attr.auth) - if len(auths_used) <= 1: - return - if self.auth_type: - # client has auth preference set - return - names = " and ".join(sorted(auths_used)) - raise ValueError(f'validate: more than one authorization method configured: {names}') - - def _init_auth(self): - try: - self._header_factory = self._credentials_provider(self) - self.auth_type = self._credentials_provider.auth_type() - if not self._header_factory: - raise ValueError('not configured') - except ValueError as e: - raise ValueError(f'{self._credentials_provider.auth_type()} auth: {e}') from e - - def __repr__(self): - return f'<{self.debug_string()}>' - - def copy(self): - """Creates a copy of the config object. - All the copies share most of their internal state (ie, shared reference to fields such as credential_provider). - Copies have their own instances of the following fields - - `_user_agent_other_info` - """ - cpy: Config = copy.copy(self) - cpy._user_agent_other_info = copy.deepcopy(self._user_agent_other_info) - return cpy - class ApiClient: _cfg: Config diff --git a/databricks/sdk/credentials_provider.py b/databricks/sdk/credentials_provider.py new file mode 100644 index 000000000..2c30ea143 --- /dev/null +++ b/databricks/sdk/credentials_provider.py @@ -0,0 +1,617 @@ +import abc +import base64 +import functools +import io +import json +import logging +import os +import pathlib +import subprocess +import sys +from datetime import datetime +from typing import Callable, Dict, List, Optional, Union + +import google.auth +import requests +from google.auth import impersonated_credentials +from google.auth.transport.requests import Request +from google.oauth2 import service_account + +from .azure import add_sp_management_token, add_workspace_id_header +from .oauth import (ClientCredentials, OAuthClient, Refreshable, Token, + TokenCache, TokenSource) + +HeaderFactory = Callable[[], Dict[str, str]] + +logger = logging.getLogger('databricks.sdk') + + +class CredentialsProvider(abc.ABC): + """ CredentialsProvider is the protocol (call-side interface) + for authenticating requests to Databricks REST APIs""" + + @abc.abstractmethod + def auth_type(self) -> str: + ... + + @abc.abstractmethod + def __call__(self, cfg: 'Config') -> HeaderFactory: + ... + + +def credentials_provider(name: str, require: List[str]): + """ Given the function that receives a Config and returns RequestVisitor, + create CredentialsProvider with a given name and required configuration + attribute names to be present for this function to be called. 
""" + + def inner(func: Callable[['Config'], HeaderFactory]) -> CredentialsProvider: + + @functools.wraps(func) + def wrapper(cfg: 'Config') -> Optional[HeaderFactory]: + for attr in require: + if not getattr(cfg, attr): + return None + return func(cfg) + + wrapper.auth_type = lambda: name + return wrapper + + return inner + + +@credentials_provider('basic', ['host', 'username', 'password']) +def basic_auth(cfg: 'Config') -> HeaderFactory: + """ Given username and password, add base64-encoded Basic credentials """ + encoded = base64.b64encode(f'{cfg.username}:{cfg.password}'.encode()).decode() + static_credentials = {'Authorization': f'Basic {encoded}'} + + def inner() -> Dict[str, str]: + return static_credentials + + return inner + + +@credentials_provider('pat', ['host', 'token']) +def pat_auth(cfg: 'Config') -> HeaderFactory: + """ Adds Databricks Personal Access Token to every request """ + static_credentials = {'Authorization': f'Bearer {cfg.token}'} + + def inner() -> Dict[str, str]: + return static_credentials + + return inner + + +@credentials_provider('runtime', []) +def runtime_native_auth(cfg: 'Config') -> Optional[HeaderFactory]: + if 'DATABRICKS_RUNTIME_VERSION' not in os.environ: + return None + + # This import MUST be after the "DATABRICKS_RUNTIME_VERSION" check + # above, so that we are not throwing import errors when not in + # runtime and no config variables are set. + from databricks.sdk.runtime import (init_runtime_legacy_auth, + init_runtime_native_auth, + init_runtime_repl_auth) + for init in [init_runtime_native_auth, init_runtime_repl_auth, init_runtime_legacy_auth]: + if init is None: + continue + host, inner = init() + if host is None: + logger.debug(f'[{init.__name__}] no host detected') + continue + cfg.host = host + logger.debug(f'[{init.__name__}] runtime native auth configured') + return inner + return None + + +@credentials_provider('oauth-m2m', ['host', 'client_id', 'client_secret']) +def oauth_service_principal(cfg: 'Config') -> Optional[HeaderFactory]: + """ Adds refreshed Databricks machine-to-machine OAuth Bearer token to every request, + if /oidc/.well-known/oauth-authorization-server is available on the given host. """ + oidc = cfg.oidc_endpoints + if oidc is None: + return None + token_source = ClientCredentials(client_id=cfg.client_id, + client_secret=cfg.client_secret, + token_url=oidc.token_endpoint, + scopes=["all-apis"], + use_header=True) + + def inner() -> Dict[str, str]: + token = token_source.token() + return {'Authorization': f'{token.token_type} {token.access_token}'} + + return inner + + +@credentials_provider('external-browser', ['host', 'auth_type']) +def external_browser(cfg: 'Config') -> Optional[HeaderFactory]: + if cfg.auth_type != 'external-browser': + return None + if cfg.client_id: + client_id = cfg.client_id + elif cfg.is_aws: + client_id = 'databricks-cli' + elif cfg.is_azure: + # Use Azure AD app for cases when Azure CLI is not available on the machine. + # App has to be registered as Single-page multi-tenant to support PKCE + # TODO: temporary app ID, change it later. + client_id = '6128a518-99a9-425b-8333-4cc94f04cacd' + else: + raise ValueError(f'local browser SSO is not supported') + oauth_client = OAuthClient(host=cfg.host, + client_id=client_id, + redirect_url='http://localhost:8020', + client_secret=cfg.client_secret) + + # Load cached credentials from disk if they exist. + # Note that these are local to the Python SDK and not reused by other SDKs. 
+ token_cache = TokenCache(oauth_client) + credentials = token_cache.load() + if credentials: + # Force a refresh in case the loaded credentials are expired. + credentials.token() + else: + consent = oauth_client.initiate_consent() + if not consent: + return None + credentials = consent.launch_external_browser() + token_cache.save(credentials) + return credentials(cfg) + + +def _ensure_host_present(cfg: 'Config', token_source_for: Callable[[str], TokenSource]): + """ Resolves Azure Databricks workspace URL from ARM Resource ID """ + if cfg.host: + return + if not cfg.azure_workspace_resource_id: + return + arm = cfg.arm_environment.resource_manager_endpoint + token = token_source_for(arm).token() + resp = requests.get(f"{arm}{cfg.azure_workspace_resource_id}?api-version=2018-04-01", + headers={"Authorization": f"Bearer {token.access_token}"}) + if not resp.ok: + raise ValueError(f"Cannot resolve Azure Databricks workspace: {resp.content}") + cfg.host = f"https://{resp.json()['properties']['workspaceUrl']}" + + +@credentials_provider('azure-client-secret', + ['is_azure', 'azure_client_id', 'azure_client_secret', 'azure_tenant_id']) +def azure_service_principal(cfg: 'Config') -> HeaderFactory: + """ Adds refreshed Azure Active Directory (AAD) Service Principal OAuth tokens + to every request, while automatically resolving different Azure environment endpoints. """ + + def token_source_for(resource: str) -> TokenSource: + aad_endpoint = cfg.arm_environment.active_directory_endpoint + return ClientCredentials(client_id=cfg.azure_client_id, + client_secret=cfg.azure_client_secret, + token_url=f"{aad_endpoint}{cfg.azure_tenant_id}/oauth2/token", + endpoint_params={"resource": resource}, + use_params=True) + + _ensure_host_present(cfg, token_source_for) + logger.info("Configured AAD token for Service Principal (%s)", cfg.azure_client_id) + inner = token_source_for(cfg.effective_azure_login_app_id) + cloud = token_source_for(cfg.arm_environment.service_management_endpoint) + + def refreshed_headers() -> Dict[str, str]: + headers = {'Authorization': f"Bearer {inner.token().access_token}", } + add_workspace_id_header(cfg, headers) + add_sp_management_token(cloud, headers) + return headers + + return refreshed_headers + + +@credentials_provider('github-oidc-azure', ['host', 'azure_client_id']) +def github_oidc_azure(cfg: 'Config') -> Optional[HeaderFactory]: + if 'ACTIONS_ID_TOKEN_REQUEST_TOKEN' not in os.environ: + # not in GitHub actions + return None + + # Client ID is the minimal thing we need, as otherwise we get AADSTS700016: Application with + # identifier 'https://token.actions.githubusercontent.com' was not found in the directory '...'. 
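A hedged sketch of the configuration that drives the `azure-client-secret` provider defined above; the host, tenant, client ID and secret are placeholders for a real Azure service principal with access to the workspace. Constructing the `Config` only selects the provider, so no token is requested until the first `authenticate()` call.

from databricks.sdk.core import Config

cfg = Config(host='https://adb-1234567890123456.7.azuredatabricks.net',
             azure_tenant_id='00000000-0000-0000-0000-000000000000',
             azure_client_id='11111111-1111-1111-1111-111111111111',
             azure_client_secret='example-secret')
print(cfg.auth_type)  # azure-client-secret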
+ if not cfg.is_azure: + return None + + # See https://docs.github.com/en/actions/deployment/security-hardening-your-deployments/configuring-openid-connect-in-cloud-providers + headers = {'Authorization': f"Bearer {os.environ['ACTIONS_ID_TOKEN_REQUEST_TOKEN']}"} + endpoint = f"{os.environ['ACTIONS_ID_TOKEN_REQUEST_URL']}&audience=api://AzureADTokenExchange" + response = requests.get(endpoint, headers=headers) + if not response.ok: + return None + + # get the ID Token with aud=api://AzureADTokenExchange sub=repo:org/repo:environment:name + response_json = response.json() + if 'value' not in response_json: + return None + + logger.info("Configured AAD token for GitHub Actions OIDC (%s)", cfg.azure_client_id) + params = { + 'client_assertion_type': 'urn:ietf:params:oauth:client-assertion-type:jwt-bearer', + 'resource': cfg.effective_azure_login_app_id, + 'client_assertion': response_json['value'], + } + aad_endpoint = cfg.arm_environment.active_directory_endpoint + if not cfg.azure_tenant_id: + # detect Azure AD Tenant ID if it's not specified directly + token_endpoint = cfg.oidc_endpoints.token_endpoint + cfg.azure_tenant_id = token_endpoint.replace(aad_endpoint, '').split('/')[0] + inner = ClientCredentials(client_id=cfg.azure_client_id, + client_secret="", # we have no (rotatable) secrets in OIDC flow + token_url=f"{aad_endpoint}{cfg.azure_tenant_id}/oauth2/token", + endpoint_params=params, + use_params=True) + + def refreshed_headers() -> Dict[str, str]: + token = inner.token() + return {'Authorization': f'{token.token_type} {token.access_token}'} + + return refreshed_headers + + +GcpScopes = ["https://www.googleapis.com/auth/cloud-platform", "https://www.googleapis.com/auth/compute"] + + +@credentials_provider('google-credentials', ['host', 'google_credentials']) +def google_credentials(cfg: 'Config') -> Optional[HeaderFactory]: + if not cfg.is_gcp: + return None + # Reads credentials as JSON. Credentials can be either a path to JSON file, or actual JSON string. + # Obtain the id token by providing the json file path and target audience. + if (os.path.isfile(cfg.google_credentials)): + with io.open(cfg.google_credentials, "r", encoding="utf-8") as json_file: + account_info = json.load(json_file) + else: + # If the file doesn't exist, assume that the config is the actual JSON content. + account_info = json.loads(cfg.google_credentials) + + credentials = service_account.IDTokenCredentials.from_service_account_info(info=account_info, + target_audience=cfg.host) + + request = Request() + + gcp_credentials = service_account.Credentials.from_service_account_info(info=account_info, + scopes=GcpScopes) + + def refreshed_headers() -> Dict[str, str]: + credentials.refresh(request) + headers = {'Authorization': f'Bearer {credentials.token}'} + if cfg.is_account_client: + gcp_credentials.refresh(request) + headers["X-Databricks-GCP-SA-Access-Token"] = gcp_credentials.token + return headers + + return refreshed_headers + + +@credentials_provider('google-id', ['host', 'google_service_account']) +def google_id(cfg: 'Config') -> Optional[HeaderFactory]: + if not cfg.is_gcp: + return None + credentials, _project_id = google.auth.default() + + # Create the impersonated credential. + target_credentials = impersonated_credentials.Credentials(source_credentials=credentials, + target_principal=cfg.google_service_account, + target_scopes=[]) + + # Set the impersonated credential, target audience and token options. 
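The branch at the top of `google_credentials` above means the `google_credentials` attribute may hold either a path to a service-account key file or the key JSON itself. A small standalone mirror of that decision, using a made-up inline key, shows both forms resolving to the same dictionary:

import json
import os

def load_account_info(google_credentials: str) -> dict:
    # Same check as above: treat the value as a file path first, raw JSON otherwise.
    if os.path.isfile(google_credentials):
        with open(google_credentials, 'r', encoding='utf-8') as f:
            return json.load(f)
    return json.loads(google_credentials)

print(load_account_info('{"type": "service_account", "project_id": "example"}'))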
+ id_creds = impersonated_credentials.IDTokenCredentials(target_credentials, + target_audience=cfg.host, + include_email=True) + + gcp_impersonated_credentials = impersonated_credentials.Credentials( + source_credentials=credentials, target_principal=cfg.google_service_account, target_scopes=GcpScopes) + + request = Request() + + def refreshed_headers() -> Dict[str, str]: + id_creds.refresh(request) + headers = {'Authorization': f'Bearer {id_creds.token}'} + if cfg.is_account_client: + gcp_impersonated_credentials.refresh(request) + headers["X-Databricks-GCP-SA-Access-Token"] = gcp_impersonated_credentials.token + return headers + + return refreshed_headers + + +class CliTokenSource(Refreshable): + + def __init__(self, cmd: List[str], token_type_field: str, access_token_field: str, expiry_field: str): + super().__init__() + self._cmd = cmd + self._token_type_field = token_type_field + self._access_token_field = access_token_field + self._expiry_field = expiry_field + + @staticmethod + def _parse_expiry(expiry: str) -> datetime: + expiry = expiry.rstrip("Z").split(".")[0] + for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S"): + try: + return datetime.strptime(expiry, fmt) + except ValueError as e: + last_e = e + if last_e: + raise last_e + + def refresh(self) -> Token: + try: + is_windows = sys.platform.startswith('win') + # windows requires shell=True to be able to execute 'az login' or other commands + # cannot use shell=True all the time, as it breaks macOS + out = subprocess.run(self._cmd, capture_output=True, check=True, shell=is_windows) + it = json.loads(out.stdout.decode()) + expires_on = self._parse_expiry(it[self._expiry_field]) + return Token(access_token=it[self._access_token_field], + token_type=it[self._token_type_field], + expiry=expires_on) + except ValueError as e: + raise ValueError(f"cannot unmarshal CLI result: {e}") + except subprocess.CalledProcessError as e: + stdout = e.stdout.decode().strip() + stderr = e.stderr.decode().strip() + message = stdout or stderr + raise IOError(f'cannot get access token: {message}') from e + + +class AzureCliTokenSource(CliTokenSource): + """ Obtain the token granted by `az login` CLI command """ + + def __init__(self, resource: str, subscription: str = ""): + cmd = ["az", "account", "get-access-token", "--resource", resource, "--output", "json"] + if subscription != "": + cmd.append("--subscription") + cmd.append(subscription) + super().__init__(cmd=cmd, + token_type_field='tokenType', + access_token_field='accessToken', + expiry_field='expiresOn') + + def is_human_user(self) -> bool: + """The UPN claim is the username of the user, but not the Service Principal. + + Azure CLI can be authenticated by both human users (`az login`) and service principals. In case of service + principals, it can be either OIDC from GitHub or login with a password: + + ~ $ az login --service-principal --user $clientID --password $clientSecret --tenant $tenantID + + Human users get more claims: + - 'amr' - how the subject of the token was authenticated + - 'name', 'family_name', 'given_name' - human-readable values that identifies the subject of the token + - 'scp' with `user_impersonation` value, that shows the set of scopes exposed by your application for which + the client application has requested (and received) consent + - 'unique_name' - a human-readable value that identifies the subject of the token. This value is not + guaranteed to be unique within a tenant and should be used only for display purposes. + - 'upn' - The username of the user. 
+ """ + return 'upn' in self.token().jwt_claims() + + @staticmethod + def for_resource(cfg: 'Config', resource: str) -> 'AzureCliTokenSource': + subscription = AzureCliTokenSource.get_subscription(cfg) + if subscription != "": + token_source = AzureCliTokenSource(resource, subscription) + try: + # This will fail if the user has access to the workspace, but not to the subscription + # itself. + # In such case, we fall back to not using the subscription. + token_source.token() + return token_source + except OSError: + logger.warning("Failed to get token for subscription. Using resource only token.") + + token_source = AzureCliTokenSource(resource) + token_source.token() + return token_source + + @staticmethod + def get_subscription(cfg: 'Config') -> str: + resource = cfg.azure_workspace_resource_id + if resource is None or resource == "": + return "" + components = resource.split('/') + if len(components) < 3: + logger.warning("Invalid azure workspace resource ID") + return "" + return components[2] + + +@credentials_provider('azure-cli', ['is_azure']) +def azure_cli(cfg: 'Config') -> Optional[HeaderFactory]: + """ Adds refreshed OAuth token granted by `az login` command to every request. """ + token_source = None + mgmt_token_source = None + try: + token_source = AzureCliTokenSource.for_resource(cfg, cfg.effective_azure_login_app_id) + except FileNotFoundError: + doc = 'https://docs.microsoft.com/en-us/cli/azure/?view=azure-cli-latest' + logger.debug(f'Most likely Azure CLI is not installed. See {doc} for details') + return None + except OSError as e: + logger.debug('skipping Azure CLI auth', exc_info=e) + logger.debug('This may happen if you are attempting to login to a dev or staging workspace') + return None + + if not token_source.is_human_user(): + try: + management_endpoint = cfg.arm_environment.service_management_endpoint + mgmt_token_source = AzureCliTokenSource.for_resource(cfg, management_endpoint) + except Exception as e: + logger.debug(f'Not including service management token in headers', exc_info=e) + mgmt_token_source = None + + _ensure_host_present(cfg, lambda resource: AzureCliTokenSource.for_resource(cfg, resource)) + logger.info("Using Azure CLI authentication with AAD tokens") + if not cfg.is_account_client and AzureCliTokenSource.get_subscription(cfg) == "": + logger.warning( + "azure_workspace_resource_id field not provided. " + "It is recommended to specify this field in the Databricks configuration to avoid authentication errors." + ) + + def inner() -> Dict[str, str]: + token = token_source.token() + headers = {'Authorization': f'{token.token_type} {token.access_token}'} + add_workspace_id_header(cfg, headers) + if mgmt_token_source: + add_sp_management_token(mgmt_token_source, headers) + return headers + + return inner + + +class DatabricksCliTokenSource(CliTokenSource): + """ Obtain the token granted by `databricks auth login` CLI command """ + + def __init__(self, cfg: 'Config'): + args = ['auth', 'token', '--host', cfg.host] + if cfg.is_account_client: + args += ['--account-id', cfg.account_id] + + cli_path = cfg.databricks_cli_path + if not cli_path: + cli_path = 'databricks' + + # If the path is unqualified, look it up in PATH. 
+ if cli_path.count("/") == 0: + cli_path = self.__class__._find_executable(cli_path) + + super().__init__(cmd=[cli_path, *args], + token_type_field='token_type', + access_token_field='access_token', + expiry_field='expiry') + + @staticmethod + def _find_executable(name) -> str: + err = FileNotFoundError("Most likely the Databricks CLI is not installed") + for dir in os.getenv("PATH", default="").split(os.path.pathsep): + path = pathlib.Path(dir).joinpath(name).resolve() + if not path.is_file(): + continue + + # The new Databricks CLI is a single binary with size > 1MB. + # We use the size as a signal to determine which Databricks CLI is installed. + stat = path.stat() + if stat.st_size < (1024 * 1024): + err = FileNotFoundError("Databricks CLI version <0.100.0 detected") + continue + + return str(path) + + raise err + + +@credentials_provider('databricks-cli', ['host', 'is_aws']) +def databricks_cli(cfg: 'Config') -> Optional[HeaderFactory]: + try: + token_source = DatabricksCliTokenSource(cfg) + except FileNotFoundError as e: + logger.debug(e) + return None + + try: + token_source.token() + except IOError as e: + if 'databricks OAuth is not' in str(e): + logger.debug(f'OAuth not configured or not available: {e}') + return None + raise e + + logger.info("Using Databricks CLI authentication") + + def inner() -> Dict[str, str]: + token = token_source.token() + return {'Authorization': f'{token.token_type} {token.access_token}'} + + return inner + + +class MetadataServiceTokenSource(Refreshable): + """ Obtain the token granted by Databricks Metadata Service """ + METADATA_SERVICE_VERSION = "1" + METADATA_SERVICE_VERSION_HEADER = "X-Databricks-Metadata-Version" + METADATA_SERVICE_HOST_HEADER = "X-Databricks-Host" + _metadata_service_timeout = 10 # seconds + + def __init__(self, cfg: 'Config'): + super().__init__() + self.url = cfg.metadata_service_url + self.host = cfg.host + + def refresh(self) -> Token: + resp = requests.get(self.url, + timeout=self._metadata_service_timeout, + headers={ + self.METADATA_SERVICE_VERSION_HEADER: self.METADATA_SERVICE_VERSION, + self.METADATA_SERVICE_HOST_HEADER: self.host + }) + json_resp: dict[str, Union[str, float]] = resp.json() + access_token = json_resp.get("access_token", None) + if access_token is None: + raise ValueError("Metadata Service returned empty token") + token_type = json_resp.get("token_type", None) + if token_type is None: + raise ValueError("Metadata Service returned empty token type") + if json_resp["expires_on"] in ["", None]: + raise ValueError("Metadata Service returned invalid expiry") + try: + expiry = datetime.fromtimestamp(json_resp["expires_on"]) + except: + raise ValueError("Metadata Service returned invalid expiry") + + return Token(access_token=access_token, token_type=token_type, expiry=expiry) + + +@credentials_provider('metadata-service', ['host', 'metadata_service_url']) +def metadata_service(cfg: 'Config') -> Optional[HeaderFactory]: + """ Adds refreshed token granted by Databricks Metadata Service to every request. 
""" + + token_source = MetadataServiceTokenSource(cfg) + token_source.token() + logger.info("Using Databricks Metadata Service authentication") + + def inner() -> Dict[str, str]: + token = token_source.token() + return {'Authorization': f'{token.token_type} {token.access_token}'} + + return inner + + +class DefaultCredentials: + """ Select the first applicable credential provider from the chain """ + + def __init__(self) -> None: + self._auth_type = 'default' + + def auth_type(self) -> str: + return self._auth_type + + def __call__(self, cfg: 'Config') -> HeaderFactory: + auth_providers = [ + pat_auth, basic_auth, metadata_service, oauth_service_principal, azure_service_principal, + github_oidc_azure, azure_cli, external_browser, databricks_cli, runtime_native_auth, + google_credentials, google_id + ] + for provider in auth_providers: + auth_type = provider.auth_type() + if cfg.auth_type and auth_type != cfg.auth_type: + # ignore other auth types if one is explicitly enforced + logger.debug(f"Ignoring {auth_type} auth, because {cfg.auth_type} is preferred") + continue + logger.debug(f'Attempting to configure auth: {auth_type}') + try: + header_factory = provider(cfg) + if not header_factory: + continue + self._auth_type = auth_type + return header_factory + except Exception as e: + raise ValueError(f'{auth_type}: {e}') from e + auth_flow_url = "https://docs.databricks.com/en/dev-tools/auth.html#databricks-client-unified-authentication" + raise ValueError( + f'cannot configure default credentials, please check {auth_flow_url} to configure credentials for your preferred authentication method.' + ) diff --git a/databricks/sdk/environments.py b/databricks/sdk/environments.py new file mode 100644 index 000000000..ee41f0f96 --- /dev/null +++ b/databricks/sdk/environments.py @@ -0,0 +1,72 @@ +from dataclasses import dataclass +from enum import Enum +from typing import Optional + +from .azure import ARM_DATABRICKS_RESOURCE_ID, ENVIRONMENTS, AzureEnvironment + + +class Cloud(Enum): + AWS = "AWS" + AZURE = "AZURE" + GCP = "GCP" + + +@dataclass +class DatabricksEnvironment: + cloud: Cloud + dns_zone: str + azure_application_id: Optional[str] = None + azure_environment: Optional[AzureEnvironment] = None + + def deployment_url(self, name: str) -> str: + return f"https://{name}.{self.dns_zone}" + + @property + def azure_service_management_endpoint(self) -> Optional[str]: + if self.azure_environment is None: + return None + return self.azure_environment.service_management_endpoint + + @property + def azure_resource_manager_endpoint(self) -> Optional[str]: + if self.azure_environment is None: + return None + return self.azure_environment.resource_manager_endpoint + + @property + def azure_active_directory_endpoint(self) -> Optional[str]: + if self.azure_environment is None: + return None + return self.azure_environment.active_directory_endpoint + + +DEFAULT_ENVIRONMENT = DatabricksEnvironment(Cloud.AWS, ".cloud.databricks.com") + +ALL_ENVS = [ + DatabricksEnvironment(Cloud.AWS, ".dev.databricks.com"), + DatabricksEnvironment(Cloud.AWS, ".staging.cloud.databricks.com"), + DatabricksEnvironment(Cloud.AWS, ".cloud.databricks.us"), DEFAULT_ENVIRONMENT, + DatabricksEnvironment(Cloud.AZURE, + ".dev.azuredatabricks.net", + azure_application_id="62a912ac-b58e-4c1d-89ea-b2dbfc7358fc", + azure_environment=ENVIRONMENTS["PUBLIC"]), + DatabricksEnvironment(Cloud.AZURE, + ".staging.azuredatabricks.net", + azure_application_id="4a67d088-db5c-48f1-9ff2-0aace800ae68", + azure_environment=ENVIRONMENTS["PUBLIC"]), + 
DatabricksEnvironment(Cloud.AZURE, + ".azuredatabricks.net", + azure_application_id=ARM_DATABRICKS_RESOURCE_ID, + azure_environment=ENVIRONMENTS["PUBLIC"]), + DatabricksEnvironment(Cloud.AZURE, + ".databricks.azure.us", + azure_application_id=ARM_DATABRICKS_RESOURCE_ID, + azure_environment=ENVIRONMENTS["USGOVERNMENT"]), + DatabricksEnvironment(Cloud.AZURE, + ".databricks.azure.cn", + azure_application_id=ARM_DATABRICKS_RESOURCE_ID, + azure_environment=ENVIRONMENTS["CHINA"]), + DatabricksEnvironment(Cloud.GCP, ".dev.gcp.databricks.com"), + DatabricksEnvironment(Cloud.GCP, ".staging.gcp.databricks.com"), + DatabricksEnvironment(Cloud.GCP, ".gcp.databricks.com") +] diff --git a/databricks/sdk/oauth.py b/databricks/sdk/oauth.py index 9a3061e1d..68b88f003 100644 --- a/databricks/sdk/oauth.py +++ b/databricks/sdk/oauth.py @@ -357,7 +357,8 @@ def __init__(self, scopes: List[str] = None, client_secret: str = None): # TODO: is it a circular dependency?.. - from .core import Config, credentials_provider + from .core import Config + from .credentials_provider import credentials_provider @credentials_provider('noop', []) def noop_credentials(_: any): diff --git a/databricks/sdk/service/_internal.py b/databricks/sdk/service/_internal.py index fdb9a694c..1a04f8aeb 100644 --- a/databricks/sdk/service/_internal.py +++ b/databricks/sdk/service/_internal.py @@ -10,7 +10,7 @@ def _from_dict(d: Dict[str, any], field: str, cls: Type) -> any: def _repeated_dict(d: Dict[str, any], field: str, cls: Type) -> any: if field not in d or not d[field]: - return None + return [] from_dict = getattr(cls, 'from_dict') return [from_dict(v) for v in d[field]] diff --git a/databricks/sdk/service/catalog.py b/databricks/sdk/service/catalog.py index 6e51eb645..c1c068aff 100755 --- a/databricks/sdk/service/catalog.py +++ b/databricks/sdk/service/catalog.py @@ -1162,6 +1162,97 @@ def from_dict(cls, d: Dict[str, any]) -> CreateMetastoreAssignment: workspace_id=d.get('workspace_id', None)) +@dataclass +class CreateMonitor: + assets_dir: str + """The directory to store monitoring assets (e.g. dashboard, metric tables).""" + + output_schema_name: str + """Schema where output metric tables are created.""" + + baseline_table_name: Optional[str] = None + """Name of the baseline table from which drift metrics are computed from. Columns in the monitored + table should also be present in the baseline table.""" + + custom_metrics: Optional[List[MonitorCustomMetric]] = None + """Custom metrics to compute on the monitored table. These can be aggregate metrics, derived + metrics (from already computed aggregate metrics), or drift metrics (comparing metrics across + time windows).""" + + data_classification_config: Optional[MonitorDataClassificationConfig] = None + """The data classification config for the monitor.""" + + full_name: Optional[str] = None + """Full name of the table.""" + + inference_log: Optional[MonitorInferenceLogProfileType] = None + """Configuration for monitoring inference logs.""" + + notifications: Optional[List[MonitorNotificationsConfig]] = None + """The notification settings for the monitor.""" + + schedule: Optional[MonitorCronSchedule] = None + """The schedule for automatically updating and refreshing metric tables.""" + + skip_builtin_dashboard: Optional[bool] = None + """Whether to skip creating a default dashboard summarizing data quality metrics.""" + + slicing_exprs: Optional[List[str]] = None + """List of column expressions to slice data with for targeted analysis. 
The data is grouped by each + expression independently, resulting in a separate slice for each predicate and its complements. + For high-cardinality columns, only the top 100 unique values by frequency will generate slices.""" + + snapshot: Optional[Any] = None + """Configuration for monitoring snapshot tables.""" + + time_series: Optional[MonitorTimeSeriesProfileType] = None + """Configuration for monitoring time series tables.""" + + warehouse_id: Optional[str] = None + """Optional argument to specify the warehouse for dashboard creation. If not specified, the first + running warehouse will be used.""" + + def as_dict(self) -> dict: + """Serializes the CreateMonitor into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.assets_dir is not None: body['assets_dir'] = self.assets_dir + if self.baseline_table_name is not None: body['baseline_table_name'] = self.baseline_table_name + if self.custom_metrics: body['custom_metrics'] = [v.as_dict() for v in self.custom_metrics] + if self.data_classification_config: + body['data_classification_config'] = self.data_classification_config.as_dict() + if self.full_name is not None: body['full_name'] = self.full_name + if self.inference_log: body['inference_log'] = self.inference_log.as_dict() + if self.notifications: body['notifications'] = [v.as_dict() for v in self.notifications] + if self.output_schema_name is not None: body['output_schema_name'] = self.output_schema_name + if self.schedule: body['schedule'] = self.schedule.as_dict() + if self.skip_builtin_dashboard is not None: + body['skip_builtin_dashboard'] = self.skip_builtin_dashboard + if self.slicing_exprs: body['slicing_exprs'] = [v for v in self.slicing_exprs] + if self.snapshot: body['snapshot'] = self.snapshot + if self.time_series: body['time_series'] = self.time_series.as_dict() + if self.warehouse_id is not None: body['warehouse_id'] = self.warehouse_id + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> CreateMonitor: + """Deserializes the CreateMonitor from a dictionary.""" + return cls(assets_dir=d.get('assets_dir', None), + baseline_table_name=d.get('baseline_table_name', None), + custom_metrics=_repeated_dict(d, 'custom_metrics', MonitorCustomMetric), + data_classification_config=_from_dict(d, 'data_classification_config', + MonitorDataClassificationConfig), + full_name=d.get('full_name', None), + inference_log=_from_dict(d, 'inference_log', MonitorInferenceLogProfileType), + notifications=_repeated_dict(d, 'notifications', MonitorNotificationsConfig), + output_schema_name=d.get('output_schema_name', None), + schedule=_from_dict(d, 'schedule', MonitorCronSchedule), + skip_builtin_dashboard=d.get('skip_builtin_dashboard', None), + slicing_exprs=d.get('slicing_exprs', None), + snapshot=d.get('snapshot', None), + time_series=_from_dict(d, 'time_series', MonitorTimeSeriesProfileType), + warehouse_id=d.get('warehouse_id', None)) + + @dataclass class CreateRegisteredModelRequest: catalog_name: str @@ -2242,17 +2333,23 @@ class ListExternalLocationsResponse: external_locations: Optional[List[ExternalLocationInfo]] = None """An array of external locations.""" + next_page_token: Optional[str] = None + """Opaque token to retrieve the next page of results. Absent if there are no more pages. 
+ __page_token__ should be set to this value for the next request (for the next page of results).""" + def as_dict(self) -> dict: """Serializes the ListExternalLocationsResponse into a dictionary suitable for use as a JSON request body.""" body = {} if self.external_locations: body['external_locations'] = [v.as_dict() for v in self.external_locations] + if self.next_page_token is not None: body['next_page_token'] = self.next_page_token return body @classmethod def from_dict(cls, d: Dict[str, any]) -> ListExternalLocationsResponse: """Deserializes the ListExternalLocationsResponse from a dictionary.""" - return cls(external_locations=_repeated_dict(d, 'external_locations', ExternalLocationInfo)) + return cls(external_locations=_repeated_dict(d, 'external_locations', ExternalLocationInfo), + next_page_token=d.get('next_page_token', None)) @dataclass @@ -2260,16 +2357,22 @@ class ListFunctionsResponse: functions: Optional[List[FunctionInfo]] = None """An array of function information objects.""" + next_page_token: Optional[str] = None + """Opaque token to retrieve the next page of results. Absent if there are no more pages. + __page_token__ should be set to this value for the next request (for the next page of results).""" + def as_dict(self) -> dict: """Serializes the ListFunctionsResponse into a dictionary suitable for use as a JSON request body.""" body = {} if self.functions: body['functions'] = [v.as_dict() for v in self.functions] + if self.next_page_token is not None: body['next_page_token'] = self.next_page_token return body @classmethod def from_dict(cls, d: Dict[str, any]) -> ListFunctionsResponse: """Deserializes the ListFunctionsResponse from a dictionary.""" - return cls(functions=_repeated_dict(d, 'functions', FunctionInfo)) + return cls(functions=_repeated_dict(d, 'functions', FunctionInfo), + next_page_token=d.get('next_page_token', None)) @dataclass @@ -2294,7 +2397,8 @@ class ListModelVersionsResponse: model_versions: Optional[List[ModelVersionInfo]] = None next_page_token: Optional[str] = None - """Token to retrieve the next page of results""" + """Opaque token to retrieve the next page of results. Absent if there are no more pages. + __page_token__ should be set to this value for the next request (for the next page of results).""" def as_dict(self) -> dict: """Serializes the ListModelVersionsResponse into a dictionary suitable for use as a JSON request body.""" @@ -2334,28 +2438,39 @@ def from_dict(cls, d: Dict[str, any]) -> ListRegisteredModelsResponse: @dataclass class ListSchemasResponse: + next_page_token: Optional[str] = None + """Opaque token to retrieve the next page of results. Absent if there are no more pages. 
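Taken together with the `_repeated_dict` change earlier in this diff, these response dataclasses now deserialize an absent list as `[]` and carry the opaque `next_page_token` through unchanged. A small round-trip sketch, with a made-up token value:

from databricks.sdk.service.catalog import ListFunctionsResponse

page = ListFunctionsResponse.from_dict({'next_page_token': 'opaque-example-token'})
print(page.functions)        # []
print(page.next_page_token)  # opaque-example-token
print(page.as_dict())        # {'next_page_token': 'opaque-example-token'}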
+ __page_token__ should be set to this value for the next request (for the next page of results).""" + schemas: Optional[List[SchemaInfo]] = None """An array of schema information objects.""" def as_dict(self) -> dict: """Serializes the ListSchemasResponse into a dictionary suitable for use as a JSON request body.""" body = {} + if self.next_page_token is not None: body['next_page_token'] = self.next_page_token if self.schemas: body['schemas'] = [v.as_dict() for v in self.schemas] return body @classmethod def from_dict(cls, d: Dict[str, any]) -> ListSchemasResponse: """Deserializes the ListSchemasResponse from a dictionary.""" - return cls(schemas=_repeated_dict(d, 'schemas', SchemaInfo)) + return cls(next_page_token=d.get('next_page_token', None), + schemas=_repeated_dict(d, 'schemas', SchemaInfo)) @dataclass class ListStorageCredentialsResponse: + next_page_token: Optional[str] = None + """Opaque token to retrieve the next page of results. Absent if there are no more pages. + __page_token__ should be set to this value for the next request (for the next page of results).""" + storage_credentials: Optional[List[StorageCredentialInfo]] = None def as_dict(self) -> dict: """Serializes the ListStorageCredentialsResponse into a dictionary suitable for use as a JSON request body.""" body = {} + if self.next_page_token is not None: body['next_page_token'] = self.next_page_token if self.storage_credentials: body['storage_credentials'] = [v.as_dict() for v in self.storage_credentials] return body @@ -2363,7 +2478,8 @@ def as_dict(self) -> dict: @classmethod def from_dict(cls, d: Dict[str, any]) -> ListStorageCredentialsResponse: """Deserializes the ListStorageCredentialsResponse from a dictionary.""" - return cls(storage_credentials=_repeated_dict(d, 'storage_credentials', StorageCredentialInfo)) + return cls(next_page_token=d.get('next_page_token', None), + storage_credentials=_repeated_dict(d, 'storage_credentials', StorageCredentialInfo)) @dataclass @@ -2386,7 +2502,8 @@ def from_dict(cls, d: Dict[str, any]) -> ListSystemSchemasResponse: @dataclass class ListTableSummariesResponse: next_page_token: Optional[str] = None - """Opaque token for pagination. Omitted if there are no more results.""" + """Opaque token to retrieve the next page of results. Absent if there are no more pages. + __page_token__ should be set to this value for the next request (for the next page of results).""" tables: Optional[List[TableSummary]] = None """List of table summaries.""" @@ -2408,8 +2525,8 @@ def from_dict(cls, d: Dict[str, any]) -> ListTableSummariesResponse: @dataclass class ListTablesResponse: next_page_token: Optional[str] = None - """Opaque token for pagination. Omitted if there are no more results. page_token should be set to - this value for fetching the next page.""" + """Opaque token to retrieve the next page of results. Absent if there are no more pages. + __page_token__ should be set to this value for the next request (for the next page of results).""" tables: Optional[List[TableInfo]] = None """An array of table information objects.""" @@ -2705,6 +2822,342 @@ class ModelVersionInfoStatus(Enum): READY = 'READY' +@dataclass +class MonitorCronSchedule: + pause_status: Optional[MonitorCronSchedulePauseStatus] = None + """Whether the schedule is paused or not""" + + quartz_cron_expression: Optional[str] = None + """A cron expression using quartz syntax that describes the schedule for a job.""" + + timezone_id: Optional[str] = None + """A Java timezone id. 
The schedule for a job will be resolved with respect to this timezone.""" + + def as_dict(self) -> dict: + """Serializes the MonitorCronSchedule into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.pause_status is not None: body['pause_status'] = self.pause_status.value + if self.quartz_cron_expression is not None: + body['quartz_cron_expression'] = self.quartz_cron_expression + if self.timezone_id is not None: body['timezone_id'] = self.timezone_id + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> MonitorCronSchedule: + """Deserializes the MonitorCronSchedule from a dictionary.""" + return cls(pause_status=_enum(d, 'pause_status', MonitorCronSchedulePauseStatus), + quartz_cron_expression=d.get('quartz_cron_expression', None), + timezone_id=d.get('timezone_id', None)) + + +class MonitorCronSchedulePauseStatus(Enum): + """Whether the schedule is paused or not""" + + PAUSED = 'PAUSED' + UNPAUSED = 'UNPAUSED' + + +@dataclass +class MonitorCustomMetric: + definition: Optional[str] = None + """Jinja template for a SQL expression that specifies how to compute the metric. See [create metric + definition]. + + [create metric definition]: https://docs.databricks.com/en/lakehouse-monitoring/custom-metrics.html#create-definition""" + + input_columns: Optional[List[str]] = None + """Columns on the monitored table to apply the custom metrics to.""" + + name: Optional[str] = None + """Name of the custom metric.""" + + output_data_type: Optional[str] = None + """The output type of the custom metric.""" + + type: Optional[MonitorCustomMetricType] = None + """The type of the custom metric.""" + + def as_dict(self) -> dict: + """Serializes the MonitorCustomMetric into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.definition is not None: body['definition'] = self.definition + if self.input_columns: body['input_columns'] = [v for v in self.input_columns] + if self.name is not None: body['name'] = self.name + if self.output_data_type is not None: body['output_data_type'] = self.output_data_type + if self.type is not None: body['type'] = self.type.value + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> MonitorCustomMetric: + """Deserializes the MonitorCustomMetric from a dictionary.""" + return cls(definition=d.get('definition', None), + input_columns=d.get('input_columns', None), + name=d.get('name', None), + output_data_type=d.get('output_data_type', None), + type=_enum(d, 'type', MonitorCustomMetricType)) + + +class MonitorCustomMetricType(Enum): + """The type of the custom metric.""" + + CUSTOM_METRIC_TYPE_AGGREGATE = 'CUSTOM_METRIC_TYPE_AGGREGATE' + CUSTOM_METRIC_TYPE_DERIVED = 'CUSTOM_METRIC_TYPE_DERIVED' + CUSTOM_METRIC_TYPE_DRIFT = 'CUSTOM_METRIC_TYPE_DRIFT' + MONITOR_STATUS_ERROR = 'MONITOR_STATUS_ERROR' + MONITOR_STATUS_FAILED = 'MONITOR_STATUS_FAILED' + + +@dataclass +class MonitorDataClassificationConfig: + enabled: Optional[bool] = None + """Whether data classification is enabled.""" + + def as_dict(self) -> dict: + """Serializes the MonitorDataClassificationConfig into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.enabled is not None: body['enabled'] = self.enabled + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> MonitorDataClassificationConfig: + """Deserializes the MonitorDataClassificationConfig from a dictionary.""" + return cls(enabled=d.get('enabled', None)) + + +@dataclass +class MonitorDestinations: + email_addresses: 
Optional[List[str]] = None + """The list of email addresses to send the notification to.""" + + def as_dict(self) -> dict: + """Serializes the MonitorDestinations into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.email_addresses: body['email_addresses'] = [v for v in self.email_addresses] + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> MonitorDestinations: + """Deserializes the MonitorDestinations from a dictionary.""" + return cls(email_addresses=d.get('email_addresses', None)) + + +@dataclass +class MonitorInferenceLogProfileType: + granularities: Optional[List[str]] = None + """List of granularities to use when aggregating data into time windows based on their timestamp.""" + + label_col: Optional[str] = None + """Column of the model label.""" + + model_id_col: Optional[str] = None + """Column of the model id or version.""" + + prediction_col: Optional[str] = None + """Column of the model prediction.""" + + prediction_proba_col: Optional[str] = None + """Column of the model prediction probabilities.""" + + problem_type: Optional[MonitorInferenceLogProfileTypeProblemType] = None + """Problem type the model aims to solve.""" + + timestamp_col: Optional[str] = None + """Column of the timestamp of predictions.""" + + def as_dict(self) -> dict: + """Serializes the MonitorInferenceLogProfileType into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.granularities: body['granularities'] = [v for v in self.granularities] + if self.label_col is not None: body['label_col'] = self.label_col + if self.model_id_col is not None: body['model_id_col'] = self.model_id_col + if self.prediction_col is not None: body['prediction_col'] = self.prediction_col + if self.prediction_proba_col is not None: body['prediction_proba_col'] = self.prediction_proba_col + if self.problem_type is not None: body['problem_type'] = self.problem_type.value + if self.timestamp_col is not None: body['timestamp_col'] = self.timestamp_col + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> MonitorInferenceLogProfileType: + """Deserializes the MonitorInferenceLogProfileType from a dictionary.""" + return cls(granularities=d.get('granularities', None), + label_col=d.get('label_col', None), + model_id_col=d.get('model_id_col', None), + prediction_col=d.get('prediction_col', None), + prediction_proba_col=d.get('prediction_proba_col', None), + problem_type=_enum(d, 'problem_type', MonitorInferenceLogProfileTypeProblemType), + timestamp_col=d.get('timestamp_col', None)) + + +class MonitorInferenceLogProfileTypeProblemType(Enum): + """Problem type the model aims to solve.""" + + PROBLEM_TYPE_CLASSIFICATION = 'PROBLEM_TYPE_CLASSIFICATION' + PROBLEM_TYPE_REGRESSION = 'PROBLEM_TYPE_REGRESSION' + + +@dataclass +class MonitorInfo: + assets_dir: Optional[str] = None + """The directory to store monitoring assets (e.g. dashboard, metric tables).""" + + baseline_table_name: Optional[str] = None + """Name of the baseline table from which drift metrics are computed from. Columns in the monitored + table should also be present in the baseline table.""" + + custom_metrics: Optional[List[MonitorCustomMetric]] = None + """Custom metrics to compute on the monitored table. 
These can be aggregate metrics, derived + metrics (from already computed aggregate metrics), or drift metrics (comparing metrics across + time windows).""" + + dashboard_id: Optional[str] = None + """The ID of the generated dashboard.""" + + data_classification_config: Optional[MonitorDataClassificationConfig] = None + """The data classification config for the monitor.""" + + drift_metrics_table_name: Optional[str] = None + """The full name of the drift metrics table. Format: + __catalog_name__.__schema_name__.__table_name__.""" + + inference_log: Optional[MonitorInferenceLogProfileType] = None + """Configuration for monitoring inference logs.""" + + latest_monitor_failure_msg: Optional[str] = None + """The latest failure message of the monitor (if any).""" + + monitor_version: Optional[str] = None + """The version of the monitor config (e.g. 1,2,3). If negative, the monitor may be corrupted.""" + + notifications: Optional[List[MonitorNotificationsConfig]] = None + """The notification settings for the monitor.""" + + output_schema_name: Optional[str] = None + """Schema where output metric tables are created.""" + + profile_metrics_table_name: Optional[str] = None + """The full name of the profile metrics table. Format: + __catalog_name__.__schema_name__.__table_name__.""" + + schedule: Optional[MonitorCronSchedule] = None + """The schedule for automatically updating and refreshing metric tables.""" + + slicing_exprs: Optional[List[str]] = None + """List of column expressions to slice data with for targeted analysis. The data is grouped by each + expression independently, resulting in a separate slice for each predicate and its complements. + For high-cardinality columns, only the top 100 unique values by frequency will generate slices.""" + + snapshot: Optional[Any] = None + """Configuration for monitoring snapshot tables.""" + + status: Optional[MonitorInfoStatus] = None + """The status of the monitor.""" + + table_name: Optional[str] = None + """The full name of the table to monitor. 
Format: __catalog_name__.__schema_name__.__table_name__.""" + + time_series: Optional[MonitorTimeSeriesProfileType] = None + """Configuration for monitoring time series tables.""" + + def as_dict(self) -> dict: + """Serializes the MonitorInfo into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.assets_dir is not None: body['assets_dir'] = self.assets_dir + if self.baseline_table_name is not None: body['baseline_table_name'] = self.baseline_table_name + if self.custom_metrics: body['custom_metrics'] = [v.as_dict() for v in self.custom_metrics] + if self.dashboard_id is not None: body['dashboard_id'] = self.dashboard_id + if self.data_classification_config: + body['data_classification_config'] = self.data_classification_config.as_dict() + if self.drift_metrics_table_name is not None: + body['drift_metrics_table_name'] = self.drift_metrics_table_name + if self.inference_log: body['inference_log'] = self.inference_log.as_dict() + if self.latest_monitor_failure_msg is not None: + body['latest_monitor_failure_msg'] = self.latest_monitor_failure_msg + if self.monitor_version is not None: body['monitor_version'] = self.monitor_version + if self.notifications: body['notifications'] = [v.as_dict() for v in self.notifications] + if self.output_schema_name is not None: body['output_schema_name'] = self.output_schema_name + if self.profile_metrics_table_name is not None: + body['profile_metrics_table_name'] = self.profile_metrics_table_name + if self.schedule: body['schedule'] = self.schedule.as_dict() + if self.slicing_exprs: body['slicing_exprs'] = [v for v in self.slicing_exprs] + if self.snapshot: body['snapshot'] = self.snapshot + if self.status is not None: body['status'] = self.status.value + if self.table_name is not None: body['table_name'] = self.table_name + if self.time_series: body['time_series'] = self.time_series.as_dict() + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> MonitorInfo: + """Deserializes the MonitorInfo from a dictionary.""" + return cls(assets_dir=d.get('assets_dir', None), + baseline_table_name=d.get('baseline_table_name', None), + custom_metrics=_repeated_dict(d, 'custom_metrics', MonitorCustomMetric), + dashboard_id=d.get('dashboard_id', None), + data_classification_config=_from_dict(d, 'data_classification_config', + MonitorDataClassificationConfig), + drift_metrics_table_name=d.get('drift_metrics_table_name', None), + inference_log=_from_dict(d, 'inference_log', MonitorInferenceLogProfileType), + latest_monitor_failure_msg=d.get('latest_monitor_failure_msg', None), + monitor_version=d.get('monitor_version', None), + notifications=_repeated_dict(d, 'notifications', MonitorNotificationsConfig), + output_schema_name=d.get('output_schema_name', None), + profile_metrics_table_name=d.get('profile_metrics_table_name', None), + schedule=_from_dict(d, 'schedule', MonitorCronSchedule), + slicing_exprs=d.get('slicing_exprs', None), + snapshot=d.get('snapshot', None), + status=_enum(d, 'status', MonitorInfoStatus), + table_name=d.get('table_name', None), + time_series=_from_dict(d, 'time_series', MonitorTimeSeriesProfileType)) + + +class MonitorInfoStatus(Enum): + """The status of the monitor.""" + + MONITOR_STATUS_ACTIVE = 'MONITOR_STATUS_ACTIVE' + MONITOR_STATUS_DELETE_PENDING = 'MONITOR_STATUS_DELETE_PENDING' + MONITOR_STATUS_ERROR = 'MONITOR_STATUS_ERROR' + MONITOR_STATUS_FAILED = 'MONITOR_STATUS_FAILED' + MONITOR_STATUS_PENDING = 'MONITOR_STATUS_PENDING' + + +@dataclass +class MonitorNotificationsConfig: + on_failure: 
Optional[MonitorDestinations] = None + """Who to send notifications to on monitor failure.""" + + def as_dict(self) -> dict: + """Serializes the MonitorNotificationsConfig into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.on_failure: body['on_failure'] = self.on_failure.as_dict() + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> MonitorNotificationsConfig: + """Deserializes the MonitorNotificationsConfig from a dictionary.""" + return cls(on_failure=_from_dict(d, 'on_failure', MonitorDestinations)) + + +@dataclass +class MonitorTimeSeriesProfileType: + granularities: Optional[List[str]] = None + """List of granularities to use when aggregating data into time windows based on their timestamp.""" + + timestamp_col: Optional[str] = None + """The timestamp column. This must be timestamp types or convertible to timestamp types using the + pyspark to_timestamp function.""" + + def as_dict(self) -> dict: + """Serializes the MonitorTimeSeriesProfileType into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.granularities: body['granularities'] = [v for v in self.granularities] + if self.timestamp_col is not None: body['timestamp_col'] = self.timestamp_col + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> MonitorTimeSeriesProfileType: + """Deserializes the MonitorTimeSeriesProfileType from a dictionary.""" + return cls(granularities=d.get('granularities', None), timestamp_col=d.get('timestamp_col', None)) + + @dataclass class NamedTableConstraint: name: str @@ -3340,23 +3793,6 @@ def from_dict(cls, d: Dict[str, any]) -> TableConstraint: primary_key_constraint=_from_dict(d, 'primary_key_constraint', PrimaryKeyConstraint)) -@dataclass -class TableConstraintList: - table_constraints: Optional[List[TableConstraint]] = None - """List of table constraints. 
Note: this field is not set in the output of the __listTables__ API.""" - - def as_dict(self) -> dict: - """Serializes the TableConstraintList into a dictionary suitable for use as a JSON request body.""" - body = {} - if self.table_constraints: body['table_constraints'] = [v.as_dict() for v in self.table_constraints] - return body - - @classmethod - def from_dict(cls, d: Dict[str, any]) -> TableConstraintList: - """Deserializes the TableConstraintList from a dictionary.""" - return cls(table_constraints=_repeated_dict(d, 'table_constraints', TableConstraint)) - - @dataclass class TableDependency: """A table that is dependent on a SQL object.""" @@ -3377,6 +3813,23 @@ def from_dict(cls, d: Dict[str, any]) -> TableDependency: return cls(table_full_name=d.get('table_full_name', None)) +@dataclass +class TableExistsResponse: + table_exists: Optional[bool] = None + """Whether the table exists or not.""" + + def as_dict(self) -> dict: + """Serializes the TableExistsResponse into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.table_exists is not None: body['table_exists'] = self.table_exists + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> TableExistsResponse: + """Deserializes the TableExistsResponse from a dictionary.""" + return cls(table_exists=d.get('table_exists', None)) + + @dataclass class TableInfo: access_point: Optional[str] = None @@ -3451,7 +3904,8 @@ class TableInfo: storage_location: Optional[str] = None """Storage root URL for table (for **MANAGED**, **EXTERNAL** tables)""" - table_constraints: Optional[TableConstraintList] = None + table_constraints: Optional[List[TableConstraint]] = None + """List of table constraints. Note: this field is not set in the output of the __listTables__ API.""" table_id: Optional[str] = None """Name of table, relative to parent schema.""" @@ -3508,7 +3962,7 @@ def as_dict(self) -> dict: if self.storage_credential_name is not None: body['storage_credential_name'] = self.storage_credential_name if self.storage_location is not None: body['storage_location'] = self.storage_location - if self.table_constraints: body['table_constraints'] = self.table_constraints.as_dict() + if self.table_constraints: body['table_constraints'] = [v.as_dict() for v in self.table_constraints] if self.table_id is not None: body['table_id'] = self.table_id if self.table_type is not None: body['table_type'] = self.table_type.value if self.updated_at is not None: body['updated_at'] = self.updated_at @@ -3547,7 +4001,7 @@ def from_dict(cls, d: Dict[str, any]) -> TableInfo: sql_path=d.get('sql_path', None), storage_credential_name=d.get('storage_credential_name', None), storage_location=d.get('storage_location', None), - table_constraints=_from_dict(d, 'table_constraints', TableConstraintList), + table_constraints=_repeated_dict(d, 'table_constraints', TableConstraint), table_id=d.get('table_id', None), table_type=_enum(d, 'table_type', TableType), updated_at=d.get('updated_at', None), @@ -3907,6 +4361,85 @@ def from_dict(cls, d: Dict[str, any]) -> UpdateModelVersionRequest: version=d.get('version', None)) +@dataclass +class UpdateMonitor: + assets_dir: str + """The directory to store monitoring assets (e.g. dashboard, metric tables).""" + + output_schema_name: str + """Schema where output metric tables are created.""" + + baseline_table_name: Optional[str] = None + """Name of the baseline table from which drift metrics are computed from. 
Columns in the monitored + table should also be present in the baseline table.""" + + custom_metrics: Optional[List[MonitorCustomMetric]] = None + """Custom metrics to compute on the monitored table. These can be aggregate metrics, derived + metrics (from already computed aggregate metrics), or drift metrics (comparing metrics across + time windows).""" + + data_classification_config: Optional[MonitorDataClassificationConfig] = None + """The data classification config for the monitor.""" + + full_name: Optional[str] = None + """Full name of the table.""" + + inference_log: Optional[MonitorInferenceLogProfileType] = None + """Configuration for monitoring inference logs.""" + + notifications: Optional[List[MonitorNotificationsConfig]] = None + """The notification settings for the monitor.""" + + schedule: Optional[MonitorCronSchedule] = None + """The schedule for automatically updating and refreshing metric tables.""" + + slicing_exprs: Optional[List[str]] = None + """List of column expressions to slice data with for targeted analysis. The data is grouped by each + expression independently, resulting in a separate slice for each predicate and its complements. + For high-cardinality columns, only the top 100 unique values by frequency will generate slices.""" + + snapshot: Optional[Any] = None + """Configuration for monitoring snapshot tables.""" + + time_series: Optional[MonitorTimeSeriesProfileType] = None + """Configuration for monitoring time series tables.""" + + def as_dict(self) -> dict: + """Serializes the UpdateMonitor into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.assets_dir is not None: body['assets_dir'] = self.assets_dir + if self.baseline_table_name is not None: body['baseline_table_name'] = self.baseline_table_name + if self.custom_metrics: body['custom_metrics'] = [v.as_dict() for v in self.custom_metrics] + if self.data_classification_config: + body['data_classification_config'] = self.data_classification_config.as_dict() + if self.full_name is not None: body['full_name'] = self.full_name + if self.inference_log: body['inference_log'] = self.inference_log.as_dict() + if self.notifications: body['notifications'] = [v.as_dict() for v in self.notifications] + if self.output_schema_name is not None: body['output_schema_name'] = self.output_schema_name + if self.schedule: body['schedule'] = self.schedule.as_dict() + if self.slicing_exprs: body['slicing_exprs'] = [v for v in self.slicing_exprs] + if self.snapshot: body['snapshot'] = self.snapshot + if self.time_series: body['time_series'] = self.time_series.as_dict() + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> UpdateMonitor: + """Deserializes the UpdateMonitor from a dictionary.""" + return cls(assets_dir=d.get('assets_dir', None), + baseline_table_name=d.get('baseline_table_name', None), + custom_metrics=_repeated_dict(d, 'custom_metrics', MonitorCustomMetric), + data_classification_config=_from_dict(d, 'data_classification_config', + MonitorDataClassificationConfig), + full_name=d.get('full_name', None), + inference_log=_from_dict(d, 'inference_log', MonitorInferenceLogProfileType), + notifications=_repeated_dict(d, 'notifications', MonitorNotificationsConfig), + output_schema_name=d.get('output_schema_name', None), + schedule=_from_dict(d, 'schedule', MonitorCronSchedule), + slicing_exprs=d.get('slicing_exprs', None), + snapshot=d.get('snapshot', None), + time_series=_from_dict(d, 'time_series', MonitorTimeSeriesProfileType)) + + @dataclass class 
UpdatePermissions: changes: Optional[List[PermissionsChange]] = None @@ -5239,20 +5772,45 @@ def get(self, name: str) -> ExternalLocationInfo: res = self._api.do('GET', f'/api/2.1/unity-catalog/external-locations/{name}', headers=headers) return ExternalLocationInfo.from_dict(res) - def list(self) -> Iterator[ExternalLocationInfo]: + def list(self, + *, + max_results: Optional[int] = None, + page_token: Optional[str] = None) -> Iterator[ExternalLocationInfo]: """List external locations. Gets an array of external locations (__ExternalLocationInfo__ objects) from the metastore. The caller must be a metastore admin, the owner of the external location, or a user that has some privilege on - the external location. There is no guarantee of a specific ordering of the elements in the array. + the external location. For unpaginated request, there is no guarantee of a specific ordering of the + elements in the array. For paginated request, elements are ordered by their name. + + :param max_results: int (optional) + Maximum number of external locations to return. If not set, all the external locations are returned + (not recommended). - when set to a value greater than 0, the page length is the minimum of this + value and a server configured value; - when set to 0, the page length is set to a server configured + value (recommended); - when set to a value less than 0, an invalid parameter error is returned; + :param page_token: str (optional) + Opaque pagination token to go to next page based on previous query. :returns: Iterator over :class:`ExternalLocationInfo` """ + query = {} + if max_results is not None: query['max_results'] = max_results + if page_token is not None: query['page_token'] = page_token headers = {'Accept': 'application/json', } - json = self._api.do('GET', '/api/2.1/unity-catalog/external-locations', headers=headers) - parsed = ListExternalLocationsResponse.from_dict(json).external_locations - return parsed if parsed is not None else [] + + while True: + json = self._api.do('GET', + '/api/2.1/unity-catalog/external-locations', + query=query, + headers=headers) + if 'external_locations' not in json or not json['external_locations']: + return + for v in json['external_locations']: + yield ExternalLocationInfo.from_dict(v) + if 'next_page_token' not in json or not json['next_page_token']: + return + query['page_token'] = json['next_page_token'] def update(self, name: str, @@ -5391,30 +5949,52 @@ def get(self, name: str) -> FunctionInfo: res = self._api.do('GET', f'/api/2.1/unity-catalog/functions/{name}', headers=headers) return FunctionInfo.from_dict(res) - def list(self, catalog_name: str, schema_name: str) -> Iterator[FunctionInfo]: + def list(self, + catalog_name: str, + schema_name: str, + *, + max_results: Optional[int] = None, + page_token: Optional[str] = None) -> Iterator[FunctionInfo]: """List functions. List functions within the specified parent catalog and schema. If the user is a metastore admin, all functions are returned in the output list. Otherwise, the user must have the **USE_CATALOG** privilege on the catalog and the **USE_SCHEMA** privilege on the schema, and the output list contains only - functions for which either the user has the **EXECUTE** privilege or the user is the owner. There is - no guarantee of a specific ordering of the elements in the array. + functions for which either the user has the **EXECUTE** privilege or the user is the owner. For + unpaginated request, there is no guarantee of a specific ordering of the elements in the array. 
For + paginated request, elements are ordered by their name. :param catalog_name: str Name of parent catalog for functions of interest. :param schema_name: str Parent schema of functions. + :param max_results: int (optional) + Maximum number of functions to return. If not set, all the functions are returned (not recommended). + - when set to a value greater than 0, the page length is the minimum of this value and a server + configured value; - when set to 0, the page length is set to a server configured value + (recommended); - when set to a value less than 0, an invalid parameter error is returned; + :param page_token: str (optional) + Opaque pagination token to go to next page based on previous query. :returns: Iterator over :class:`FunctionInfo` """ query = {} if catalog_name is not None: query['catalog_name'] = catalog_name + if max_results is not None: query['max_results'] = max_results + if page_token is not None: query['page_token'] = page_token if schema_name is not None: query['schema_name'] = schema_name headers = {'Accept': 'application/json', } - json = self._api.do('GET', '/api/2.1/unity-catalog/functions', query=query, headers=headers) - parsed = ListFunctionsResponse.from_dict(json).functions - return parsed if parsed is not None else [] + + while True: + json = self._api.do('GET', '/api/2.1/unity-catalog/functions', query=query, headers=headers) + if 'functions' not in json or not json['functions']: + return + for v in json['functions']: + yield FunctionInfo.from_dict(v) + if 'next_page_token' not in json or not json['next_page_token']: + return + query['page_token'] = json['next_page_token'] def update(self, name: str, *, owner: Optional[str] = None) -> FunctionInfo: """Update a function. @@ -5540,6 +6120,231 @@ def update(self, return PermissionsList.from_dict(res) +class LakehouseMonitorsAPI: + """A monitor computes and monitors data or model quality metrics for a table over time. It generates metrics + tables and a dashboard that you can use to monitor table health and set alerts. + + Most write operations require the user to be the owner of the table (or its parent schema or parent + catalog). Viewing the dashboard, computed metrics, or monitor configuration only requires the user to have + **SELECT** privileges on the table (along with **USE_SCHEMA** and **USE_CATALOG**).""" + + def __init__(self, api_client): + self._api = api_client + + def create(self, + full_name: str, + assets_dir: str, + output_schema_name: str, + *, + baseline_table_name: Optional[str] = None, + custom_metrics: Optional[List[MonitorCustomMetric]] = None, + data_classification_config: Optional[MonitorDataClassificationConfig] = None, + inference_log: Optional[MonitorInferenceLogProfileType] = None, + notifications: Optional[List[MonitorNotificationsConfig]] = None, + schedule: Optional[MonitorCronSchedule] = None, + skip_builtin_dashboard: Optional[bool] = None, + slicing_exprs: Optional[List[str]] = None, + snapshot: Optional[Any] = None, + time_series: Optional[MonitorTimeSeriesProfileType] = None, + warehouse_id: Optional[str] = None) -> MonitorInfo: + """Create a table monitor. + + Creates a new monitor for the specified table. + + The caller must either: 1. be an owner of the table's parent catalog, have **USE_SCHEMA** on the + table's parent schema, and have **SELECT** access on the table 2. have **USE_CATALOG** on the table's + parent catalog, be an owner of the table's parent schema, and have **SELECT** access on the table. 3. 
+ have the following permissions: - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on + the table's parent schema - be an owner of the table. + + Workspace assets, such as the dashboard, will be created in the workspace where this call was made. + + :param full_name: str + Full name of the table. + :param assets_dir: str + The directory to store monitoring assets (e.g. dashboard, metric tables). + :param output_schema_name: str + Schema where output metric tables are created. + :param baseline_table_name: str (optional) + Name of the baseline table from which drift metrics are computed from. Columns in the monitored + table should also be present in the baseline table. + :param custom_metrics: List[:class:`MonitorCustomMetric`] (optional) + Custom metrics to compute on the monitored table. These can be aggregate metrics, derived metrics + (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows). + :param data_classification_config: :class:`MonitorDataClassificationConfig` (optional) + The data classification config for the monitor. + :param inference_log: :class:`MonitorInferenceLogProfileType` (optional) + Configuration for monitoring inference logs. + :param notifications: List[:class:`MonitorNotificationsConfig`] (optional) + The notification settings for the monitor. + :param schedule: :class:`MonitorCronSchedule` (optional) + The schedule for automatically updating and refreshing metric tables. + :param skip_builtin_dashboard: bool (optional) + Whether to skip creating a default dashboard summarizing data quality metrics. + :param slicing_exprs: List[str] (optional) + List of column expressions to slice data with for targeted analysis. The data is grouped by each + expression independently, resulting in a separate slice for each predicate and its complements. For + high-cardinality columns, only the top 100 unique values by frequency will generate slices. + :param snapshot: Any (optional) + Configuration for monitoring snapshot tables. + :param time_series: :class:`MonitorTimeSeriesProfileType` (optional) + Configuration for monitoring time series tables. + :param warehouse_id: str (optional) + Optional argument to specify the warehouse for dashboard creation. If not specified, the first + running warehouse will be used. 
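        A minimal usage sketch (illustrative only: the table name, assets directory, output schema and
        timestamp column below are placeholders, and the '1 day' granularity string assumes the format
        accepted by the monitoring API):

            from databricks.sdk import WorkspaceClient
            from databricks.sdk.service import catalog

            w = WorkspaceClient()
            # 'main.default.events' and the paths below are placeholder names.
            info = w.lakehouse_monitors.create(
                full_name='main.default.events',
                assets_dir='/Workspace/Users/someone@example.com/monitoring',
                output_schema_name='main.monitoring',
                time_series=catalog.MonitorTimeSeriesProfileType(
                    granularities=['1 day'],
                    timestamp_col='event_ts'))
            print(info.status, info.dashboard_id)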
+ + :returns: :class:`MonitorInfo` + """ + body = {} + if assets_dir is not None: body['assets_dir'] = assets_dir + if baseline_table_name is not None: body['baseline_table_name'] = baseline_table_name + if custom_metrics is not None: body['custom_metrics'] = [v.as_dict() for v in custom_metrics] + if data_classification_config is not None: + body['data_classification_config'] = data_classification_config.as_dict() + if inference_log is not None: body['inference_log'] = inference_log.as_dict() + if notifications is not None: body['notifications'] = [v.as_dict() for v in notifications] + if output_schema_name is not None: body['output_schema_name'] = output_schema_name + if schedule is not None: body['schedule'] = schedule.as_dict() + if skip_builtin_dashboard is not None: body['skip_builtin_dashboard'] = skip_builtin_dashboard + if slicing_exprs is not None: body['slicing_exprs'] = [v for v in slicing_exprs] + if snapshot is not None: body['snapshot'] = snapshot + if time_series is not None: body['time_series'] = time_series.as_dict() + if warehouse_id is not None: body['warehouse_id'] = warehouse_id + headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } + res = self._api.do('POST', + f'/api/2.1/unity-catalog/tables/{full_name}/monitor', + body=body, + headers=headers) + return MonitorInfo.from_dict(res) + + def delete(self, full_name: str): + """Delete a table monitor. + + Deletes a monitor for the specified table. + + The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the + table's parent catalog and be an owner of the table's parent schema 3. have the following permissions: + - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - be an + owner of the table. + + Additionally, the call must be made from the workspace where the monitor was created. + + Note that the metric tables and dashboard will not be deleted as part of this call; those assets must + be manually cleaned up (if desired). + + :param full_name: str + Full name of the table. + + + """ + + headers = {} + self._api.do('DELETE', f'/api/2.1/unity-catalog/tables/{full_name}/monitor', headers=headers) + + def get(self, full_name: str) -> MonitorInfo: + """Get a table monitor. + + Gets a monitor for the specified table. + + The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the + table's parent catalog and be an owner of the table's parent schema. 3. have the following + permissions: - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent + schema - **SELECT** privilege on the table. + + The returned information includes configuration values, as well as information on assets created by + the monitor. Some information (e.g., dashboard) may be filtered out if the caller is in a different + workspace than where the monitor was created. + + :param full_name: str + Full name of the table. 
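        A short illustrative read of the monitor created above (same placeholder table name; `w` is the
        WorkspaceClient from the earlier sketch):

            mon = w.lakehouse_monitors.get(full_name='main.default.events')
            print(mon.drift_metrics_table_name, mon.profile_metrics_table_name)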
+ + :returns: :class:`MonitorInfo` + """ + + headers = {'Accept': 'application/json', } + res = self._api.do('GET', f'/api/2.1/unity-catalog/tables/{full_name}/monitor', headers=headers) + return MonitorInfo.from_dict(res) + + def update(self, + full_name: str, + assets_dir: str, + output_schema_name: str, + *, + baseline_table_name: Optional[str] = None, + custom_metrics: Optional[List[MonitorCustomMetric]] = None, + data_classification_config: Optional[MonitorDataClassificationConfig] = None, + inference_log: Optional[MonitorInferenceLogProfileType] = None, + notifications: Optional[List[MonitorNotificationsConfig]] = None, + schedule: Optional[MonitorCronSchedule] = None, + slicing_exprs: Optional[List[str]] = None, + snapshot: Optional[Any] = None, + time_series: Optional[MonitorTimeSeriesProfileType] = None) -> MonitorInfo: + """Update a table monitor. + + Updates a monitor for the specified table. + + The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the + table's parent catalog and be an owner of the table's parent schema 3. have the following permissions: + - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - be an + owner of the table. + + Additionally, the call must be made from the workspace where the monitor was created, and the caller + must be the original creator of the monitor. + + Certain configuration fields, such as output asset identifiers, cannot be updated. + + :param full_name: str + Full name of the table. + :param assets_dir: str + The directory to store monitoring assets (e.g. dashboard, metric tables). + :param output_schema_name: str + Schema where output metric tables are created. + :param baseline_table_name: str (optional) + Name of the baseline table from which drift metrics are computed from. Columns in the monitored + table should also be present in the baseline table. + :param custom_metrics: List[:class:`MonitorCustomMetric`] (optional) + Custom metrics to compute on the monitored table. These can be aggregate metrics, derived metrics + (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows). + :param data_classification_config: :class:`MonitorDataClassificationConfig` (optional) + The data classification config for the monitor. + :param inference_log: :class:`MonitorInferenceLogProfileType` (optional) + Configuration for monitoring inference logs. + :param notifications: List[:class:`MonitorNotificationsConfig`] (optional) + The notification settings for the monitor. + :param schedule: :class:`MonitorCronSchedule` (optional) + The schedule for automatically updating and refreshing metric tables. + :param slicing_exprs: List[str] (optional) + List of column expressions to slice data with for targeted analysis. The data is grouped by each + expression independently, resulting in a separate slice for each predicate and its complements. For + high-cardinality columns, only the top 100 unique values by frequency will generate slices. + :param snapshot: Any (optional) + Configuration for monitoring snapshot tables. + :param time_series: :class:`MonitorTimeSeriesProfileType` (optional) + Configuration for monitoring time series tables. 
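        An illustrative reconfiguration; `update` still takes `assets_dir` and `output_schema_name`, so
        this sketch simply reuses the values returned by the `get` call above, and the slicing columns are
        placeholders:

            updated = w.lakehouse_monitors.update(
                full_name='main.default.events',
                assets_dir=mon.assets_dir,
                output_schema_name=mon.output_schema_name,
                slicing_exprs=['country', 'device_type'])  # placeholder column names
            print(updated.monitor_version)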
+ + :returns: :class:`MonitorInfo` + """ + body = {} + if assets_dir is not None: body['assets_dir'] = assets_dir + if baseline_table_name is not None: body['baseline_table_name'] = baseline_table_name + if custom_metrics is not None: body['custom_metrics'] = [v.as_dict() for v in custom_metrics] + if data_classification_config is not None: + body['data_classification_config'] = data_classification_config.as_dict() + if inference_log is not None: body['inference_log'] = inference_log.as_dict() + if notifications is not None: body['notifications'] = [v.as_dict() for v in notifications] + if output_schema_name is not None: body['output_schema_name'] = output_schema_name + if schedule is not None: body['schedule'] = schedule.as_dict() + if slicing_exprs is not None: body['slicing_exprs'] = [v for v in slicing_exprs] + if snapshot is not None: body['snapshot'] = snapshot + if time_series is not None: body['time_series'] = time_series.as_dict() + headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } + res = self._api.do('PUT', + f'/api/2.1/unity-catalog/tables/{full_name}/monitor', + body=body, + headers=headers) + return MonitorInfo.from_dict(res) + + class MetastoresAPI: """A metastore is the top-level container of objects in Unity Catalog. It stores data assets (tables and views) and the permissions that govern access to them. Databricks account admins can create metastores and @@ -5892,9 +6697,13 @@ def list(self, :param full_name: str The full three-level name of the registered model under which to list model versions :param max_results: int (optional) - Max number of model versions to return + Maximum number of model versions to return. If not set, the page length is set to a server + configured value (100, as of 1/3/2024). - when set to a value greater than 0, the page length is the + minimum of this value and a server configured value(1000, as of 1/3/2024); - when set to 0, the page + length is set to a server configured value (100, as of 1/3/2024) (recommended); - when set to a + value less than 0, an invalid parameter error is returned; :param page_token: str (optional) - Opaque token to send for the next page of results (pagination). + Opaque pagination token to go to next page based on previous query. :returns: Iterator over :class:`ModelVersionInfo` """ @@ -6269,26 +7078,47 @@ def get(self, full_name: str) -> SchemaInfo: res = self._api.do('GET', f'/api/2.1/unity-catalog/schemas/{full_name}', headers=headers) return SchemaInfo.from_dict(res) - def list(self, catalog_name: str) -> Iterator[SchemaInfo]: + def list(self, + catalog_name: str, + *, + max_results: Optional[int] = None, + page_token: Optional[str] = None) -> Iterator[SchemaInfo]: """List schemas. Gets an array of schemas for a catalog in the metastore. If the caller is the metastore admin or the owner of the parent catalog, all schemas for the catalog will be retrieved. Otherwise, only schemas - owned by the caller (or for which the caller has the **USE_SCHEMA** privilege) will be retrieved. - There is no guarantee of a specific ordering of the elements in the array. + owned by the caller (or for which the caller has the **USE_SCHEMA** privilege) will be retrieved. For + unpaginated request, there is no guarantee of a specific ordering of the elements in the array. For + paginated request, elements are ordered by their name. :param catalog_name: str Parent catalog for schemas of interest. + :param max_results: int (optional) + Maximum number of schemas to return. 
If not set, all the schemas are returned (not recommended). - + when set to a value greater than 0, the page length is the minimum of this value and a server + configured value; - when set to 0, the page length is set to a server configured value + (recommended); - when set to a value less than 0, an invalid parameter error is returned; + :param page_token: str (optional) + Opaque pagination token to go to next page based on previous query. :returns: Iterator over :class:`SchemaInfo` """ query = {} if catalog_name is not None: query['catalog_name'] = catalog_name + if max_results is not None: query['max_results'] = max_results + if page_token is not None: query['page_token'] = page_token headers = {'Accept': 'application/json', } - json = self._api.do('GET', '/api/2.1/unity-catalog/schemas', query=query, headers=headers) - parsed = ListSchemasResponse.from_dict(json).schemas - return parsed if parsed is not None else [] + + while True: + json = self._api.do('GET', '/api/2.1/unity-catalog/schemas', query=query, headers=headers) + if 'schemas' not in json or not json['schemas']: + return + for v in json['schemas']: + yield SchemaInfo.from_dict(v) + if 'next_page_token' not in json or not json['next_page_token']: + return + query['page_token'] = json['next_page_token'] def update(self, full_name: str, @@ -6442,21 +7272,47 @@ def get(self, name: str) -> StorageCredentialInfo: res = self._api.do('GET', f'/api/2.1/unity-catalog/storage-credentials/{name}', headers=headers) return StorageCredentialInfo.from_dict(res) - def list(self) -> Iterator[StorageCredentialInfo]: + def list(self, + *, + max_results: Optional[int] = None, + page_token: Optional[str] = None) -> Iterator[StorageCredentialInfo]: """List credentials. Gets an array of storage credentials (as __StorageCredentialInfo__ objects). The array is limited to only those storage credentials the caller has permission to access. If the caller is a metastore - admin, all storage credentials will be retrieved. There is no guarantee of a specific ordering of the - elements in the array. + admin, retrieval of credentials is unrestricted. For unpaginated request, there is no guarantee of a + specific ordering of the elements in the array. For paginated request, elements are ordered by their + name. + + :param max_results: int (optional) + Maximum number of storage credentials to return. If not set, all the storage credentials are + returned (not recommended). - when set to a value greater than 0, the page length is the minimum of + this value and a server configured value; - when set to 0, the page length is set to a server + configured value (recommended); - when set to a value less than 0, an invalid parameter error is + returned; + :param page_token: str (optional) + Opaque pagination token to go to next page based on previous query. 
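        An illustrative listing with the new pagination support; the returned iterator follows
        `next_page_token` internally, so callers only choose a page size (`0` lets the server pick one):

            for cred in w.storage_credentials.list(max_results=0):
                print(cred.name)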
:returns: Iterator over :class:`StorageCredentialInfo` """ + query = {} + if max_results is not None: query['max_results'] = max_results + if page_token is not None: query['page_token'] = page_token headers = {'Accept': 'application/json', } - json = self._api.do('GET', '/api/2.1/unity-catalog/storage-credentials', headers=headers) - parsed = ListStorageCredentialsResponse.from_dict(json).storage_credentials - return parsed if parsed is not None else [] + + while True: + json = self._api.do('GET', + '/api/2.1/unity-catalog/storage-credentials', + query=query, + headers=headers) + if 'storage_credentials' not in json or not json['storage_credentials']: + return + for v in json['storage_credentials']: + yield StorageCredentialInfo.from_dict(v) + if 'next_page_token' not in json or not json['next_page_token']: + return + query['page_token'] = json['next_page_token'] def update(self, name: str, @@ -6760,13 +7616,34 @@ def delete(self, full_name: str): headers = {'Accept': 'application/json', } self._api.do('DELETE', f'/api/2.1/unity-catalog/tables/{full_name}', headers=headers) + def exists(self, full_name: str) -> TableExistsResponse: + """Get boolean reflecting if table exists. + + Gets if a table exists in the metastore for a specific catalog and schema. The caller must satisfy one + of the following requirements: * Be a metastore admin * Be the owner of the parent catalog * Be the + owner of the parent schema and have the USE_CATALOG privilege on the parent catalog * Have the + **USE_CATALOG** privilege on the parent catalog and the **USE_SCHEMA** privilege on the parent schema, + and either be the table owner or have the SELECT privilege on the table. * Have BROWSE privilege on + the parent catalog * Have BROWSE privilege on the parent schema. + + :param full_name: str + Full name of the table. + + :returns: :class:`TableExistsResponse` + """ + + headers = {'Accept': 'application/json', } + res = self._api.do('GET', f'/api/2.1/unity-catalog/tables/{full_name}/exists', headers=headers) + return TableExistsResponse.from_dict(res) + def get(self, full_name: str, *, include_delta_metadata: Optional[bool] = None) -> TableInfo: """Get a table. - Gets a table from the metastore for a specific catalog and schema. The caller must be a metastore - admin, be the owner of the table and have the **USE_CATALOG** privilege on the parent catalog and the - **USE_SCHEMA** privilege on the parent schema, or be the owner of the table and have the **SELECT** - privilege on it as well. + Gets a table from the metastore for a specific catalog and schema. The caller must satisfy one of the + following requirements: * Be a metastore admin * Be the owner of the parent catalog * Be the owner of + the parent schema and have the USE_CATALOG privilege on the parent catalog * Have the **USE_CATALOG** + privilege on the parent catalog and the **USE_SCHEMA** privilege on the parent schema, and either be + the table owner or have the SELECT privilege on the table. :param full_name: str Full name of the table. @@ -6788,6 +7665,8 @@ def list(self, *, include_delta_metadata: Optional[bool] = None, max_results: Optional[int] = None, + omit_columns: Optional[bool] = None, + omit_properties: Optional[bool] = None, page_token: Optional[str] = None) -> Iterator[TableInfo]: """List tables. @@ -6804,11 +7683,14 @@ def list(self, :param include_delta_metadata: bool (optional) Whether delta metadata should be included in the response. :param max_results: int (optional) - Maximum number of tables to return (page length). 
If not set, all accessible tables in the schema - are returned. If set to: - - * greater than 0, page length is the minimum of this value and a server configured value. * equal to - 0, page length is set to a server configured value. * lesser than 0, invalid parameter error. + Maximum number of tables to return. If not set, all the tables are returned (not recommended). - + when set to a value greater than 0, the page length is the minimum of this value and a server + configured value; - when set to 0, the page length is set to a server configured value + (recommended); - when set to a value less than 0, an invalid parameter error is returned; + :param omit_columns: bool (optional) + Whether to omit the columns of the table from the response or not. + :param omit_properties: bool (optional) + Whether to omit the properties of the table from the response or not. :param page_token: str (optional) Opaque token to send for the next page of results (pagination). @@ -6819,6 +7701,8 @@ def list(self, if catalog_name is not None: query['catalog_name'] = catalog_name if include_delta_metadata is not None: query['include_delta_metadata'] = include_delta_metadata if max_results is not None: query['max_results'] = max_results + if omit_columns is not None: query['omit_columns'] = omit_columns + if omit_properties is not None: query['omit_properties'] = omit_properties if page_token is not None: query['page_token'] = page_token if schema_name is not None: query['schema_name'] = schema_name headers = {'Accept': 'application/json', } @@ -6845,10 +7729,10 @@ def list_summaries(self, Gets an array of summaries for tables for a schema and catalog within the metastore. The table summaries returned are either: - * summaries for all tables (within the current metastore and parent catalog and schema), when the user - is a metastore admin, or: * summaries for all tables and schemas (within the current metastore and - parent catalog) for which the user has ownership or the **SELECT** privilege on the table and - ownership or **USE_SCHEMA** privilege on the schema, provided that the user also has ownership or the + * summaries for tables (within the current metastore and parent catalog and schema), when the user is + a metastore admin, or: * summaries for tables and schemas (within the current metastore and parent + catalog) for which the user has ownership or the **SELECT** privilege on the table and ownership or + **USE_SCHEMA** privilege on the schema, provided that the user also has ownership or the **USE_CATALOG** privilege on the parent catalog. There is no guarantee of a specific ordering of the elements in the array. @@ -6856,9 +7740,13 @@ def list_summaries(self, :param catalog_name: str Name of parent catalog for tables of interest. :param max_results: int (optional) - Maximum number of tables to return (page length). Defaults to 10000. + Maximum number of summaries for tables to return. If not set, the page length is set to a server + configured value (10000, as of 1/5/2024). - when set to a value greater than 0, the page length is + the minimum of this value and a server configured value (10000, as of 1/5/2024); - when set to 0, + the page length is set to a server configured value (10000, as of 1/5/2024) (recommended); - when + set to a value less than 0, an invalid parameter error is returned; :param page_token: str (optional) - Opaque token to send for the next page of results (pagination). + Opaque pagination token to go to next page based on previous query. 
:param schema_name_pattern: str (optional) A sql LIKE pattern (% and _) for schema names. All schemas will be returned if not set or empty. :param table_name_pattern: str (optional) diff --git a/databricks/sdk/service/compute.py b/databricks/sdk/service/compute.py index 60e92256c..816f0db3a 100755 --- a/databricks/sdk/service/compute.py +++ b/databricks/sdk/service/compute.py @@ -357,8 +357,7 @@ class ClusterAccessControlRequest: """Permission level""" service_principal_name: Optional[str] = None - """Application ID of an active service principal. Setting this field requires the - `servicePrincipal/user` role.""" + """application ID of a service principal""" user_name: Optional[str] = None """name of the user""" @@ -1132,8 +1131,7 @@ class ClusterPolicyAccessControlRequest: """Permission level""" service_principal_name: Optional[str] = None - """Application ID of an active service principal. Setting this field requires the - `servicePrincipal/user` role.""" + """application ID of a service principal""" user_name: Optional[str] = None """name of the user""" @@ -2053,7 +2051,8 @@ class CreatePolicy: """Additional human-readable description of the cluster policy.""" libraries: Optional[List[Library]] = None - """A list of libraries to be installed on the next cluster restart that uses this policy.""" + """A list of libraries to be installed on the next cluster restart that uses this policy. The + maximum number of libraries is 500.""" max_clusters_per_user: Optional[int] = None """Max number of clusters per user that can be active using this policy. If not present, there is @@ -2732,7 +2731,8 @@ class EditPolicy: """Additional human-readable description of the cluster policy.""" libraries: Optional[List[Library]] = None - """A list of libraries to be installed on the next cluster restart that uses this policy.""" + """A list of libraries to be installed on the next cluster restart that uses this policy. The + maximum number of libraries is 500.""" max_clusters_per_user: Optional[int] = None """Max number of clusters per user that can be active using this policy. If not present, there is @@ -3632,8 +3632,7 @@ class InstancePoolAccessControlRequest: """Permission level""" service_principal_name: Optional[str] = None - """Application ID of an active service principal. Setting this field requires the - `servicePrincipal/user` role.""" + """application ID of a service principal""" user_name: Optional[str] = None """name of the user""" @@ -4740,7 +4739,8 @@ class Policy: be deleted, and their policy families cannot be changed.""" libraries: Optional[List[Library]] = None - """A list of libraries to be installed on the next cluster restart that uses this policy.""" + """A list of libraries to be installed on the next cluster restart that uses this policy. The + maximum number of libraries is 500.""" max_clusters_per_user: Optional[int] = None """Max number of clusters per user that can be active using this policy. If not present, there is @@ -5426,9 +5426,9 @@ class ClusterPoliciesAPI: policies have ACLs that limit their use to specific users and groups. With cluster policies, you can: - Auto-install cluster libraries on the next restart by listing them in - the policy's "libraries" field. - Limit users to creating clusters with the prescribed settings. - - Simplify the user interface, enabling more users to create clusters, by fixing and hiding some fields. - - Manage costs by setting limits on attributes that impact the hourly rate. + the policy's "libraries" field (Public Preview). 
- Limit users to creating clusters with the prescribed + settings. - Simplify the user interface, enabling more users to create clusters, by fixing and hiding some + fields. - Manage costs by setting limits on attributes that impact the hourly rate. Cluster policy permissions limit which policies a user can select in the Policy drop-down when the user creates a cluster: - A user who has unrestricted cluster create permission can select the Unrestricted @@ -5465,7 +5465,8 @@ def create(self, :param description: str (optional) Additional human-readable description of the cluster policy. :param libraries: List[:class:`Library`] (optional) - A list of libraries to be installed on the next cluster restart that uses this policy. + A list of libraries to be installed on the next cluster restart that uses this policy. The maximum + number of libraries is 500. :param max_clusters_per_user: int (optional) Max number of clusters per user that can be active using this policy. If not present, there is no max limit. @@ -5541,7 +5542,8 @@ def edit(self, :param description: str (optional) Additional human-readable description of the cluster policy. :param libraries: List[:class:`Library`] (optional) - A list of libraries to be installed on the next cluster restart that uses this policy. + A list of libraries to be installed on the next cluster restart that uses this policy. The maximum + number of libraries is 500. :param max_clusters_per_user: int (optional) Max number of clusters per user that can be active using this policy. If not present, there is no max limit. @@ -5798,7 +5800,9 @@ def wait_get_cluster_terminated( def change_owner(self, cluster_id: str, owner_username: str): """Change cluster owner. - Change the owner of the cluster. You must be an admin to perform this operation. + Change the owner of the cluster. You must be an admin and the cluster must be terminated to perform + this operation. The service principal application ID can be supplied as an argument to + `owner_username`. :param cluster_id: str @@ -7092,7 +7096,7 @@ def list(self) -> Iterator[GlobalInitScriptDetails]: Get a list of all global init scripts for this workspace. This returns all properties for each script but **not** the script contents. To retrieve the contents of a script, use the [get a global init - script](#operation/get-script) operation. + script](:method:globalinitscripts/get) operation. :returns: Iterator over :class:`GlobalInitScriptDetails` """ diff --git a/databricks/sdk/service/dashboards.py b/databricks/sdk/service/dashboards.py new file mode 100755 index 000000000..51ae22e1e --- /dev/null +++ b/databricks/sdk/service/dashboards.py @@ -0,0 +1,75 @@ +# Code generated from OpenAPI specs by Databricks SDK Generator. DO NOT EDIT. + +from __future__ import annotations + +import logging +from dataclasses import dataclass +from typing import Dict, Optional + +_LOG = logging.getLogger('databricks.sdk') + +# all definitions in this file are in alphabetical order + + +@dataclass +class PublishRequest: + dashboard_id: Optional[str] = None + """UUID identifying the dashboard to be published.""" + + embed_credentials: Optional[bool] = None + """Flag to indicate if the publisher's credentials should be embedded in the published dashboard. 
+ These embedded credentials will be used to execute the published dashboard's queries.""" + + warehouse_id: Optional[str] = None + """The ID of the warehouse that can be used to override the warehouse which was set in the draft.""" + + def as_dict(self) -> dict: + """Serializes the PublishRequest into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.dashboard_id is not None: body['dashboard_id'] = self.dashboard_id + if self.embed_credentials is not None: body['embed_credentials'] = self.embed_credentials + if self.warehouse_id is not None: body['warehouse_id'] = self.warehouse_id + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> PublishRequest: + """Deserializes the PublishRequest from a dictionary.""" + return cls(dashboard_id=d.get('dashboard_id', None), + embed_credentials=d.get('embed_credentials', None), + warehouse_id=d.get('warehouse_id', None)) + + +class LakeviewAPI: + """These APIs provide specific management operations for Lakeview dashboards. Generic resource management can + be done with Workspace API (import, export, get-status, list, delete).""" + + def __init__(self, api_client): + self._api = api_client + + def publish(self, + dashboard_id: str, + *, + embed_credentials: Optional[bool] = None, + warehouse_id: Optional[str] = None): + """Publish dashboard. + + Publish the current draft dashboard. + + :param dashboard_id: str + UUID identifying the dashboard to be published. + :param embed_credentials: bool (optional) + Flag to indicate if the publisher's credentials should be embedded in the published dashboard. These + embedded credentials will be used to execute the published dashboard's queries. + :param warehouse_id: str (optional) + The ID of the warehouse that can be used to override the warehouse which was set in the draft. + + + """ + body = {} + if embed_credentials is not None: body['embed_credentials'] = embed_credentials + if warehouse_id is not None: body['warehouse_id'] = warehouse_id + headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } + self._api.do('POST', + f'/api/2.0/lakeview/dashboards/{dashboard_id}/published', + body=body, + headers=headers) diff --git a/databricks/sdk/service/files.py b/databricks/sdk/service/files.py index 29fb916f2..c1c3c184b 100755 --- a/databricks/sdk/service/files.py +++ b/databricks/sdk/service/files.py @@ -234,7 +234,7 @@ def from_dict(cls, d: Dict[str, any]) -> Put: @dataclass class ReadResponse: bytes_read: Optional[int] = None - """The number of bytes read (could be less than `length` if we hit end of file). This refers to + """The number of bytes read (could be less than ``length`` if we hit end of file). This refers to number of bytes read in unencoded version (response data is base64-encoded).""" data: Optional[str] = None @@ -264,9 +264,9 @@ def add_block(self, handle: int, data: str): """Append data block. Appends a block of data to the stream specified by the input handle. If the handle does not exist, - this call will throw an exception with `RESOURCE_DOES_NOT_EXIST`. + this call will throw an exception with ``RESOURCE_DOES_NOT_EXIST``. - If the block of data exceeds 1 MB, this call will throw an exception with `MAX_BLOCK_SIZE_EXCEEDED`. + If the block of data exceeds 1 MB, this call will throw an exception with ``MAX_BLOCK_SIZE_EXCEEDED``. :param handle: int The handle on an open stream. @@ -285,7 +285,7 @@ def close(self, handle: int): """Close the stream. Closes the stream specified by the input handle. 
If the handle does not exist, this call throws an - exception with `RESOURCE_DOES_NOT_EXIST`. + exception with ``RESOURCE_DOES_NOT_EXIST``. :param handle: int The handle on an open stream. @@ -302,12 +302,12 @@ def create(self, path: str, *, overwrite: Optional[bool] = None) -> CreateRespon Opens a stream to write to a file and returns a handle to this stream. There is a 10 minute idle timeout on this handle. If a file or directory already exists on the given path and __overwrite__ is - set to `false`, this call throws an exception with `RESOURCE_ALREADY_EXISTS`. + set to false, this call will throw an exception with ``RESOURCE_ALREADY_EXISTS``. A typical workflow for file upload would be: - 1. Issue a `create` call and get a handle. 2. Issue one or more `add-block` calls with the handle you - have. 3. Issue a `close` call with the handle you have. + 1. Issue a ``create`` call and get a handle. 2. Issue one or more ``add-block`` calls with the handle + you have. 3. Issue a ``close`` call with the handle you have. :param path: str The path of the new file. The path should be the absolute DBFS path. @@ -423,7 +423,7 @@ def move(self, source_path: str, destination_path: str): Moves a file from one location to another location within DBFS. If the source file does not exist, this call throws an exception with `RESOURCE_DOES_NOT_EXIST`. If a file already exists in the destination path, this call throws an exception with `RESOURCE_ALREADY_EXISTS`. If the given source - path is a directory, this call always recursively moves all files.", + path is a directory, this call always recursively moves all files. :param source_path: str The source path of the file or directory. The path should be the absolute DBFS path. @@ -477,7 +477,7 @@ def read(self, path: str, *, length: Optional[int] = None, offset: Optional[int] 1 MB, this call throws an exception with `MAX_READ_SIZE_EXCEEDED`. If `offset + length` exceeds the number of bytes in a file, it reads the contents until the end of - file.", + file. :param path: str The path of the file to read. The path should be the absolute DBFS path. diff --git a/databricks/sdk/service/iam.py b/databricks/sdk/service/iam.py index b1f4dcbf5..5a4131f19 100755 --- a/databricks/sdk/service/iam.py +++ b/databricks/sdk/service/iam.py @@ -23,8 +23,7 @@ class AccessControlRequest: """Permission level""" service_principal_name: Optional[str] = None - """Application ID of an active service principal. Setting this field requires the - `servicePrincipal/user` role.""" + """application ID of a service principal""" user_name: Optional[str] = None """name of the user""" @@ -456,8 +455,7 @@ class PasswordAccessControlRequest: """Permission level""" service_principal_name: Optional[str] = None - """Application ID of an active service principal. 
Setting this field requires the - `servicePrincipal/user` role.""" + """application ID of a service principal""" user_name: Optional[str] = None """name of the user""" @@ -1125,6 +1123,7 @@ def from_dict(cls, d: Dict[str, any]) -> User: class UserSchema(Enum): URN_IETF_PARAMS_SCIM_SCHEMAS_CORE_2_0_USER = 'urn:ietf:params:scim:schemas:core:2.0:User' + URN_IETF_PARAMS_SCIM_SCHEMAS_EXTENSION_WORKSPACE_2_0_USER = 'urn:ietf:params:scim:schemas:extension:workspace:2.0:User' class WorkspacePermission(Enum): diff --git a/databricks/sdk/service/jobs.py b/databricks/sdk/service/jobs.py index 25cc245f2..e7fef2de3 100755 --- a/databricks/sdk/service/jobs.py +++ b/databricks/sdk/service/jobs.py @@ -973,8 +973,7 @@ class JobAccessControlRequest: """Permission level""" service_principal_name: Optional[str] = None - """Application ID of an active service principal. Setting this field requires the - `servicePrincipal/user` role.""" + """application ID of a service principal""" user_name: Optional[str] = None """name of the user""" @@ -3844,8 +3843,9 @@ class SubmitTask: existing_cluster_id: Optional[str] = None """If existing_cluster_id, the ID of an existing cluster that is used for all runs of this task. - When running tasks on an existing cluster, you may need to manually restart the cluster if it - stops responding. We suggest running jobs on new clusters for greater reliability.""" + Only all-purpose clusters are supported. When running tasks on an existing cluster, you may need + to manually restart the cluster if it stops responding. We suggest running jobs on new clusters + for greater reliability.""" health: Optional[JobsHealthRules] = None """An optional set of health rules that can be defined for this job.""" @@ -3998,8 +3998,9 @@ class Task: existing_cluster_id: Optional[str] = None """If existing_cluster_id, the ID of an existing cluster that is used for all runs of this task. - When running tasks on an existing cluster, you may need to manually restart the cluster if it - stops responding. We suggest running jobs on new clusters for greater reliability.""" + Only all-purpose clusters are supported. When running tasks on an existing cluster, you may need + to manually restart the cluster if it stops responding. We suggest running jobs on new clusters + for greater reliability.""" health: Optional[JobsHealthRules] = None """An optional set of health rules that can be defined for this job.""" @@ -5185,9 +5186,10 @@ def repair_run_and_wait( sql_params=sql_params).result(timeout=timeout) def reset(self, job_id: int, new_settings: JobSettings): - """Overwrite all settings for a job. + """Update all job settings (reset). - Overwrite all settings for the given job. Use the Update endpoint to update job settings partially. + Overwrite all settings for the given job. Use the [_Update_ endpoint](:method:jobs/update) to update + job settings partially. :param job_id: int The canonical identifier of the job to reset. This field is required. @@ -5497,10 +5499,10 @@ def update(self, *, fields_to_remove: Optional[List[str]] = None, new_settings: Optional[JobSettings] = None): - """Partially update a job. + """Update job settings partially. - Add, update, or remove specific settings of an existing job. Use the ResetJob to overwrite all job - settings. + Add, update, or remove specific settings of an existing job. Use the [_Reset_ + endpoint](:method:jobs/reset) to overwrite all job settings. :param job_id: int The canonical identifier of the job to update. This field is required. 
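A minimal sketch of the reset/update distinction described in the jobs.py docstrings above (the job ID
and settings are placeholders):

    from databricks.sdk import WorkspaceClient
    from databricks.sdk.service import jobs

    w = WorkspaceClient()
    # update(): add or change only the listed settings, keep everything else on the job.
    w.jobs.update(job_id=123, new_settings=jobs.JobSettings(name='nightly-etl'))  # 123 is a placeholder job ID
    # reset(): overwrite *all* settings of the job with new_settings.
    w.jobs.reset(job_id=123, new_settings=jobs.JobSettings(name='nightly-etl',
                                                           max_concurrent_runs=1))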
diff --git a/databricks/sdk/service/ml.py b/databricks/sdk/service/ml.py index 58410ac9e..ff2da8655 100755 --- a/databricks/sdk/service/ml.py +++ b/databricks/sdk/service/ml.py @@ -882,8 +882,7 @@ class ExperimentAccessControlRequest: """Permission level""" service_principal_name: Optional[str] = None - """Application ID of an active service principal. Setting this field requires the - `servicePrincipal/user` role.""" + """application ID of a service principal""" user_name: Optional[str] = None """name of the user""" @@ -2085,8 +2084,7 @@ class RegisteredModelAccessControlRequest: """Permission level""" service_principal_name: Optional[str] = None - """Application ID of an active service principal. Setting this field requires the - `servicePrincipal/user` role.""" + """application ID of a service principal""" user_name: Optional[str] = None """name of the user""" @@ -3594,7 +3592,8 @@ def delete_runs(self, """Delete runs by creation time. Bulk delete runs in an experiment that were created prior to or at the specified timestamp. Deletes at - most max_runs per request. + most max_runs per request. To call this API from a Databricks Notebook in Python, you can use the + client code snippet on https://learn.microsoft.com/en-us/azure/databricks/mlflow/runs#bulk-delete. :param experiment_id: str The ID of the experiment containing the runs to delete. @@ -4068,7 +4067,8 @@ def restore_runs(self, """Restore runs by deletion time. Bulk restore runs in an experiment that were deleted no earlier than the specified timestamp. Restores - at most max_runs per request. + at most max_runs per request. To call this API from a Databricks Notebook in Python, you can use the + client code snippet on https://learn.microsoft.com/en-us/azure/databricks/mlflow/runs#bulk-restore. :param experiment_id: str The ID of the experiment containing the runs to restore. diff --git a/databricks/sdk/service/pipelines.py b/databricks/sdk/service/pipelines.py index 560e76991..6ef8efa73 100755 --- a/databricks/sdk/service/pipelines.py +++ b/databricks/sdk/service/pipelines.py @@ -724,8 +724,7 @@ class PipelineAccessControlRequest: """Permission level""" service_principal_name: Optional[str] = None - """Application ID of an active service principal. Setting this field requires the - `servicePrincipal/user` role.""" + """application ID of a service principal""" user_name: Optional[str] = None """name of the user""" @@ -832,6 +831,11 @@ class PipelineCluster: """Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used.""" + init_scripts: Optional[List[compute.InitScriptInfo]] = None + """The configuration for storing init scripts. Any number of destinations can be specified. The + scripts are executed sequentially in the order provided. 
If `cluster_log_conf` is specified, + init script logs are sent to `//init_scripts`.""" + instance_pool_id: Optional[str] = None """The optional ID of the instance pool to which the cluster belongs.""" @@ -895,6 +899,7 @@ def as_dict(self) -> dict: body['driver_instance_pool_id'] = self.driver_instance_pool_id if self.driver_node_type_id is not None: body['driver_node_type_id'] = self.driver_node_type_id if self.gcp_attributes: body['gcp_attributes'] = self.gcp_attributes.as_dict() + if self.init_scripts: body['init_scripts'] = [v.as_dict() for v in self.init_scripts] if self.instance_pool_id is not None: body['instance_pool_id'] = self.instance_pool_id if self.label is not None: body['label'] = self.label if self.node_type_id is not None: body['node_type_id'] = self.node_type_id @@ -917,6 +922,7 @@ def from_dict(cls, d: Dict[str, any]) -> PipelineCluster: driver_instance_pool_id=d.get('driver_instance_pool_id', None), driver_node_type_id=d.get('driver_node_type_id', None), gcp_attributes=_from_dict(d, 'gcp_attributes', compute.GcpAttributes), + init_scripts=_repeated_dict(d, 'init_scripts', compute.InitScriptInfo), instance_pool_id=d.get('instance_pool_id', None), label=d.get('label', None), node_type_id=d.get('node_type_id', None), @@ -1402,6 +1408,10 @@ class StartUpdate: full_refresh_selection are empty, this is a full graph update. Full Refresh on a table means that the states of the table will be reset before the refresh.""" + validate_only: Optional[bool] = None + """If true, this update only validates the correctness of pipeline source code but does not + materialize or publish any datasets.""" + def as_dict(self) -> dict: """Serializes the StartUpdate into a dictionary suitable for use as a JSON request body.""" body = {} @@ -1411,6 +1421,7 @@ def as_dict(self) -> dict: body['full_refresh_selection'] = [v for v in self.full_refresh_selection] if self.pipeline_id is not None: body['pipeline_id'] = self.pipeline_id if self.refresh_selection: body['refresh_selection'] = [v for v in self.refresh_selection] + if self.validate_only is not None: body['validate_only'] = self.validate_only return body @classmethod @@ -1420,7 +1431,8 @@ def from_dict(cls, d: Dict[str, any]) -> StartUpdate: full_refresh=d.get('full_refresh', None), full_refresh_selection=d.get('full_refresh_selection', None), pipeline_id=d.get('pipeline_id', None), - refresh_selection=d.get('refresh_selection', None)) + refresh_selection=d.get('refresh_selection', None), + validate_only=d.get('validate_only', None)) class StartUpdateCause(Enum): @@ -1486,6 +1498,10 @@ class UpdateInfo: update_id: Optional[str] = None """The ID of this update.""" + validate_only: Optional[bool] = None + """If true, this update only validates the correctness of pipeline source code but does not + materialize or publish any datasets.""" + def as_dict(self) -> dict: """Serializes the UpdateInfo into a dictionary suitable for use as a JSON request body.""" body = {} @@ -1500,6 +1516,7 @@ def as_dict(self) -> dict: if self.refresh_selection: body['refresh_selection'] = [v for v in self.refresh_selection] if self.state is not None: body['state'] = self.state.value if self.update_id is not None: body['update_id'] = self.update_id + if self.validate_only is not None: body['validate_only'] = self.validate_only return body @classmethod @@ -1514,7 +1531,8 @@ def from_dict(cls, d: Dict[str, any]) -> UpdateInfo: pipeline_id=d.get('pipeline_id', None), refresh_selection=d.get('refresh_selection', None), state=_enum(d, 'state', UpdateInfoState), - 
update_id=d.get('update_id', None)) + update_id=d.get('update_id', None), + validate_only=d.get('validate_only', None)) class UpdateInfoCause(Enum): @@ -2007,7 +2025,8 @@ def start_update(self, cause: Optional[StartUpdateCause] = None, full_refresh: Optional[bool] = None, full_refresh_selection: Optional[List[str]] = None, - refresh_selection: Optional[List[str]] = None) -> StartUpdateResponse: + refresh_selection: Optional[List[str]] = None, + validate_only: Optional[bool] = None) -> StartUpdateResponse: """Start a pipeline. Starts a new update for the pipeline. If there is already an active update for the pipeline, the @@ -2025,6 +2044,9 @@ def start_update(self, A list of tables to update without fullRefresh. If both refresh_selection and full_refresh_selection are empty, this is a full graph update. Full Refresh on a table means that the states of the table will be reset before the refresh. + :param validate_only: bool (optional) + If true, this update only validates the correctness of pipeline source code but does not materialize + or publish any datasets. :returns: :class:`StartUpdateResponse` """ @@ -2034,6 +2056,7 @@ def start_update(self, if full_refresh_selection is not None: body['full_refresh_selection'] = [v for v in full_refresh_selection] if refresh_selection is not None: body['refresh_selection'] = [v for v in refresh_selection] + if validate_only is not None: body['validate_only'] = validate_only headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } res = self._api.do('POST', f'/api/2.0/pipelines/{pipeline_id}/updates', body=body, headers=headers) return StartUpdateResponse.from_dict(res) diff --git a/databricks/sdk/service/serving.py b/databricks/sdk/service/serving.py index b0d8e7f5d..b5cbdb702 100755 --- a/databricks/sdk/service/serving.py +++ b/databricks/sdk/service/serving.py @@ -1939,8 +1939,7 @@ class ServingEndpointAccessControlRequest: """Permission level""" service_principal_name: Optional[str] = None - """Application ID of an active service principal. Setting this field requires the - `servicePrincipal/user` role.""" + """application ID of a service principal""" user_name: Optional[str] = None """name of the user""" diff --git a/databricks/sdk/service/settings.py b/databricks/sdk/service/settings.py index 15bca6405..341108cf6 100755 --- a/databricks/sdk/service/settings.py +++ b/databricks/sdk/service/settings.py @@ -90,15 +90,17 @@ def from_dict(cls, d: Dict[str, any]) -> CreateNetworkConnectivityConfigRequest: @dataclass class CreateOboTokenRequest: + """Configuration details for creating on-behalf tokens.""" + application_id: str """Application ID of the service principal.""" - lifetime_seconds: int - """The number of seconds before the token expires.""" - comment: Optional[str] = None """Comment that describes the purpose of the token.""" + lifetime_seconds: Optional[int] = None + """The number of seconds before the token expires.""" + def as_dict(self) -> dict: """Serializes the CreateOboTokenRequest into a dictionary suitable for use as a JSON request body.""" body = {} @@ -117,6 +119,8 @@ def from_dict(cls, d: Dict[str, any]) -> CreateOboTokenRequest: @dataclass class CreateOboTokenResponse: + """An on-behalf token was successfully created for the service principal.""" + token_info: Optional[TokenInfo] = None token_value: Optional[str] = None @@ -182,7 +186,7 @@ class CreateTokenRequest: lifetime_seconds: Optional[int] = None """The lifetime of the token, in seconds. 
- If the ifetime is not specified, this token remains valid indefinitely.""" + If the lifetime is not specified, this token remains valid indefinitely.""" def as_dict(self) -> dict: """Serializes the CreateTokenRequest into a dictionary suitable for use as a JSON request body.""" @@ -454,6 +458,24 @@ def from_dict(cls, d: Dict[str, any]) -> GetTokenPermissionLevelsResponse: return cls(permission_levels=_repeated_dict(d, 'permission_levels', TokenPermissionsDescription)) +@dataclass +class GetTokenResponse: + """Token with specified Token ID was successfully returned.""" + + token_info: Optional[TokenInfo] = None + + def as_dict(self) -> dict: + """Serializes the GetTokenResponse into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.token_info: body['token_info'] = self.token_info.as_dict() + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> GetTokenResponse: + """Deserializes the GetTokenResponse from a dictionary.""" + return cls(token_info=_from_dict(d, 'token_info', TokenInfo)) + + @dataclass class IpAccessListInfo: """Definition of an IP Access list""" @@ -582,9 +604,29 @@ def from_dict(cls, d: Dict[str, any]) -> ListNetworkConnectivityConfigurationsRe next_page_token=d.get('next_page_token', None)) +@dataclass +class ListPublicTokensResponse: + token_infos: Optional[List[PublicTokenInfo]] = None + """The information for each token.""" + + def as_dict(self) -> dict: + """Serializes the ListPublicTokensResponse into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.token_infos: body['token_infos'] = [v.as_dict() for v in self.token_infos] + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> ListPublicTokensResponse: + """Deserializes the ListPublicTokensResponse from a dictionary.""" + return cls(token_infos=_repeated_dict(d, 'token_infos', PublicTokenInfo)) + + @dataclass class ListTokensResponse: + """Tokens were successfully returned.""" + token_infos: Optional[List[TokenInfo]] = None + """Token metadata of each user-created token in the workspace""" def as_dict(self) -> dict: """Serializes the ListTokensResponse into a dictionary suitable for use as a JSON request body.""" @@ -998,7 +1040,7 @@ class ReplaceIpAccessList: """Specifies whether this IP access list is enabled.""" ip_access_list_id: Optional[str] = None - """The ID for the corresponding IP access list to modify""" + """The ID for the corresponding IP access list""" ip_addresses: Optional[List[str]] = None @@ -1065,8 +1107,7 @@ class TokenAccessControlRequest: """Permission level""" service_principal_name: Optional[str] = None - """Application ID of an active service principal. Setting this field requires the - `servicePrincipal/user` role.""" + """application ID of a service principal""" user_name: Optional[str] = None """name of the user""" @@ -1283,7 +1324,7 @@ class UpdateIpAccessList: """Specifies whether this IP access list is enabled.""" ip_access_list_id: Optional[str] = None - """The ID for the corresponding IP access list to modify""" + """The ID for the corresponding IP access list""" ip_addresses: Optional[List[str]] = None @@ -1391,6 +1432,7 @@ def delete(self, ip_access_list_id: str): Deletes an IP access list, specified by its list ID. :param ip_access_list_id: str + The ID for the corresponding IP access list """ @@ -1406,6 +1448,7 @@ def get(self, ip_access_list_id: str) -> GetIpAccessListResponse: Gets an IP access list, specified by its list ID. 
:param ip_access_list_id: str + The ID for the corresponding IP access list :returns: :class:`GetIpAccessListResponse` """ @@ -1451,6 +1494,7 @@ def replace(self, effect. :param ip_access_list_id: str + The ID for the corresponding IP access list :param label: str Label for the IP access list. This **cannot** be empty. :param list_type: :class:`ListType` @@ -1499,6 +1543,7 @@ def update(self, It can take a few minutes for the changes to take effect. :param ip_access_list_id: str + The ID for the corresponding IP access list :param enabled: bool (optional) Specifies whether this IP access list is enabled. :param ip_addresses: List[str] (optional) @@ -1715,7 +1760,7 @@ def delete(self, ip_access_list_id: str): Deletes an IP access list, specified by its list ID. :param ip_access_list_id: str - The ID for the corresponding IP access list to modify + The ID for the corresponding IP access list """ @@ -1729,7 +1774,7 @@ def get(self, ip_access_list_id: str) -> FetchIpAccessListResponse: Gets an IP access list, specified by its list ID. :param ip_access_list_id: str - The ID for the corresponding IP access list to modify + The ID for the corresponding IP access list :returns: :class:`FetchIpAccessListResponse` """ @@ -1772,7 +1817,7 @@ def replace(self, :method:workspaceconf/setStatus. :param ip_access_list_id: str - The ID for the corresponding IP access list to modify + The ID for the corresponding IP access list :param label: str Label for the IP access list. This **cannot** be empty. :param list_type: :class:`ListType` @@ -1819,7 +1864,7 @@ def update(self, no effect until you enable the feature. See :method:workspaceconf/setStatus. :param ip_access_list_id: str - The ID for the corresponding IP access list to modify + The ID for the corresponding IP access list :param enabled: bool (optional) Specifies whether this IP access list is enabled. :param ip_addresses: List[str] (optional) @@ -2196,19 +2241,19 @@ def __init__(self, api_client): def create_obo_token(self, application_id: str, - lifetime_seconds: int, *, - comment: Optional[str] = None) -> CreateOboTokenResponse: + comment: Optional[str] = None, + lifetime_seconds: Optional[int] = None) -> CreateOboTokenResponse: """Create on-behalf token. Creates a token on behalf of a service principal. :param application_id: str Application ID of the service principal. - :param lifetime_seconds: int - The number of seconds before the token expires. :param comment: str (optional) Comment that describes the purpose of the token. + :param lifetime_seconds: int (optional) + The number of seconds before the token expires. :returns: :class:`CreateOboTokenResponse` """ @@ -2234,10 +2279,10 @@ def delete(self, token_id: str): """ - headers = {} + headers = {'Accept': 'application/json', } self._api.do('DELETE', f'/api/2.0/token-management/tokens/{token_id}', headers=headers) - def get(self, token_id: str) -> TokenInfo: + def get(self, token_id: str) -> GetTokenResponse: """Get token info. Gets information about a token, specified by its ID. @@ -2245,12 +2290,12 @@ def get(self, token_id: str) -> TokenInfo: :param token_id: str The ID of the token to get. - :returns: :class:`TokenInfo` + :returns: :class:`GetTokenResponse` """ headers = {'Accept': 'application/json', } res = self._api.do('GET', f'/api/2.0/token-management/tokens/{token_id}', headers=headers) - return TokenInfo.from_dict(res) + return GetTokenResponse.from_dict(res) def get_permission_levels(self) -> GetTokenPermissionLevelsResponse: """Get token permission levels. 
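# --- Editor's note, not part of the patch above: with these changes, `lifetime_seconds` is
# optional on create_obo_token() and get() now returns a GetTokenResponse wrapper. A minimal
# sketch, assuming a WorkspaceClient with workspace-admin rights and a hypothetical service
# principal application ID:

from databricks.sdk import WorkspaceClient

w = WorkspaceClient()

# lifetime_seconds may now be omitted to create a token without an explicit expiry.
obo = w.token_management.create_obo_token(
    application_id="00000000-0000-0000-0000-000000000000",
    comment="illustrative on-behalf-of token")

# get() wraps the token metadata in GetTokenResponse; the TokenInfo is on .token_info.
token_info = w.token_management.get(token_id=obo.token_info.token_id).token_info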
@@ -2280,13 +2325,13 @@ def get_permissions(self) -> TokenPermissions: def list(self, *, - created_by_id: Optional[str] = None, + created_by_id: Optional[int] = None, created_by_username: Optional[str] = None) -> Iterator[TokenInfo]: """List all tokens. Lists all tokens associated with the specified workspace or user. - :param created_by_id: str (optional) + :param created_by_id: int (optional) User ID of the user that created the token. :param created_by_username: str (optional) Username of the user that created the token. @@ -2363,7 +2408,7 @@ def create(self, :param lifetime_seconds: int (optional) The lifetime of the token, in seconds. - If the ifetime is not specified, this token remains valid indefinitely. + If the lifetime is not specified, this token remains valid indefinitely. :returns: :class:`CreateTokenResponse` """ @@ -2391,17 +2436,17 @@ def delete(self, token_id: str): headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } self._api.do('POST', '/api/2.0/token/delete', body=body, headers=headers) - def list(self) -> Iterator[TokenInfo]: + def list(self) -> Iterator[PublicTokenInfo]: """List tokens. Lists all the valid tokens for a user-workspace pair. - :returns: Iterator over :class:`TokenInfo` + :returns: Iterator over :class:`PublicTokenInfo` """ headers = {'Accept': 'application/json', } json = self._api.do('GET', '/api/2.0/token/list', headers=headers) - parsed = ListTokensResponse.from_dict(json).token_infos + parsed = ListPublicTokensResponse.from_dict(json).token_infos return parsed if parsed is not None else [] diff --git a/databricks/sdk/service/sharing.py b/databricks/sdk/service/sharing.py index bcc21cf5b..37753bdcb 100755 --- a/databricks/sdk/service/sharing.py +++ b/databricks/sdk/service/sharing.py @@ -584,7 +584,8 @@ class ListCleanRoomsResponse: """An array of clean rooms. Remote details (central) are not included.""" next_page_token: Optional[str] = None - """Token to retrieve the next page of results. Absent if there are no more pages.""" + """Opaque token to retrieve the next page of results. Absent if there are no more pages. + __page_token__ should be set to this value for the next request (for the next page of results).""" def as_dict(self) -> dict: """Serializes the ListCleanRoomsResponse into a dictionary suitable for use as a JSON request body.""" @@ -1626,9 +1627,12 @@ def list(self, array. :param max_results: int (optional) - Maximum number of clean rooms to return. + Maximum number of clean rooms to return. If not set, all the clean rooms are returned (not + recommended). - when set to a value greater than 0, the page length is the minimum of this value and + a server configured value; - when set to 0, the page length is set to a server configured value + (recommended); - when set to a value less than 0, an invalid parameter error is returned; :param page_token: str (optional) - Pagination token to go to next page based on previous query. + Opaque pagination token to go to next page based on previous query. 
:returns: Iterator over :class:`CleanRoomInfo` """ diff --git a/databricks/sdk/service/sql.py b/databricks/sdk/service/sql.py index 3e3a60420..7eacd64e3 100755 --- a/databricks/sdk/service/sql.py +++ b/databricks/sdk/service/sql.py @@ -23,8 +23,8 @@ class AccessControl: group_name: Optional[str] = None permission_level: Optional[PermissionLevel] = None - """* `CAN_VIEW`: Can view the query * `CAN_RUN`: Can run the query * `CAN_MANAGE`: Can manage the - query""" + """* `CAN_VIEW`: Can view the query * `CAN_RUN`: Can run the query * `CAN_EDIT`: Can edit the query + * `CAN_MANAGE`: Can manage the query""" user_name: Optional[str] = None @@ -346,7 +346,6 @@ def from_dict(cls, d: Dict[str, any]) -> ChannelInfo: class ChannelName(Enum): - """Name of the channel""" CHANNEL_NAME_CURRENT = 'CHANNEL_NAME_CURRENT' CHANNEL_NAME_CUSTOM = 'CHANNEL_NAME_CUSTOM' @@ -681,8 +680,8 @@ class Dashboard: """The identifier of the workspace folder containing the object.""" permission_tier: Optional[PermissionLevel] = None - """* `CAN_VIEW`: Can view the query * `CAN_RUN`: Can run the query * `CAN_MANAGE`: Can manage the - query""" + """* `CAN_VIEW`: Can view the query * `CAN_RUN`: Can run the query * `CAN_EDIT`: Can edit the query + * `CAN_MANAGE`: Can manage the query""" slug: Optional[str] = None """URL slug. Usually mirrors the query name with dashes (`-`) instead of spaces. Appears in the URL @@ -745,6 +744,33 @@ def from_dict(cls, d: Dict[str, any]) -> Dashboard: widgets=_repeated_dict(d, 'widgets', Widget)) +@dataclass +class DashboardEditContent: + dashboard_id: Optional[str] = None + + name: Optional[str] = None + """The title of this dashboard that appears in list views and at the top of the dashboard page.""" + + run_as_role: Optional[RunAsRole] = None + """Sets the **Run as** role for the object. Must be set to one of `"viewer"` (signifying "run as + viewer" behavior) or `"owner"` (signifying "run as owner" behavior)""" + + def as_dict(self) -> dict: + """Serializes the DashboardEditContent into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.dashboard_id is not None: body['dashboard_id'] = self.dashboard_id + if self.name is not None: body['name'] = self.name + if self.run_as_role is not None: body['run_as_role'] = self.run_as_role.value + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> DashboardEditContent: + """Deserializes the DashboardEditContent from a dictionary.""" + return cls(dashboard_id=d.get('dashboard_id', None), + name=d.get('name', None), + run_as_role=_enum(d, 'run_as_role', RunAsRole)) + + @dataclass class DashboardOptions: moved_to_trash_at: Optional[str] = None @@ -763,6 +789,49 @@ def from_dict(cls, d: Dict[str, any]) -> DashboardOptions: return cls(moved_to_trash_at=d.get('moved_to_trash_at', None)) +@dataclass +class DashboardPostContent: + name: str + """The title of this dashboard that appears in list views and at the top of the dashboard page.""" + + dashboard_filters_enabled: Optional[bool] = None + """Indicates whether the dashboard filters are enabled""" + + is_favorite: Optional[bool] = None + """Indicates whether this dashboard object should appear in the current user's favorites list.""" + + parent: Optional[str] = None + """The identifier of the workspace folder containing the object.""" + + run_as_role: Optional[RunAsRole] = None + """Sets the **Run as** role for the object. 
Must be set to one of `"viewer"` (signifying "run as + viewer" behavior) or `"owner"` (signifying "run as owner" behavior)""" + + tags: Optional[List[str]] = None + + def as_dict(self) -> dict: + """Serializes the DashboardPostContent into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.dashboard_filters_enabled is not None: + body['dashboard_filters_enabled'] = self.dashboard_filters_enabled + if self.is_favorite is not None: body['is_favorite'] = self.is_favorite + if self.name is not None: body['name'] = self.name + if self.parent is not None: body['parent'] = self.parent + if self.run_as_role is not None: body['run_as_role'] = self.run_as_role.value + if self.tags: body['tags'] = [v for v in self.tags] + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> DashboardPostContent: + """Deserializes the DashboardPostContent from a dictionary.""" + return cls(dashboard_filters_enabled=d.get('dashboard_filters_enabled', None), + is_favorite=d.get('is_favorite', None), + name=d.get('name', None), + parent=d.get('parent', None), + run_as_role=_enum(d, 'run_as_role', RunAsRole), + tags=d.get('tags', None)) + + @dataclass class DataSource: """A JSON object representing a DBSQL data source / SQL warehouse.""" @@ -1488,7 +1557,13 @@ class ExternalLink: external_link: Optional[str] = None """A presigned URL pointing to a chunk of result data, hosted by an external service, with a short expiration time (<= 15 minutes). As this URL contains a temporary credential, it should be - considered sensitive and the client should expose this URL in a log.""" + considered sensitive and the client should not expose this URL in a log.""" + + http_headers: Optional[Dict[str, str]] = None + """HTTP headers that must be included with a GET request to the `external_link`. Each header is + provided as a key-value pair. Headers are typically used to pass a decryption key to the + external service. The values of these headers should be considered sensitive and the client + should not expose these values in a log.""" next_chunk_index: Optional[int] = None """When fetching, provides the `chunk_index` for the _next_ chunk. 
If absent, indicates there are @@ -1513,6 +1588,7 @@ def as_dict(self) -> dict: if self.chunk_index is not None: body['chunk_index'] = self.chunk_index if self.expiration is not None: body['expiration'] = self.expiration if self.external_link is not None: body['external_link'] = self.external_link + if self.http_headers: body['http_headers'] = self.http_headers if self.next_chunk_index is not None: body['next_chunk_index'] = self.next_chunk_index if self.next_chunk_internal_link is not None: body['next_chunk_internal_link'] = self.next_chunk_internal_link @@ -1527,6 +1603,7 @@ def from_dict(cls, d: Dict[str, any]) -> ExternalLink: chunk_index=d.get('chunk_index', None), expiration=d.get('expiration', None), external_link=d.get('external_link', None), + http_headers=d.get('http_headers', None), next_chunk_index=d.get('next_chunk_index', None), next_chunk_internal_link=d.get('next_chunk_internal_link', None), row_count=d.get('row_count', None), @@ -2025,9 +2102,10 @@ class ParameterType(Enum): class PermissionLevel(Enum): - """* `CAN_VIEW`: Can view the query * `CAN_RUN`: Can run the query * `CAN_MANAGE`: Can manage the - query""" + """* `CAN_VIEW`: Can view the query * `CAN_RUN`: Can run the query * `CAN_EDIT`: Can edit the query + * `CAN_MANAGE`: Can manage the query""" + CAN_EDIT = 'CAN_EDIT' CAN_MANAGE = 'CAN_MANAGE' CAN_RUN = 'CAN_RUN' CAN_VIEW = 'CAN_VIEW' @@ -2100,8 +2178,8 @@ class Query: """The identifier of the workspace folder containing the object.""" permission_tier: Optional[PermissionLevel] = None - """* `CAN_VIEW`: Can view the query * `CAN_RUN`: Can run the query * `CAN_MANAGE`: Can manage the - query""" + """* `CAN_VIEW`: Can view the query * `CAN_RUN`: Can run the query * `CAN_EDIT`: Can edit the query + * `CAN_MANAGE`: Can manage the query""" query: Optional[str] = None """The text of the query to be run.""" @@ -2110,7 +2188,8 @@ class Query: """A SHA-256 hash of the query text along with the authenticated user ID.""" run_as_role: Optional[RunAsRole] = None - """Run as role""" + """Sets the **Run as** role for the object. Must be set to one of `"viewer"` (signifying "run as + viewer" behavior) or `"owner"` (signifying "run as owner" behavior)""" tags: Optional[List[str]] = None @@ -2206,6 +2285,10 @@ class QueryEditContent: query_id: Optional[str] = None + run_as_role: Optional[RunAsRole] = None + """Sets the **Run as** role for the object. Must be set to one of `"viewer"` (signifying "run as + viewer" behavior) or `"owner"` (signifying "run as owner" behavior)""" + def as_dict(self) -> dict: """Serializes the QueryEditContent into a dictionary suitable for use as a JSON request body.""" body = {} @@ -2215,6 +2298,7 @@ def as_dict(self) -> dict: if self.options: body['options'] = self.options if self.query is not None: body['query'] = self.query if self.query_id is not None: body['query_id'] = self.query_id + if self.run_as_role is not None: body['run_as_role'] = self.run_as_role.value return body @classmethod @@ -2225,7 +2309,8 @@ def from_dict(cls, d: Dict[str, any]) -> QueryEditContent: name=d.get('name', None), options=d.get('options', None), query=d.get('query', None), - query_id=d.get('query_id', None)) + query_id=d.get('query_id', None), + run_as_role=_enum(d, 'run_as_role', RunAsRole)) @dataclass @@ -2626,7 +2711,8 @@ class QueryPostContent: """The text of the query to be run.""" run_as_role: Optional[RunAsRole] = None - """Run as role""" + """Sets the **Run as** role for the object. 
Must be set to one of `"viewer"` (signifying "run as + viewer" behavior) or `"owner"` (signifying "run as owner" behavior)""" def as_dict(self) -> dict: """Serializes the QueryPostContent into a dictionary suitable for use as a JSON request body.""" @@ -2848,7 +2934,8 @@ def from_dict(cls, d: Dict[str, any]) -> ResultSchema: class RunAsRole(Enum): - """Run as role""" + """Sets the **Run as** role for the object. Must be set to one of `"viewer"` (signifying "run as + viewer" behavior) or `"owner"` (signifying "run as owner" behavior)""" OWNER = 'owner' VIEWER = 'viewer' @@ -3353,8 +3440,7 @@ class WarehouseAccessControlRequest: """Permission level""" service_principal_name: Optional[str] = None - """Application ID of an active service principal. Setting this field requires the - `servicePrincipal/user` role.""" + """application ID of a service principal""" user_name: Optional[str] = None """name of the user""" @@ -3903,12 +3989,12 @@ def create(self, :param dashboard_filters_enabled: bool (optional) Indicates whether the dashboard filters are enabled :param is_favorite: bool (optional) - Indicates whether this query object should appear in the current user's favorites list. The - application uses this flag to determine whether or not the "favorite star " should selected. + Indicates whether this dashboard object should appear in the current user's favorites list. :param parent: str (optional) The identifier of the workspace folder containing the object. :param run_as_role: :class:`RunAsRole` (optional) - Run as role + Sets the **Run as** role for the object. Must be set to one of `"viewer"` (signifying "run as + viewer" behavior) or `"owner"` (signifying "run as owner" behavior) :param tags: List[str] (optional) :returns: :class:`Dashboard` @@ -4010,6 +4096,37 @@ def restore(self, dashboard_id: str): headers = {'Accept': 'application/json', } self._api.do('POST', f'/api/2.0/preview/sql/dashboards/trash/{dashboard_id}', headers=headers) + def update(self, + dashboard_id: str, + *, + name: Optional[str] = None, + run_as_role: Optional[RunAsRole] = None) -> Dashboard: + """Change a dashboard definition. + + Modify this dashboard definition. This operation only affects attributes of the dashboard object. It + does not add, modify, or remove widgets. + + **Note**: You cannot undo this operation. + + :param dashboard_id: str + :param name: str (optional) + The title of this dashboard that appears in list views and at the top of the dashboard page. + :param run_as_role: :class:`RunAsRole` (optional) + Sets the **Run as** role for the object. Must be set to one of `"viewer"` (signifying "run as + viewer" behavior) or `"owner"` (signifying "run as owner" behavior) + + :returns: :class:`Dashboard` + """ + body = {} + if name is not None: body['name'] = name + if run_as_role is not None: body['run_as_role'] = run_as_role.value + headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } + res = self._api.do('POST', + f'/api/2.0/preview/sql/dashboards/{dashboard_id}', + body=body, + headers=headers) + return Dashboard.from_dict(res) + class DataSourcesAPI: """This API is provided to assist you in making new query objects. When creating a query object, you may @@ -4175,7 +4292,8 @@ def create(self, :param query: str (optional) The text of the query to be run. :param run_as_role: :class:`RunAsRole` (optional) - Run as role + Sets the **Run as** role for the object. 
Must be set to one of `"viewer"` (signifying "run as + viewer" behavior) or `"owner"` (signifying "run as owner" behavior) :returns: :class:`Query` """ @@ -4230,6 +4348,9 @@ def list(self, Gets a list of queries. Optionally, this list can be filtered by a search term. + ### **Warning: Calling this API concurrently 10 or more times could result in throttling, service + degradation, or a temporary ban.** + :param order: str (optional) Name of query attribute to order by. Default sort order is ascending. Append a dash (`-`) to order descending instead. @@ -4297,7 +4418,8 @@ def update(self, description: Optional[str] = None, name: Optional[str] = None, options: Optional[Any] = None, - query: Optional[str] = None) -> Query: + query: Optional[str] = None, + run_as_role: Optional[RunAsRole] = None) -> Query: """Change a query definition. Modify this query definition. @@ -4320,6 +4442,9 @@ def update(self, overridden at runtime. :param query: str (optional) The text of the query to be run. + :param run_as_role: :class:`RunAsRole` (optional) + Sets the **Run as** role for the object. Must be set to one of `"viewer"` (signifying "run as + viewer" behavior) or `"owner"` (signifying "run as owner" behavior) :returns: :class:`Query` """ @@ -4329,6 +4454,7 @@ def update(self, if name is not None: body['name'] = name if options is not None: body['options'] = options if query is not None: body['query'] = query + if run_as_role is not None: body['run_as_role'] = run_as_role.value headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } res = self._api.do('POST', f'/api/2.0/preview/sql/queries/{query_id}', body=body, headers=headers) return Query.from_dict(res) diff --git a/databricks/sdk/service/vectorsearch.py b/databricks/sdk/service/vectorsearch.py new file mode 100755 index 000000000..95ad717cb --- /dev/null +++ b/databricks/sdk/service/vectorsearch.py @@ -0,0 +1,1206 @@ +# Code generated from OpenAPI specs by Databricks SDK Generator. DO NOT EDIT. 
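# --- Editor's note, not part of the generated module below: a usage sketch for the new
# vectorsearch service. The dataclasses and enums come from this file; the client attribute
# and method names (w.vector_search_endpoints, w.vector_search_indexes, create_endpoint,
# create_index, query_index) and all example values (endpoint, table, column and model
# endpoint names) are assumptions for illustration only.

from databricks.sdk import WorkspaceClient
from databricks.sdk.service import vectorsearch

w = WorkspaceClient()

# Provision a STANDARD endpoint to host vector indexes.
w.vector_search_endpoints.create_endpoint(name="demo-endpoint",
                                          endpoint_type=vectorsearch.EndpointType.STANDARD)

# Create a DELTA_SYNC index that embeds a text column through a model serving endpoint.
spec = vectorsearch.DeltaSyncVectorIndexSpecRequest(
    source_table="main.default.docs",
    pipeline_type=vectorsearch.PipelineType.TRIGGERED,
    embedding_source_columns=[
        vectorsearch.EmbeddingSourceColumn(
            name="text",
            embedding_config=vectorsearch.EmbeddingConfig(
                embedding_model_endpoint_name="databricks-bge-large-en"))
    ])
w.vector_search_indexes.create_index(name="main.default.docs_index",
                                     primary_key="id",
                                     index_type=vectorsearch.VectorIndexType.DELTA_SYNC,
                                     delta_sync_vector_index_spec=spec,
                                     endpoint_name="demo-endpoint")

# Query by text; the response carries a manifest (columns) and the matching result rows.
resp = w.vector_search_indexes.query_index(index_name="main.default.docs_index",
                                           columns=["id", "text"],
                                           query_text="how do I create an index?",
                                           num_results=5)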
+ +from __future__ import annotations + +import logging +import random +import time +from dataclasses import dataclass +from datetime import timedelta +from enum import Enum +from typing import Callable, Dict, Iterator, List, Optional + +from ..errors import OperationFailed +from ._internal import Wait, _enum, _from_dict, _repeated_dict + +_LOG = logging.getLogger('databricks.sdk') + +# all definitions in this file are in alphabetical order + + +@dataclass +class ColumnInfo: + name: Optional[str] = None + """Name of the column.""" + + def as_dict(self) -> dict: + """Serializes the ColumnInfo into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.name is not None: body['name'] = self.name + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> ColumnInfo: + """Deserializes the ColumnInfo from a dictionary.""" + return cls(name=d.get('name', None)) + + +@dataclass +class CreateEndpoint: + name: str + """Name of endpoint""" + + endpoint_type: EndpointType + """Type of endpoint.""" + + def as_dict(self) -> dict: + """Serializes the CreateEndpoint into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.endpoint_type is not None: body['endpoint_type'] = self.endpoint_type.value + if self.name is not None: body['name'] = self.name + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> CreateEndpoint: + """Deserializes the CreateEndpoint from a dictionary.""" + return cls(endpoint_type=_enum(d, 'endpoint_type', EndpointType), name=d.get('name', None)) + + +@dataclass +class CreateVectorIndexRequest: + name: str + """Name of the index""" + + primary_key: str + """Primary key of the index""" + + index_type: VectorIndexType + """There are 2 types of Vector Search indexes: + + - `DELTA_SYNC`: An index that automatically syncs with a source Delta Table, automatically and + incrementally updating the index as the underlying data in the Delta Table changes. - + `DIRECT_ACCESS`: An index that supports direct read and write of vectors and metadata through + our REST and SDK APIs. With this model, the user manages index updates.""" + + delta_sync_vector_index_spec: Optional[DeltaSyncVectorIndexSpecRequest] = None + """Specification for Delta Sync Index. Required if `index_type` is `DELTA_SYNC`.""" + + direct_access_index_spec: Optional[DirectAccessVectorIndexSpec] = None + """Specification for Direct Vector Access Index. 
Required if `index_type` is `DIRECT_ACCESS`.""" + + endpoint_name: Optional[str] = None + """Name of the endpoint to be used for serving the index""" + + def as_dict(self) -> dict: + """Serializes the CreateVectorIndexRequest into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.delta_sync_vector_index_spec: + body['delta_sync_vector_index_spec'] = self.delta_sync_vector_index_spec.as_dict() + if self.direct_access_index_spec: + body['direct_access_index_spec'] = self.direct_access_index_spec.as_dict() + if self.endpoint_name is not None: body['endpoint_name'] = self.endpoint_name + if self.index_type is not None: body['index_type'] = self.index_type.value + if self.name is not None: body['name'] = self.name + if self.primary_key is not None: body['primary_key'] = self.primary_key + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> CreateVectorIndexRequest: + """Deserializes the CreateVectorIndexRequest from a dictionary.""" + return cls(delta_sync_vector_index_spec=_from_dict(d, 'delta_sync_vector_index_spec', + DeltaSyncVectorIndexSpecRequest), + direct_access_index_spec=_from_dict(d, 'direct_access_index_spec', + DirectAccessVectorIndexSpec), + endpoint_name=d.get('endpoint_name', None), + index_type=_enum(d, 'index_type', VectorIndexType), + name=d.get('name', None), + primary_key=d.get('primary_key', None)) + + +@dataclass +class CreateVectorIndexResponse: + vector_index: Optional[VectorIndex] = None + + def as_dict(self) -> dict: + """Serializes the CreateVectorIndexResponse into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.vector_index: body['vector_index'] = self.vector_index.as_dict() + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> CreateVectorIndexResponse: + """Deserializes the CreateVectorIndexResponse from a dictionary.""" + return cls(vector_index=_from_dict(d, 'vector_index', VectorIndex)) + + +@dataclass +class DeleteDataResult: + """Result of the upsert or delete operation.""" + + failed_primary_keys: Optional[List[str]] = None + """List of primary keys for rows that failed to process.""" + + success_row_count: Optional[int] = None + """Count of successfully processed rows.""" + + def as_dict(self) -> dict: + """Serializes the DeleteDataResult into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.failed_primary_keys: body['failed_primary_keys'] = [v for v in self.failed_primary_keys] + if self.success_row_count is not None: body['success_row_count'] = self.success_row_count + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> DeleteDataResult: + """Deserializes the DeleteDataResult from a dictionary.""" + return cls(failed_primary_keys=d.get('failed_primary_keys', None), + success_row_count=d.get('success_row_count', None)) + + +class DeleteDataStatus(Enum): + """Status of the delete operation.""" + + FAILURE = 'FAILURE' + PARTIAL_SUCCESS = 'PARTIAL_SUCCESS' + SUCCESS = 'SUCCESS' + + +@dataclass +class DeleteDataVectorIndexRequest: + """Request payload for deleting data from a vector index.""" + + primary_keys: List[str] + """List of primary keys for the data to be deleted.""" + + name: Optional[str] = None + """Name of the vector index where data is to be deleted. 
Must be a Direct Vector Access Index.""" + + def as_dict(self) -> dict: + """Serializes the DeleteDataVectorIndexRequest into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.name is not None: body['name'] = self.name + if self.primary_keys: body['primary_keys'] = [v for v in self.primary_keys] + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> DeleteDataVectorIndexRequest: + """Deserializes the DeleteDataVectorIndexRequest from a dictionary.""" + return cls(name=d.get('name', None), primary_keys=d.get('primary_keys', None)) + + +@dataclass +class DeleteDataVectorIndexResponse: + """Response to a delete data vector index request.""" + + result: Optional[DeleteDataResult] = None + """Result of the upsert or delete operation.""" + + status: Optional[DeleteDataStatus] = None + """Status of the delete operation.""" + + def as_dict(self) -> dict: + """Serializes the DeleteDataVectorIndexResponse into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.result: body['result'] = self.result.as_dict() + if self.status is not None: body['status'] = self.status.value + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> DeleteDataVectorIndexResponse: + """Deserializes the DeleteDataVectorIndexResponse from a dictionary.""" + return cls(result=_from_dict(d, 'result', DeleteDataResult), + status=_enum(d, 'status', DeleteDataStatus)) + + +@dataclass +class DeltaSyncVectorIndexSpecRequest: + embedding_source_columns: Optional[List[EmbeddingSourceColumn]] = None + """The columns that contain the embedding source.""" + + embedding_vector_columns: Optional[List[EmbeddingVectorColumn]] = None + """The columns that contain the embedding vectors.""" + + pipeline_type: Optional[PipelineType] = None + """Pipeline execution mode. + + - `TRIGGERED`: If the pipeline uses the triggered execution mode, the system stops processing + after successfully refreshing the source table in the pipeline once, ensuring the table is + updated based on the data available when the update started. 
- `CONTINUOUS`: If the pipeline + uses continuous execution, the pipeline processes new data as it arrives in the source table to + keep vector index fresh.""" + + source_table: Optional[str] = None + """The name of the source table.""" + + def as_dict(self) -> dict: + """Serializes the DeltaSyncVectorIndexSpecRequest into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.embedding_source_columns: + body['embedding_source_columns'] = [v.as_dict() for v in self.embedding_source_columns] + if self.embedding_vector_columns: + body['embedding_vector_columns'] = [v.as_dict() for v in self.embedding_vector_columns] + if self.pipeline_type is not None: body['pipeline_type'] = self.pipeline_type.value + if self.source_table is not None: body['source_table'] = self.source_table + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> DeltaSyncVectorIndexSpecRequest: + """Deserializes the DeltaSyncVectorIndexSpecRequest from a dictionary.""" + return cls(embedding_source_columns=_repeated_dict(d, 'embedding_source_columns', + EmbeddingSourceColumn), + embedding_vector_columns=_repeated_dict(d, 'embedding_vector_columns', + EmbeddingVectorColumn), + pipeline_type=_enum(d, 'pipeline_type', PipelineType), + source_table=d.get('source_table', None)) + + +@dataclass +class DeltaSyncVectorIndexSpecResponse: + embedding_source_columns: Optional[List[EmbeddingSourceColumn]] = None + """The columns that contain the embedding source.""" + + embedding_vector_columns: Optional[List[EmbeddingVectorColumn]] = None + """The columns that contain the embedding vectors.""" + + pipeline_id: Optional[str] = None + """The ID of the pipeline that is used to sync the index.""" + + pipeline_type: Optional[PipelineType] = None + """Pipeline execution mode. + + - `TRIGGERED`: If the pipeline uses the triggered execution mode, the system stops processing + after successfully refreshing the source table in the pipeline once, ensuring the table is + updated based on the data available when the update started. 
- `CONTINUOUS`: If the pipeline + uses continuous execution, the pipeline processes new data as it arrives in the source table to + keep vector index fresh.""" + + source_table: Optional[str] = None + """The name of the source table.""" + + def as_dict(self) -> dict: + """Serializes the DeltaSyncVectorIndexSpecResponse into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.embedding_source_columns: + body['embedding_source_columns'] = [v.as_dict() for v in self.embedding_source_columns] + if self.embedding_vector_columns: + body['embedding_vector_columns'] = [v.as_dict() for v in self.embedding_vector_columns] + if self.pipeline_id is not None: body['pipeline_id'] = self.pipeline_id + if self.pipeline_type is not None: body['pipeline_type'] = self.pipeline_type.value + if self.source_table is not None: body['source_table'] = self.source_table + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> DeltaSyncVectorIndexSpecResponse: + """Deserializes the DeltaSyncVectorIndexSpecResponse from a dictionary.""" + return cls(embedding_source_columns=_repeated_dict(d, 'embedding_source_columns', + EmbeddingSourceColumn), + embedding_vector_columns=_repeated_dict(d, 'embedding_vector_columns', + EmbeddingVectorColumn), + pipeline_id=d.get('pipeline_id', None), + pipeline_type=_enum(d, 'pipeline_type', PipelineType), + source_table=d.get('source_table', None)) + + +@dataclass +class DirectAccessVectorIndexSpec: + embedding_vector_columns: Optional[List[EmbeddingVectorColumn]] = None + + schema_json: Optional[str] = None + """The schema of the index in JSON format. + + Supported types are `integer`, `long`, `float`, `double`, `boolean`, `string`, `date`, + `timestamp`. + + Supported types for vector column: `array`, `array`,`.""" + + def as_dict(self) -> dict: + """Serializes the DirectAccessVectorIndexSpec into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.embedding_vector_columns: + body['embedding_vector_columns'] = [v.as_dict() for v in self.embedding_vector_columns] + if self.schema_json is not None: body['schema_json'] = self.schema_json + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> DirectAccessVectorIndexSpec: + """Deserializes the DirectAccessVectorIndexSpec from a dictionary.""" + return cls(embedding_vector_columns=_repeated_dict(d, 'embedding_vector_columns', + EmbeddingVectorColumn), + schema_json=d.get('schema_json', None)) + + +@dataclass +class EmbeddingConfig: + embedding_model_endpoint_name: Optional[str] = None + """Name of the embedding model endpoint""" + + def as_dict(self) -> dict: + """Serializes the EmbeddingConfig into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.embedding_model_endpoint_name is not None: + body['embedding_model_endpoint_name'] = self.embedding_model_endpoint_name + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> EmbeddingConfig: + """Deserializes the EmbeddingConfig from a dictionary.""" + return cls(embedding_model_endpoint_name=d.get('embedding_model_endpoint_name', None)) + + +@dataclass +class EmbeddingSourceColumn: + embedding_config: Optional[EmbeddingConfig] = None + + name: Optional[str] = None + """Name of the column""" + + def as_dict(self) -> dict: + """Serializes the EmbeddingSourceColumn into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.embedding_config: body['embedding_config'] = self.embedding_config.as_dict() + if self.name is not None: 
body['name'] = self.name + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> EmbeddingSourceColumn: + """Deserializes the EmbeddingSourceColumn from a dictionary.""" + return cls(embedding_config=_from_dict(d, 'embedding_config', EmbeddingConfig), + name=d.get('name', None)) + + +@dataclass +class EmbeddingVectorColumn: + embedding_dimension: Optional[int] = None + """Dimension of the embedding vector""" + + name: Optional[str] = None + """Name of the column""" + + def as_dict(self) -> dict: + """Serializes the EmbeddingVectorColumn into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.embedding_dimension is not None: body['embedding_dimension'] = self.embedding_dimension + if self.name is not None: body['name'] = self.name + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> EmbeddingVectorColumn: + """Deserializes the EmbeddingVectorColumn from a dictionary.""" + return cls(embedding_dimension=d.get('embedding_dimension', None), name=d.get('name', None)) + + +@dataclass +class EndpointInfo: + creation_timestamp: Optional[int] = None + """Timestamp of endpoint creation""" + + creator: Optional[str] = None + """Creator of the endpoint""" + + endpoint_status: Optional[EndpointStatus] = None + """Current status of the endpoint""" + + endpoint_type: Optional[EndpointType] = None + """Type of endpoint.""" + + id: Optional[str] = None + """Unique identifier of the endpoint""" + + last_updated_timestamp: Optional[int] = None + """Timestamp of last update to the endpoint""" + + last_updated_user: Optional[str] = None + """User who last updated the endpoint""" + + name: Optional[str] = None + """Name of endpoint""" + + num_indexes: Optional[int] = None + """Number of indexes on the endpoint""" + + def as_dict(self) -> dict: + """Serializes the EndpointInfo into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.creation_timestamp is not None: body['creation_timestamp'] = self.creation_timestamp + if self.creator is not None: body['creator'] = self.creator + if self.endpoint_status: body['endpoint_status'] = self.endpoint_status.as_dict() + if self.endpoint_type is not None: body['endpoint_type'] = self.endpoint_type.value + if self.id is not None: body['id'] = self.id + if self.last_updated_timestamp is not None: + body['last_updated_timestamp'] = self.last_updated_timestamp + if self.last_updated_user is not None: body['last_updated_user'] = self.last_updated_user + if self.name is not None: body['name'] = self.name + if self.num_indexes is not None: body['num_indexes'] = self.num_indexes + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> EndpointInfo: + """Deserializes the EndpointInfo from a dictionary.""" + return cls(creation_timestamp=d.get('creation_timestamp', None), + creator=d.get('creator', None), + endpoint_status=_from_dict(d, 'endpoint_status', EndpointStatus), + endpoint_type=_enum(d, 'endpoint_type', EndpointType), + id=d.get('id', None), + last_updated_timestamp=d.get('last_updated_timestamp', None), + last_updated_user=d.get('last_updated_user', None), + name=d.get('name', None), + num_indexes=d.get('num_indexes', None)) + + +@dataclass +class EndpointStatus: + """Status information of an endpoint""" + + message: Optional[str] = None + """Additional status message""" + + state: Optional[EndpointStatusState] = None + """Current state of the endpoint""" + + def as_dict(self) -> dict: + """Serializes the EndpointStatus into a dictionary suitable for use as a 
JSON request body.""" + body = {} + if self.message is not None: body['message'] = self.message + if self.state is not None: body['state'] = self.state.value + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> EndpointStatus: + """Deserializes the EndpointStatus from a dictionary.""" + return cls(message=d.get('message', None), state=_enum(d, 'state', EndpointStatusState)) + + +class EndpointStatusState(Enum): + """Current state of the endpoint""" + + OFFLINE = 'OFFLINE' + ONLINE = 'ONLINE' + PROVISIONING = 'PROVISIONING' + + +class EndpointType(Enum): + """Type of endpoint.""" + + STANDARD = 'STANDARD' + + +@dataclass +class ListEndpointResponse: + endpoints: Optional[List[EndpointInfo]] = None + """An array of Endpoint objects""" + + next_page_token: Optional[str] = None + """A token that can be used to get the next page of results. If not present, there are no more + results to show.""" + + def as_dict(self) -> dict: + """Serializes the ListEndpointResponse into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.endpoints: body['endpoints'] = [v.as_dict() for v in self.endpoints] + if self.next_page_token is not None: body['next_page_token'] = self.next_page_token + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> ListEndpointResponse: + """Deserializes the ListEndpointResponse from a dictionary.""" + return cls(endpoints=_repeated_dict(d, 'endpoints', EndpointInfo), + next_page_token=d.get('next_page_token', None)) + + +@dataclass +class ListVectorIndexesResponse: + next_page_token: Optional[str] = None + """A token that can be used to get the next page of results. If not present, there are no more + results to show.""" + + vector_indexes: Optional[List[MiniVectorIndex]] = None + + def as_dict(self) -> dict: + """Serializes the ListVectorIndexesResponse into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.next_page_token is not None: body['next_page_token'] = self.next_page_token + if self.vector_indexes: body['vector_indexes'] = [v.as_dict() for v in self.vector_indexes] + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> ListVectorIndexesResponse: + """Deserializes the ListVectorIndexesResponse from a dictionary.""" + return cls(next_page_token=d.get('next_page_token', None), + vector_indexes=_repeated_dict(d, 'vector_indexes', MiniVectorIndex)) + + +@dataclass +class MiniVectorIndex: + creator: Optional[str] = None + """The user who created the index.""" + + endpoint_name: Optional[str] = None + """Name of the endpoint associated with the index""" + + index_type: Optional[VectorIndexType] = None + """There are 2 types of Vector Search indexes: + + - `DELTA_SYNC`: An index that automatically syncs with a source Delta Table, automatically and + incrementally updating the index as the underlying data in the Delta Table changes. - + `DIRECT_ACCESS`: An index that supports direct read and write of vectors and metadata through + our REST and SDK APIs. 
With this model, the user manages index updates.""" + + name: Optional[str] = None + """Name of the index""" + + primary_key: Optional[str] = None + """Primary key of the index""" + + def as_dict(self) -> dict: + """Serializes the MiniVectorIndex into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.creator is not None: body['creator'] = self.creator + if self.endpoint_name is not None: body['endpoint_name'] = self.endpoint_name + if self.index_type is not None: body['index_type'] = self.index_type.value + if self.name is not None: body['name'] = self.name + if self.primary_key is not None: body['primary_key'] = self.primary_key + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> MiniVectorIndex: + """Deserializes the MiniVectorIndex from a dictionary.""" + return cls(creator=d.get('creator', None), + endpoint_name=d.get('endpoint_name', None), + index_type=_enum(d, 'index_type', VectorIndexType), + name=d.get('name', None), + primary_key=d.get('primary_key', None)) + + +class PipelineType(Enum): + """Pipeline execution mode. + + - `TRIGGERED`: If the pipeline uses the triggered execution mode, the system stops processing + after successfully refreshing the source table in the pipeline once, ensuring the table is + updated based on the data available when the update started. - `CONTINUOUS`: If the pipeline + uses continuous execution, the pipeline processes new data as it arrives in the source table to + keep vector index fresh.""" + + CONTINUOUS = 'CONTINUOUS' + TRIGGERED = 'TRIGGERED' + + +@dataclass +class QueryVectorIndexRequest: + columns: List[str] + """List of column names to include in the response.""" + + filters_json: Optional[str] = None + """JSON string representing query filters. + + Example filters: - `{"id <": 5}`: Filter for id less than 5. - `{"id >": 5}`: Filter for id + greater than 5. - `{"id <=": 5}`: Filter for id less than equal to 5. - `{"id >=": 5}`: Filter + for id greater than equal to 5. - `{"id": 5}`: Filter for id equal to 5.""" + + index_name: Optional[str] = None + """Name of the vector index to query.""" + + num_results: Optional[int] = None + """Number of results to return. Defaults to 10.""" + + query_text: Optional[str] = None + """Query text. Required for Delta Sync Index using model endpoint.""" + + query_vector: Optional[List[float]] = None + """Query vector. 
Required for Direct Vector Access Index and Delta Sync Index using self-managed + vectors.""" + + def as_dict(self) -> dict: + """Serializes the QueryVectorIndexRequest into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.columns: body['columns'] = [v for v in self.columns] + if self.filters_json is not None: body['filters_json'] = self.filters_json + if self.index_name is not None: body['index_name'] = self.index_name + if self.num_results is not None: body['num_results'] = self.num_results + if self.query_text is not None: body['query_text'] = self.query_text + if self.query_vector: body['query_vector'] = [v for v in self.query_vector] + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> QueryVectorIndexRequest: + """Deserializes the QueryVectorIndexRequest from a dictionary.""" + return cls(columns=d.get('columns', None), + filters_json=d.get('filters_json', None), + index_name=d.get('index_name', None), + num_results=d.get('num_results', None), + query_text=d.get('query_text', None), + query_vector=d.get('query_vector', None)) + + +@dataclass +class QueryVectorIndexResponse: + manifest: Optional[ResultManifest] = None + """Metadata about the result set.""" + + result: Optional[ResultData] = None + """Data returned in the query result.""" + + def as_dict(self) -> dict: + """Serializes the QueryVectorIndexResponse into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.manifest: body['manifest'] = self.manifest.as_dict() + if self.result: body['result'] = self.result.as_dict() + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> QueryVectorIndexResponse: + """Deserializes the QueryVectorIndexResponse from a dictionary.""" + return cls(manifest=_from_dict(d, 'manifest', ResultManifest), + result=_from_dict(d, 'result', ResultData)) + + +@dataclass +class ResultData: + """Data returned in the query result.""" + + data_array: Optional[List[List[str]]] = None + """Data rows returned in the query.""" + + row_count: Optional[int] = None + """Number of rows in the result set.""" + + def as_dict(self) -> dict: + """Serializes the ResultData into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.data_array: body['data_array'] = [v for v in self.data_array] + if self.row_count is not None: body['row_count'] = self.row_count + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> ResultData: + """Deserializes the ResultData from a dictionary.""" + return cls(data_array=d.get('data_array', None), row_count=d.get('row_count', None)) + + +@dataclass +class ResultManifest: + """Metadata about the result set.""" + + column_count: Optional[int] = None + """Number of columns in the result set.""" + + columns: Optional[List[ColumnInfo]] = None + """Information about each column in the result set.""" + + def as_dict(self) -> dict: + """Serializes the ResultManifest into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.column_count is not None: body['column_count'] = self.column_count + if self.columns: body['columns'] = [v.as_dict() for v in self.columns] + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> ResultManifest: + """Deserializes the ResultManifest from a dictionary.""" + return cls(column_count=d.get('column_count', None), columns=_repeated_dict(d, 'columns', ColumnInfo)) + + +@dataclass +class UpsertDataResult: + """Result of the upsert or delete operation.""" + + failed_primary_keys: 
Optional[List[str]] = None + """List of primary keys for rows that failed to process.""" + + success_row_count: Optional[int] = None + """Count of successfully processed rows.""" + + def as_dict(self) -> dict: + """Serializes the UpsertDataResult into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.failed_primary_keys: body['failed_primary_keys'] = [v for v in self.failed_primary_keys] + if self.success_row_count is not None: body['success_row_count'] = self.success_row_count + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> UpsertDataResult: + """Deserializes the UpsertDataResult from a dictionary.""" + return cls(failed_primary_keys=d.get('failed_primary_keys', None), + success_row_count=d.get('success_row_count', None)) + + +class UpsertDataStatus(Enum): + """Status of the upsert operation.""" + + FAILURE = 'FAILURE' + PARTIAL_SUCCESS = 'PARTIAL_SUCCESS' + SUCCESS = 'SUCCESS' + + +@dataclass +class UpsertDataVectorIndexRequest: + """Request payload for upserting data into a vector index.""" + + inputs_json: str + """JSON string representing the data to be upserted.""" + + name: Optional[str] = None + """Name of the vector index where data is to be upserted. Must be a Direct Vector Access Index.""" + + def as_dict(self) -> dict: + """Serializes the UpsertDataVectorIndexRequest into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.inputs_json is not None: body['inputs_json'] = self.inputs_json + if self.name is not None: body['name'] = self.name + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> UpsertDataVectorIndexRequest: + """Deserializes the UpsertDataVectorIndexRequest from a dictionary.""" + return cls(inputs_json=d.get('inputs_json', None), name=d.get('name', None)) + + +@dataclass +class UpsertDataVectorIndexResponse: + """Response to an upsert data vector index request.""" + + result: Optional[UpsertDataResult] = None + """Result of the upsert or delete operation.""" + + status: Optional[UpsertDataStatus] = None + """Status of the upsert operation.""" + + def as_dict(self) -> dict: + """Serializes the UpsertDataVectorIndexResponse into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.result: body['result'] = self.result.as_dict() + if self.status is not None: body['status'] = self.status.value + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> UpsertDataVectorIndexResponse: + """Deserializes the UpsertDataVectorIndexResponse from a dictionary.""" + return cls(result=_from_dict(d, 'result', UpsertDataResult), + status=_enum(d, 'status', UpsertDataStatus)) + + +@dataclass +class VectorIndex: + creator: Optional[str] = None + """The user who created the index.""" + + delta_sync_vector_index_spec: Optional[DeltaSyncVectorIndexSpecResponse] = None + + direct_access_vector_index_spec: Optional[DirectAccessVectorIndexSpec] = None + + endpoint_name: Optional[str] = None + """Name of the endpoint associated with the index""" + + index_type: Optional[VectorIndexType] = None + """There are 2 types of Vector Search indexes: + + - `DELTA_SYNC`: An index that automatically syncs with a source Delta Table, automatically and + incrementally updating the index as the underlying data in the Delta Table changes. - + `DIRECT_ACCESS`: An index that supports direct read and write of vectors and metadata through + our REST and SDK APIs. 
With this model, the user manages index updates.""" + + name: Optional[str] = None + """Name of the index""" + + primary_key: Optional[str] = None + """Primary key of the index""" + + status: Optional[VectorIndexStatus] = None + + def as_dict(self) -> dict: + """Serializes the VectorIndex into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.creator is not None: body['creator'] = self.creator + if self.delta_sync_vector_index_spec: + body['delta_sync_vector_index_spec'] = self.delta_sync_vector_index_spec.as_dict() + if self.direct_access_vector_index_spec: + body['direct_access_vector_index_spec'] = self.direct_access_vector_index_spec.as_dict() + if self.endpoint_name is not None: body['endpoint_name'] = self.endpoint_name + if self.index_type is not None: body['index_type'] = self.index_type.value + if self.name is not None: body['name'] = self.name + if self.primary_key is not None: body['primary_key'] = self.primary_key + if self.status: body['status'] = self.status.as_dict() + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> VectorIndex: + """Deserializes the VectorIndex from a dictionary.""" + return cls(creator=d.get('creator', None), + delta_sync_vector_index_spec=_from_dict(d, 'delta_sync_vector_index_spec', + DeltaSyncVectorIndexSpecResponse), + direct_access_vector_index_spec=_from_dict(d, 'direct_access_vector_index_spec', + DirectAccessVectorIndexSpec), + endpoint_name=d.get('endpoint_name', None), + index_type=_enum(d, 'index_type', VectorIndexType), + name=d.get('name', None), + primary_key=d.get('primary_key', None), + status=_from_dict(d, 'status', VectorIndexStatus)) + + +@dataclass +class VectorIndexStatus: + index_url: Optional[str] = None + """Index API Url to be used to perform operations on the index""" + + indexed_row_count: Optional[int] = None + """Number of rows indexed""" + + message: Optional[str] = None + """Message associated with the index status""" + + ready: Optional[bool] = None + """Whether the index is ready for search""" + + def as_dict(self) -> dict: + """Serializes the VectorIndexStatus into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.index_url is not None: body['index_url'] = self.index_url + if self.indexed_row_count is not None: body['indexed_row_count'] = self.indexed_row_count + if self.message is not None: body['message'] = self.message + if self.ready is not None: body['ready'] = self.ready + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> VectorIndexStatus: + """Deserializes the VectorIndexStatus from a dictionary.""" + return cls(index_url=d.get('index_url', None), + indexed_row_count=d.get('indexed_row_count', None), + message=d.get('message', None), + ready=d.get('ready', None)) + + +class VectorIndexType(Enum): + """There are 2 types of Vector Search indexes: + + - `DELTA_SYNC`: An index that automatically syncs with a source Delta Table, automatically and + incrementally updating the index as the underlying data in the Delta Table changes. - + `DIRECT_ACCESS`: An index that supports direct read and write of vectors and metadata through + our REST and SDK APIs. 
With this model, the user manages index updates.""" + + DELTA_SYNC = 'DELTA_SYNC' + DIRECT_ACCESS = 'DIRECT_ACCESS' + + +class VectorSearchEndpointsAPI: + """**Endpoint**: Represents the compute resources to host vector search indexes.""" + + def __init__(self, api_client): + self._api = api_client + + def wait_get_endpoint_vector_search_endpoint_online( + self, + endpoint_name: str, + timeout=timedelta(minutes=20), + callback: Optional[Callable[[EndpointInfo], None]] = None) -> EndpointInfo: + deadline = time.time() + timeout.total_seconds() + target_states = (EndpointStatusState.ONLINE, ) + failure_states = (EndpointStatusState.OFFLINE, ) + status_message = 'polling...' + attempt = 1 + while time.time() < deadline: + poll = self.get_endpoint(endpoint_name=endpoint_name) + status = poll.endpoint_status.state + status_message = f'current status: {status}' + if poll.endpoint_status: + status_message = poll.endpoint_status.message + if status in target_states: + return poll + if callback: + callback(poll) + if status in failure_states: + msg = f'failed to reach ONLINE, got {status}: {status_message}' + raise OperationFailed(msg) + prefix = f"endpoint_name={endpoint_name}" + sleep = attempt + if sleep > 10: + # sleep 10s max per attempt + sleep = 10 + _LOG.debug(f'{prefix}: ({status}) {status_message} (sleeping ~{sleep}s)') + time.sleep(sleep + random.random()) + attempt += 1 + raise TimeoutError(f'timed out after {timeout}: {status_message}') + + def create_endpoint(self, name: str, endpoint_type: EndpointType) -> Wait[EndpointInfo]: + """Create an endpoint. + + Create a new endpoint. + + :param name: str + Name of endpoint + :param endpoint_type: :class:`EndpointType` + Type of endpoint. + + :returns: + Long-running operation waiter for :class:`EndpointInfo`. + See :method:wait_get_endpoint_vector_search_endpoint_online for more details. + """ + body = {} + if endpoint_type is not None: body['endpoint_type'] = endpoint_type.value + if name is not None: body['name'] = name + headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } + op_response = self._api.do('POST', '/api/2.0/vector-search/endpoints', body=body, headers=headers) + return Wait(self.wait_get_endpoint_vector_search_endpoint_online, + response=EndpointInfo.from_dict(op_response), + endpoint_name=op_response['name']) + + def create_endpoint_and_wait(self, name: str, endpoint_type: EndpointType, + timeout=timedelta(minutes=20)) -> EndpointInfo: + return self.create_endpoint(endpoint_type=endpoint_type, name=name).result(timeout=timeout) + + def delete_endpoint(self, endpoint_name: str, name: str): + """Delete an endpoint. + + :param endpoint_name: str + Name of the endpoint + :param name: str + Name of the endpoint to delete + + + """ + + headers = {} + self._api.do('DELETE', f'/api/2.0/vector-search/endpoints/{endpoint_name}', headers=headers) + + def get_endpoint(self, endpoint_name: str) -> EndpointInfo: + """Get an endpoint. + + :param endpoint_name: str + Name of the endpoint + + :returns: :class:`EndpointInfo` + """ + + headers = {'Accept': 'application/json', } + res = self._api.do('GET', f'/api/2.0/vector-search/endpoints/{endpoint_name}', headers=headers) + return EndpointInfo.from_dict(res) + + def list_endpoints(self, *, page_token: Optional[str] = None) -> Iterator[EndpointInfo]: + """List all endpoints. 
+ + :param page_token: str (optional) + Token for pagination + + :returns: Iterator over :class:`EndpointInfo` + """ + + query = {} + if page_token is not None: query['page_token'] = page_token + headers = {'Accept': 'application/json', } + + while True: + json = self._api.do('GET', '/api/2.0/vector-search/endpoints', query=query, headers=headers) + if 'endpoints' not in json or not json['endpoints']: + return + for v in json['endpoints']: + yield EndpointInfo.from_dict(v) + if 'next_page_token' not in json or not json['next_page_token']: + return + query['page_token'] = json['next_page_token'] + + +class VectorSearchIndexesAPI: + """**Index**: An efficient representation of your embedding vectors that supports real-time and efficient + approximate nearest neighbor (ANN) search queries. + + There are 2 types of Vector Search indexes: * **Delta Sync Index**: An index that automatically syncs with + a source Delta Table, automatically and incrementally updating the index as the underlying data in the + Delta Table changes. * **Direct Vector Access Index**: An index that supports direct read and write of + vectors and metadata through our REST and SDK APIs. With this model, the user manages index updates.""" + + def __init__(self, api_client): + self._api = api_client + + def create_index(self, + name: str, + primary_key: str, + index_type: VectorIndexType, + *, + delta_sync_vector_index_spec: Optional[DeltaSyncVectorIndexSpecRequest] = None, + direct_access_index_spec: Optional[DirectAccessVectorIndexSpec] = None, + endpoint_name: Optional[str] = None) -> CreateVectorIndexResponse: + """Create an index. + + Create a new index. + + :param name: str + Name of the index + :param primary_key: str + Primary key of the index + :param index_type: :class:`VectorIndexType` + There are 2 types of Vector Search indexes: + + - `DELTA_SYNC`: An index that automatically syncs with a source Delta Table, automatically and + incrementally updating the index as the underlying data in the Delta Table changes. - + `DIRECT_ACCESS`: An index that supports direct read and write of vectors and metadata through our + REST and SDK APIs. With this model, the user manages index updates. + :param delta_sync_vector_index_spec: :class:`DeltaSyncVectorIndexSpecRequest` (optional) + Specification for Delta Sync Index. Required if `index_type` is `DELTA_SYNC`. + :param direct_access_index_spec: :class:`DirectAccessVectorIndexSpec` (optional) + Specification for Direct Vector Access Index. Required if `index_type` is `DIRECT_ACCESS`. + :param endpoint_name: str (optional) + Name of the endpoint to be used for serving the index + + :returns: :class:`CreateVectorIndexResponse` + """ + body = {} + if delta_sync_vector_index_spec is not None: + body['delta_sync_vector_index_spec'] = delta_sync_vector_index_spec.as_dict() + if direct_access_index_spec is not None: + body['direct_access_index_spec'] = direct_access_index_spec.as_dict() + if endpoint_name is not None: body['endpoint_name'] = endpoint_name + if index_type is not None: body['index_type'] = index_type.value + if name is not None: body['name'] = name + if primary_key is not None: body['primary_key'] = primary_key + headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } + res = self._api.do('POST', '/api/2.0/vector-search/indexes', body=body, headers=headers) + return CreateVectorIndexResponse.from_dict(res) + + def delete_data_vector_index(self, name: str, primary_keys: List[str]) -> DeleteDataVectorIndexResponse: + """Delete data from index. 
+ + Handles the deletion of data from a specified vector index. + + :param name: str + Name of the vector index where data is to be deleted. Must be a Direct Vector Access Index. + :param primary_keys: List[str] + List of primary keys for the data to be deleted. + + :returns: :class:`DeleteDataVectorIndexResponse` + """ + body = {} + if primary_keys is not None: body['primary_keys'] = [v for v in primary_keys] + headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } + res = self._api.do('POST', + f'/api/2.0/vector-search/indexes/{name}/delete-data', + body=body, + headers=headers) + return DeleteDataVectorIndexResponse.from_dict(res) + + def delete_index(self, index_name: str): + """Delete an index. + + Delete an index. + + :param index_name: str + Name of the index + + + """ + + headers = {} + self._api.do('DELETE', f'/api/2.0/vector-search/indexes/{index_name}', headers=headers) + + def get_index(self, index_name: str) -> VectorIndex: + """Get an index. + + Get an index. + + :param index_name: str + Name of the index + + :returns: :class:`VectorIndex` + """ + + headers = {'Accept': 'application/json', } + res = self._api.do('GET', f'/api/2.0/vector-search/indexes/{index_name}', headers=headers) + return VectorIndex.from_dict(res) + + def list_indexes(self, + endpoint_name: str, + *, + page_token: Optional[str] = None) -> Iterator[MiniVectorIndex]: + """List indexes. + + List all indexes in the given endpoint. + + :param endpoint_name: str + Name of the endpoint + :param page_token: str (optional) + Token for pagination + + :returns: Iterator over :class:`MiniVectorIndex` + """ + + query = {} + if endpoint_name is not None: query['endpoint_name'] = endpoint_name + if page_token is not None: query['page_token'] = page_token + headers = {'Accept': 'application/json', } + + while True: + json = self._api.do('GET', '/api/2.0/vector-search/indexes', query=query, headers=headers) + if 'vector_indexes' not in json or not json['vector_indexes']: + return + for v in json['vector_indexes']: + yield MiniVectorIndex.from_dict(v) + if 'next_page_token' not in json or not json['next_page_token']: + return + query['page_token'] = json['next_page_token'] + + def query_index(self, + index_name: str, + columns: List[str], + *, + filters_json: Optional[str] = None, + num_results: Optional[int] = None, + query_text: Optional[str] = None, + query_vector: Optional[List[float]] = None) -> QueryVectorIndexResponse: + """Query an index. + + Query the specified vector index. + + :param index_name: str + Name of the vector index to query. + :param columns: List[str] + List of column names to include in the response. + :param filters_json: str (optional) + JSON string representing query filters. + + Example filters: - `{"id <": 5}`: Filter for id less than 5. - `{"id >": 5}`: Filter for id greater + than 5. - `{"id <=": 5}`: Filter for id less than equal to 5. - `{"id >=": 5}`: Filter for id + greater than equal to 5. - `{"id": 5}`: Filter for id equal to 5. + :param num_results: int (optional) + Number of results to return. Defaults to 10. + :param query_text: str (optional) + Query text. Required for Delta Sync Index using model endpoint. + :param query_vector: List[float] (optional) + Query vector. Required for Direct Vector Access Index and Delta Sync Index using self-managed + vectors. 
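        An illustrative call (the index name, columns, and embedding values below are
        hypothetical, and the index service is assumed to be exposed on the client as
        `w.vector_search_indexes`)::

            from databricks.sdk import WorkspaceClient

            w = WorkspaceClient()
            resp = w.vector_search_indexes.query_index(index_name="main.default.my_index",
                                                       columns=["id", "text"],
                                                       query_vector=[0.1, 0.2, 0.3],
                                                       filters_json='{"id >": 5}',
                                                       num_results=5)
            # rows come back as lists of string values, one list per matching record
            rows = resp.result.data_array if resp.result else []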
+ + :returns: :class:`QueryVectorIndexResponse` + """ + body = {} + if columns is not None: body['columns'] = [v for v in columns] + if filters_json is not None: body['filters_json'] = filters_json + if num_results is not None: body['num_results'] = num_results + if query_text is not None: body['query_text'] = query_text + if query_vector is not None: body['query_vector'] = [v for v in query_vector] + headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } + res = self._api.do('POST', + f'/api/2.0/vector-search/indexes/{index_name}/query', + body=body, + headers=headers) + return QueryVectorIndexResponse.from_dict(res) + + def sync_index(self, index_name: str): + """Synchronize an index. + + Triggers a synchronization process for a specified vector index. + + :param index_name: str + Name of the vector index to synchronize. Must be a Delta Sync Index. + + + """ + + headers = {} + self._api.do('POST', f'/api/2.0/vector-search/indexes/{index_name}/sync', headers=headers) + + def upsert_data_vector_index(self, name: str, inputs_json: str) -> UpsertDataVectorIndexResponse: + """Upsert data into an index. + + Handles the upserting of data into a specified vector index. + + :param name: str + Name of the vector index where data is to be upserted. Must be a Direct Vector Access Index. + :param inputs_json: str + JSON string representing the data to be upserted. + + :returns: :class:`UpsertDataVectorIndexResponse` + """ + body = {} + if inputs_json is not None: body['inputs_json'] = inputs_json + headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } + res = self._api.do('POST', + f'/api/2.0/vector-search/indexes/{name}/upsert-data', + body=body, + headers=headers) + return UpsertDataVectorIndexResponse.from_dict(res) diff --git a/databricks/sdk/service/workspace.py b/databricks/sdk/service/workspace.py index a5277a7b2..cf5dacc3a 100755 --- a/databricks/sdk/service/workspace.py +++ b/databricks/sdk/service/workspace.py @@ -721,8 +721,7 @@ class RepoAccessControlRequest: """Permission level""" service_principal_name: Optional[str] = None - """Application ID of an active service principal. Setting this field requires the - `servicePrincipal/user` role.""" + """application ID of a service principal""" user_name: Optional[str] = None """name of the user""" @@ -1101,8 +1100,7 @@ class WorkspaceObjectAccessControlRequest: """Permission level""" service_principal_name: Optional[str] = None - """Application ID of an active service principal. Setting this field requires the - `servicePrincipal/user` role.""" + """application ID of a service principal""" user_name: Optional[str] = None """name of the user""" diff --git a/databricks/sdk/version.py b/databricks/sdk/version.py index 8911e95ca..5ec52a922 100644 --- a/databricks/sdk/version.py +++ b/databricks/sdk/version.py @@ -1 +1 @@ -__version__ = '0.16.0' +__version__ = '0.18.0' diff --git a/docs/account/billing/billable_usage.rst b/docs/account/billing/billable_usage.rst new file mode 100644 index 000000000..51c2eb2fa --- /dev/null +++ b/docs/account/billing/billable_usage.rst @@ -0,0 +1,44 @@ +``a.billable_usage``: Billable usage download +============================================= +.. currentmodule:: databricks.sdk.service.billing + +.. py:class:: BillableUsageAPI + + This API allows you to download billable usage logs for the specified account and date range. This feature + works with all account types. + + .. 
py:method:: download(start_month: str, end_month: str [, personal_data: Optional[bool]]) -> DownloadResponse + + + Usage: + + .. code-block:: + + from databricks.sdk import AccountClient + + a = AccountClient() + + resp = a.billable_usage.download(start_month="2023-01", end_month="2023-02") + + Return billable usage logs. + + Returns billable usage logs in CSV format for the specified account and date range. For the data + schema, see [CSV file schema]. Note that this method might take multiple minutes to complete. + + **Warning**: Depending on the queried date range, the number of workspaces in the account, the size of + the response and the internet speed of the caller, this API may hit a timeout after a few minutes. If + you experience this, try to mitigate by calling the API with narrower date ranges. + + [CSV file schema]: https://docs.databricks.com/administration-guide/account-settings/usage-analysis.html#schema + + :param start_month: str + Format: `YYYY-MM`. First month to return billable usage logs for. This field is required. + :param end_month: str + Format: `YYYY-MM`. Last month to return billable usage logs for. This field is required. + :param personal_data: bool (optional) + Specify whether to include personally identifiable information in the billable usage logs, for + example the email addresses of cluster creators. Handle this information with care. Defaults to + false. + + :returns: :class:`DownloadResponse` + \ No newline at end of file diff --git a/docs/account/billing/budgets.rst b/docs/account/billing/budgets.rst new file mode 100644 index 000000000..85f7ee133 --- /dev/null +++ b/docs/account/billing/budgets.rst @@ -0,0 +1,162 @@ +``a.budgets``: Budgets +====================== +.. currentmodule:: databricks.sdk.service.billing + +.. py:class:: BudgetsAPI + + These APIs manage budget configuration including notifications for exceeding a budget for a period. They + can also retrieve the status of each budget. + + .. py:method:: create(budget: Budget) -> WrappedBudgetWithStatus + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import billing + + a = AccountClient() + + created = a.budgets.create(budget=billing.Budget( + name=f'sdk-{time.time_ns()}', + filter="tag.tagName = 'all'", + period="1 month", + start_date="2022-01-01", + target_amount="100", + alerts=[billing.BudgetAlert(email_notifications=["admin@example.com"], min_percentage=50)])) + + # cleanup + a.budgets.delete(budget_id=created.budget.budget_id) + + Create a new budget. + + Creates a new budget in the specified account. + + :param budget: :class:`Budget` + Budget configuration to be created. + + :returns: :class:`WrappedBudgetWithStatus` + + + .. py:method:: delete(budget_id: str) + + Delete budget. + + Deletes the budget specified by its UUID. + + :param budget_id: str + Budget ID + + + + + .. py:method:: get(budget_id: str) -> WrappedBudgetWithStatus + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import billing + + a = AccountClient() + + created = a.budgets.create(budget=billing.Budget( + name=f'sdk-{time.time_ns()}', + filter="tag.tagName = 'all'", + period="1 month", + start_date="2022-01-01", + target_amount="100", + alerts=[billing.BudgetAlert(email_notifications=["admin@example.com"], min_percentage=50)])) + + by_id = a.budgets.get(budget_id=created.budget.budget_id) + + # cleanup + a.budgets.delete(budget_id=created.budget.budget_id) + + Get budget and its status. + + Gets the budget specified by its UUID, including noncumulative status for each day that the budget is + configured to include. + + :param budget_id: str + Budget ID + + :returns: :class:`WrappedBudgetWithStatus` + + + .. py:method:: list() -> Iterator[BudgetWithStatus] + + + Usage: + + .. code-block:: + + from databricks.sdk import AccountClient + + a = AccountClient() + + all = a.budgets.list() + + Get all budgets. + + Gets all budgets associated with this account, including noncumulative status for each day that the + budget is configured to include. + + :returns: Iterator over :class:`BudgetWithStatus` + + + .. py:method:: update(budget_id: str, budget: Budget) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import billing + + a = AccountClient() + + created = a.budgets.create(budget=billing.Budget( + name=f'sdk-{time.time_ns()}', + filter="tag.tagName = 'all'", + period="1 month", + start_date="2022-01-01", + target_amount="100", + alerts=[billing.BudgetAlert(email_notifications=["admin@example.com"], min_percentage=50)])) + + a.budgets.update(budget_id=created.budget.budget_id, + budget=billing.Budget(name=f'sdk-{time.time_ns()}', + filter="tag.tagName = 'all'", + period="1 month", + start_date="2022-01-01", + target_amount="100", + alerts=[ + billing.BudgetAlert(email_notifications=["admin@example.com"], + min_percentage=70) + ])) + + # cleanup + a.budgets.delete(budget_id=created.budget.budget_id) + + Modify budget. + + Modifies a budget in this account. Budget properties are completely overwritten. + + :param budget_id: str + Budget ID + :param budget: :class:`Budget` + Budget configuration to be created. + + + \ No newline at end of file diff --git a/docs/account/billing/index.rst b/docs/account/billing/index.rst new file mode 100644 index 000000000..522f6f5fd --- /dev/null +++ b/docs/account/billing/index.rst @@ -0,0 +1,12 @@ + +Billing +======= + +Configure different aspects of Databricks billing and usage. + +.. toctree:: + :maxdepth: 1 + + billable_usage + budgets + log_delivery \ No newline at end of file diff --git a/docs/account/billing/log_delivery.rst b/docs/account/billing/log_delivery.rst new file mode 100644 index 000000000..04ef4e349 --- /dev/null +++ b/docs/account/billing/log_delivery.rst @@ -0,0 +1,212 @@ +``a.log_delivery``: Log delivery configurations +=============================================== +.. currentmodule:: databricks.sdk.service.billing + +.. py:class:: LogDeliveryAPI + + These APIs manage log delivery configurations for this account. The two supported log types for this API + are _billable usage logs_ and _audit logs_. This feature is in Public Preview. This feature works with all + account ID types. + + Log delivery works with all account types. 
However, if your account is on the E2 version of the platform + or on a select custom plan that allows multiple workspaces per account, you can optionally configure + different storage destinations for each workspace. Log delivery status is also provided to know the latest + status of log delivery attempts. The high-level flow of billable usage delivery: + + 1. **Create storage**: In AWS, [create a new AWS S3 bucket] with a specific bucket policy. Using + Databricks APIs, call the Account API to create a [storage configuration object](:method:Storage/Create) + that uses the bucket name. 2. **Create credentials**: In AWS, create the appropriate AWS IAM role. For + full details, including the required IAM role policies and trust relationship, see [Billable usage log + delivery]. Using Databricks APIs, call the Account API to create a [credential configuration + object](:method:Credentials/Create) that uses the IAM role's ARN. 3. **Create log delivery + configuration**: Using Databricks APIs, call the Account API to [create a log delivery + configuration](:method:LogDelivery/Create) that uses the credential and storage configuration objects from + previous steps. You can specify if the logs should include all events of that log type in your account + (_Account level_ delivery) or only events for a specific set of workspaces (_workspace level_ delivery). + Account level log delivery applies to all current and future workspaces plus account level logs, while + workspace level log delivery solely delivers logs related to the specified workspaces. You can create + multiple types of delivery configurations per account. + + For billable usage delivery: * For more information about billable usage logs, see [Billable usage log + delivery]. For the CSV schema, see the [Usage page]. * The delivery location is + `<bucket-name>/<delivery-path-prefix>/billable-usage/csv/`, where `<delivery-path-prefix>` is the name of the optional delivery path + prefix you set up during log delivery configuration. Files are named + `workspaceId=<workspaceId>-usageMonth=<month>.csv`. * All billable usage logs apply to specific + workspaces (_workspace level_ logs). You can aggregate usage for your entire account by creating an + _account level_ delivery configuration that delivers logs for all current and future workspaces in your + account. * The files are delivered daily by overwriting the month's CSV file for each workspace. + + For audit log delivery: * For more information about audit log delivery, see [Audit log delivery], + which includes information about the used JSON schema. * The delivery location is + `<bucket-name>/<delivery-path-prefix>/workspaceId=<workspaceId>/date=<yyyy-mm-dd>/auditlogs_<internal-suffix>.json`. + Files may get overwritten with the same content multiple times to achieve exactly-once delivery. * If the + audit log delivery configuration included specific workspace IDs, only _workspace-level_ audit logs for + those workspaces are delivered. If the log delivery configuration applies to the entire account (_account + level_ delivery configuration), the audit log delivery includes workspace-level audit logs for all + workspaces in the account as well as account-level audit logs. See [Audit log delivery] for details. * + Auditable events are typically available in logs within 15 minutes. 
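    As a rough illustration of the billable usage layout described above (the prefix and
    workspace ID below are hypothetical, and this helper is not part of the SDK), the key of one
    month's CSV file can be assembled like this:

    .. code-block::

        # Illustrative only: build the expected object key for one workspace's monthly
        # billable usage CSV, following the delivery layout described above.
        def billable_usage_csv_key(delivery_path_prefix: str, workspace_id: int, month: str) -> str:
            # month uses the same YYYY-MM format as the usageMonth file name component
            return (f"{delivery_path_prefix}/billable-usage/csv/"
                    f"workspaceId={workspace_id}-usageMonth={month}.csv")

        print(billable_usage_csv_key("my-prefix", 1234567890123456, "2024-01"))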
+ + [Audit log delivery]: https://docs.databricks.com/administration-guide/account-settings/audit-logs.html + [Billable usage log delivery]: https://docs.databricks.com/administration-guide/account-settings/billable-usage-delivery.html + [Usage page]: https://docs.databricks.com/administration-guide/account-settings/usage.html + [create a new AWS S3 bucket]: https://docs.databricks.com/administration-guide/account-api/aws-storage.html + + .. py:method:: create( [, log_delivery_configuration: Optional[CreateLogDeliveryConfigurationParams]]) -> WrappedLogDeliveryConfiguration + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import billing, provisioning + + a = AccountClient() + + bucket = a.storage.create(storage_configuration_name=f'sdk-{time.time_ns()}', + root_bucket_info=provisioning.RootBucketInfo(bucket_name=f'sdk-{time.time_ns()}')) + + creds = a.credentials.create( + credentials_name=f'sdk-{time.time_ns()}', + aws_credentials=provisioning.CreateCredentialAwsCredentials(sts_role=provisioning.CreateCredentialStsRole( + role_arn=os.environ["TEST_LOGDELIVERY_ARN"]))) + + created = a.log_delivery.create(log_delivery_configuration=billing.CreateLogDeliveryConfigurationParams( + config_name=f'sdk-{time.time_ns()}', + credentials_id=creds.credentials_id, + storage_configuration_id=bucket.storage_configuration_id, + log_type=billing.LogType.AUDIT_LOGS, + output_format=billing.OutputFormat.JSON)) + + # cleanup + a.storage.delete(storage_configuration_id=bucket.storage_configuration_id) + a.credentials.delete(credentials_id=creds.credentials_id) + a.log_delivery.patch_status(log_delivery_configuration_id=created.log_delivery_configuration.config_id, + status=billing.LogDeliveryConfigStatus.DISABLED) + + Create a new log delivery configuration. + + Creates a new Databricks log delivery configuration to enable delivery of the specified type of logs + to your storage location. This requires that you already created a [credential + object](:method:Credentials/Create) (which encapsulates a cross-account service IAM role) and a + [storage configuration object](:method:Storage/Create) (which encapsulates an S3 bucket). + + For full details, including the required IAM role policies and bucket policies, see [Deliver and + access billable usage logs] or [Configure audit logging]. + + **Note**: There is a limit on the number of log delivery configurations available per account (each + limit applies separately to each log type including billable usage and audit logs). You can create a + maximum of two enabled account-level delivery configurations (configurations without a workspace + filter) per type. Additionally, you can create two enabled workspace-level delivery configurations per + workspace for each log type, which means that the same workspace ID can occur in the workspace filter + for no more than two delivery configurations per log type. + + You cannot delete a log delivery configuration, but you can disable it (see [Enable or disable log + delivery configuration](:method:LogDelivery/PatchStatus)). + + [Configure audit logging]: https://docs.databricks.com/administration-guide/account-settings/audit-logs.html + [Deliver and access billable usage logs]: https://docs.databricks.com/administration-guide/account-settings/billable-usage-delivery.html + + :param log_delivery_configuration: :class:`CreateLogDeliveryConfigurationParams` (optional) + + :returns: :class:`WrappedLogDeliveryConfiguration` + + + .. 
py:method:: get(log_delivery_configuration_id: str) -> WrappedLogDeliveryConfiguration + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import billing, provisioning + + a = AccountClient() + + bucket = a.storage.create(storage_configuration_name=f'sdk-{time.time_ns()}', + root_bucket_info=provisioning.RootBucketInfo(bucket_name=f'sdk-{time.time_ns()}')) + + creds = a.credentials.create( + credentials_name=f'sdk-{time.time_ns()}', + aws_credentials=provisioning.CreateCredentialAwsCredentials(sts_role=provisioning.CreateCredentialStsRole( + role_arn=os.environ["TEST_LOGDELIVERY_ARN"]))) + + created = a.log_delivery.create(log_delivery_configuration=billing.CreateLogDeliveryConfigurationParams( + config_name=f'sdk-{time.time_ns()}', + credentials_id=creds.credentials_id, + storage_configuration_id=bucket.storage_configuration_id, + log_type=billing.LogType.AUDIT_LOGS, + output_format=billing.OutputFormat.JSON)) + + by_id = a.log_delivery.get(log_delivery_configuration_id=created.log_delivery_configuration.config_id) + + # cleanup + a.storage.delete(storage_configuration_id=bucket.storage_configuration_id) + a.credentials.delete(credentials_id=creds.credentials_id) + a.log_delivery.patch_status(log_delivery_configuration_id=created.log_delivery_configuration.config_id, + status=billing.LogDeliveryConfigStatus.DISABLED) + + Get log delivery configuration. + + Gets a Databricks log delivery configuration object for an account, both specified by ID. + + :param log_delivery_configuration_id: str + Databricks log delivery configuration ID + + :returns: :class:`WrappedLogDeliveryConfiguration` + + + .. py:method:: list( [, credentials_id: Optional[str], status: Optional[LogDeliveryConfigStatus], storage_configuration_id: Optional[str]]) -> Iterator[LogDeliveryConfiguration] + + + Usage: + + .. code-block:: + + from databricks.sdk import AccountClient + from databricks.sdk.service import billing + + a = AccountClient() + + all = a.log_delivery.list(billing.ListLogDeliveryRequest()) + + Get all log delivery configurations. + + Gets all Databricks log delivery configurations associated with an account specified by ID. + + :param credentials_id: str (optional) + Filter by credential configuration ID. + :param status: :class:`LogDeliveryConfigStatus` (optional) + Filter by status `ENABLED` or `DISABLED`. + :param storage_configuration_id: str (optional) + Filter by storage configuration ID. + + :returns: Iterator over :class:`LogDeliveryConfiguration` + + + .. py:method:: patch_status(log_delivery_configuration_id: str, status: LogDeliveryConfigStatus) + + Enable or disable log delivery configuration. + + Enables or disables a log delivery configuration. Deletion of delivery configurations is not + supported, so disable log delivery configurations that are no longer needed. Note that you can't + re-enable a delivery configuration if this would violate the delivery configuration limits described + under [Create log delivery](:method:LogDelivery/Create). + + :param log_delivery_configuration_id: str + Databricks log delivery configuration ID + :param status: :class:`LogDeliveryConfigStatus` + Status of log delivery configuration. Set to `ENABLED` (enabled) or `DISABLED` (disabled). Defaults + to `ENABLED`. You can [enable or disable the + configuration](#operation/patch-log-delivery-config-status) later. Deletion of a configuration is + not supported, so disable a log delivery configuration that is no longer needed. 
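        A minimal sketch of disabling a configuration that is no longer needed (the configuration
        ID below is a hypothetical placeholder; in practice it comes from a previously created
        configuration, as in the examples above):

        .. code-block::

            from databricks.sdk import AccountClient
            from databricks.sdk.service import billing

            a = AccountClient()

            # Deletion of delivery configurations is not supported, so disable instead.
            config_id = "<log-delivery-configuration-id>"  # hypothetical placeholder
            a.log_delivery.patch_status(log_delivery_configuration_id=config_id,
                                        status=billing.LogDeliveryConfigStatus.DISABLED)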
+ + + \ No newline at end of file diff --git a/docs/account/catalog/index.rst b/docs/account/catalog/index.rst new file mode 100644 index 000000000..c774d529d --- /dev/null +++ b/docs/account/catalog/index.rst @@ -0,0 +1,12 @@ + +Unity Catalog +============= + +Configure data governance with Unity Catalog for metastores, catalogs, schemas, tables, external locations, and storage credentials + +.. toctree:: + :maxdepth: 1 + + metastore_assignments + metastores + storage_credentials \ No newline at end of file diff --git a/docs/account/catalog/metastore_assignments.rst b/docs/account/catalog/metastore_assignments.rst new file mode 100644 index 000000000..f5b00c6b3 --- /dev/null +++ b/docs/account/catalog/metastore_assignments.rst @@ -0,0 +1,91 @@ +``a.metastore_assignments``: Account Metastore Assignments +========================================================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: AccountMetastoreAssignmentsAPI + + These APIs manage metastore assignments to a workspace. + + .. py:method:: create(workspace_id: int, metastore_id: str [, metastore_assignment: Optional[CreateMetastoreAssignment]]) + + Assigns a workspace to a metastore. + + Creates an assignment to a metastore for a workspace + + :param workspace_id: int + Workspace ID. + :param metastore_id: str + Unity Catalog metastore ID + :param metastore_assignment: :class:`CreateMetastoreAssignment` (optional) + + + + + .. py:method:: delete(workspace_id: int, metastore_id: str) + + Delete a metastore assignment. + + Deletes a metastore assignment to a workspace, leaving the workspace with no metastore. + + :param workspace_id: int + Workspace ID. + :param metastore_id: str + Unity Catalog metastore ID + + + + + .. py:method:: get(workspace_id: int) -> AccountsMetastoreAssignment + + Gets the metastore assignment for a workspace. + + Gets the metastore assignment, if any, for the workspace specified by ID. If the workspace is assigned + a metastore, the mappig will be returned. If no metastore is assigned to the workspace, the assignment + will not be found and a 404 returned. + + :param workspace_id: int + Workspace ID. + + :returns: :class:`AccountsMetastoreAssignment` + + + .. py:method:: list(metastore_id: str) -> Iterator[int] + + + Usage: + + .. code-block:: + + import os + + from databricks.sdk import AccountClient + + a = AccountClient() + + ws = a.metastore_assignments.list(metastore_id=os.environ["TEST_METASTORE_ID"]) + + Get all workspaces assigned to a metastore. + + Gets a list of all Databricks workspace IDs that have been assigned to given metastore. + + :param metastore_id: str + Unity Catalog metastore ID + + :returns: Iterator over int + + + .. py:method:: update(workspace_id: int, metastore_id: str [, metastore_assignment: Optional[UpdateMetastoreAssignment]]) + + Updates a metastore assignment to a workspaces. + + Updates an assignment to a metastore for a workspace. Currently, only the default catalog may be + updated. + + :param workspace_id: int + Workspace ID. + :param metastore_id: str + Unity Catalog metastore ID + :param metastore_assignment: :class:`UpdateMetastoreAssignment` (optional) + + + \ No newline at end of file diff --git a/docs/account/catalog/metastores.rst b/docs/account/catalog/metastores.rst new file mode 100644 index 000000000..d0c2752ed --- /dev/null +++ b/docs/account/catalog/metastores.rst @@ -0,0 +1,139 @@ +``a.metastores``: Account Metastores +==================================== +.. currentmodule:: databricks.sdk.service.catalog + +.. 
py:class:: AccountMetastoresAPI + + These APIs manage Unity Catalog metastores for an account. A metastore contains catalogs that can be + associated with workspaces + + .. py:method:: create( [, metastore_info: Optional[CreateMetastore]]) -> AccountsMetastoreInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.metastores.create(name=f'sdk-{time.time_ns()}', + storage_root="s3://%s/%s" % + (os.environ["TEST_BUCKET"], f'sdk-{time.time_ns()}')) + + # cleanup + w.metastores.delete(id=created.metastore_id, force=True) + + Create metastore. + + Creates a Unity Catalog metastore. + + :param metastore_info: :class:`CreateMetastore` (optional) + + :returns: :class:`AccountsMetastoreInfo` + + + .. py:method:: delete(metastore_id: str [, force: Optional[bool]]) + + Delete a metastore. + + Deletes a Unity Catalog metastore for an account, both specified by ID. + + :param metastore_id: str + Unity Catalog metastore ID + :param force: bool (optional) + Force deletion even if the metastore is not empty. Default is false. + + + + + .. py:method:: get(metastore_id: str) -> AccountsMetastoreInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.metastores.create(name=f'sdk-{time.time_ns()}', + storage_root="s3://%s/%s" % + (os.environ["TEST_BUCKET"], f'sdk-{time.time_ns()}')) + + _ = w.metastores.get(id=created.metastore_id) + + # cleanup + w.metastores.delete(id=created.metastore_id, force=True) + + Get a metastore. + + Gets a Unity Catalog metastore from an account, both specified by ID. + + :param metastore_id: str + Unity Catalog metastore ID + + :returns: :class:`AccountsMetastoreInfo` + + + .. py:method:: list() -> Iterator[MetastoreInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.metastores.list() + + Get all metastores associated with an account. + + Gets all Unity Catalog metastores associated with an account specified by ID. + + :returns: Iterator over :class:`MetastoreInfo` + + + .. py:method:: update(metastore_id: str [, metastore_info: Optional[UpdateMetastore]]) -> AccountsMetastoreInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.metastores.create(name=f'sdk-{time.time_ns()}', + storage_root="s3://%s/%s" % + (os.environ["TEST_BUCKET"], f'sdk-{time.time_ns()}')) + + _ = w.metastores.update(id=created.metastore_id, name=f'sdk-{time.time_ns()}') + + # cleanup + w.metastores.delete(id=created.metastore_id, force=True) + + Update a metastore. + + Updates an existing Unity Catalog metastore. + + :param metastore_id: str + Unity Catalog metastore ID + :param metastore_info: :class:`UpdateMetastore` (optional) + + :returns: :class:`AccountsMetastoreInfo` + \ No newline at end of file diff --git a/docs/account/catalog/storage_credentials.rst b/docs/account/catalog/storage_credentials.rst new file mode 100644 index 000000000..9972ffbf8 --- /dev/null +++ b/docs/account/catalog/storage_credentials.rst @@ -0,0 +1,164 @@ +``a.storage_credentials``: Account Storage Credentials +====================================================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: AccountStorageCredentialsAPI + + These APIs manage storage credentials for a particular metastore. + + .. 
py:method:: create(metastore_id: str [, credential_info: Optional[CreateStorageCredential]]) -> AccountsStorageCredentialInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + created = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"])) + + # cleanup + w.storage_credentials.delete(delete=created.name) + + Create a storage credential. + + Creates a new storage credential. The request object is specific to the cloud: + + * **AwsIamRole** for AWS credentials * **AzureServicePrincipal** for Azure credentials * + **GcpServiceAcountKey** for GCP credentials. + + The caller must be a metastore admin and have the **CREATE_STORAGE_CREDENTIAL** privilege on the + metastore. + + :param metastore_id: str + Unity Catalog metastore ID + :param credential_info: :class:`CreateStorageCredential` (optional) + + :returns: :class:`AccountsStorageCredentialInfo` + + + .. py:method:: delete(metastore_id: str, storage_credential_name: str [, force: Optional[bool]]) + + Delete a storage credential. + + Deletes a storage credential from the metastore. The caller must be an owner of the storage + credential. + + :param metastore_id: str + Unity Catalog metastore ID + :param storage_credential_name: str + Name of the storage credential. + :param force: bool (optional) + Force deletion even if the Storage Credential is not empty. Default is false. + + + + + .. py:method:: get(metastore_id: str, storage_credential_name: str) -> AccountsStorageCredentialInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + created = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"])) + + by_name = w.storage_credentials.get(name=created.name) + + # cleanup + w.storage_credentials.delete(name=created.name) + + Gets the named storage credential. + + Gets a storage credential from the metastore. The caller must be a metastore admin, the owner of the + storage credential, or have a level of privilege on the storage credential. + + :param metastore_id: str + Unity Catalog metastore ID + :param storage_credential_name: str + Name of the storage credential. + + :returns: :class:`AccountsStorageCredentialInfo` + + + .. py:method:: list(metastore_id: str) -> Iterator[StorageCredentialInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.storage_credentials.list() + + Get all storage credentials assigned to a metastore. + + Gets a list of all storage credentials that have been assigned to given metastore. + + :param metastore_id: str + Unity Catalog metastore ID + + :returns: Iterator over :class:`StorageCredentialInfo` + + + .. py:method:: update(metastore_id: str, storage_credential_name: str [, credential_info: Optional[UpdateStorageCredential]]) -> AccountsStorageCredentialInfo + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + created = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"])) + + _ = w.storage_credentials.update( + name=created.name, + comment=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"])) + + # cleanup + w.storage_credentials.delete(delete=created.name) + + Updates a storage credential. + + Updates a storage credential on the metastore. The caller must be the owner of the storage credential. + If the caller is a metastore admin, only the __owner__ credential can be changed. + + :param metastore_id: str + Unity Catalog metastore ID + :param storage_credential_name: str + Name of the storage credential. + :param credential_info: :class:`UpdateStorageCredential` (optional) + + :returns: :class:`AccountsStorageCredentialInfo` + \ No newline at end of file diff --git a/docs/account/iam/access_control.rst b/docs/account/iam/access_control.rst new file mode 100644 index 000000000..2537e262c --- /dev/null +++ b/docs/account/iam/access_control.rst @@ -0,0 +1,56 @@ +``a.access_control``: Account Access Control +============================================ +.. currentmodule:: databricks.sdk.service.iam + +.. py:class:: AccountAccessControlAPI + + These APIs manage access rules on resources in an account. Currently, only grant rules are supported. A + grant rule specifies a role assigned to a set of principals. A list of rules attached to a resource is + called a rule set. + + .. py:method:: get_assignable_roles_for_resource(resource: str) -> GetAssignableRolesForResourceResponse + + Get assignable roles for a resource. + + Gets all the roles that can be granted on an account level resource. A role is grantable if the rule + set on the resource can contain an access rule of the role. + + :param resource: str + The resource name for which assignable roles will be listed. + + :returns: :class:`GetAssignableRolesForResourceResponse` + + + .. py:method:: get_rule_set(name: str, etag: str) -> RuleSetResponse + + Get a rule set. + + Get a rule set by its name. A rule set is always attached to a resource and contains a list of access + rules on the said resource. Currently only a default rule set for each resource is supported. + + :param name: str + The ruleset name associated with the request. + :param etag: str + Etag used for versioning. The response is at least as fresh as the eTag provided. Etag is used for + optimistic concurrency control as a way to help prevent simultaneous updates of a rule set from + overwriting each other. It is strongly suggested that systems make use of the etag in the read -> + modify -> write pattern to perform rule set updates in order to avoid race conditions that is get an + etag from a GET rule set request, and pass it with the PUT update request to identify the rule set + version you are updating. + + :returns: :class:`RuleSetResponse` + + + .. py:method:: update_rule_set(name: str, rule_set: RuleSetUpdateRequest) -> RuleSetResponse + + Update a rule set. + + Replace the rules of a rule set. First, use get to read the current version of the rule set before + modifying it. This pattern helps prevent conflicts between concurrent updates. + + :param name: str + Name of the rule set. 
+ :param rule_set: :class:`RuleSetUpdateRequest` + + :returns: :class:`RuleSetResponse` + \ No newline at end of file diff --git a/docs/account/iam/groups.rst b/docs/account/iam/groups.rst new file mode 100644 index 000000000..be1af3c86 --- /dev/null +++ b/docs/account/iam/groups.rst @@ -0,0 +1,185 @@ +``a.groups``: Account Groups +============================ +.. currentmodule:: databricks.sdk.service.iam + +.. py:class:: AccountGroupsAPI + + Groups simplify identity management, making it easier to assign access to Databricks account, data, and + other securable objects. + + It is best practice to assign access to workspaces and access-control policies in Unity Catalog to groups, + instead of to users individually. All Databricks account identities can be assigned as members of groups, + and members inherit permissions that are assigned to their group. + + .. py:method:: create( [, display_name: Optional[str], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], id: Optional[str], members: Optional[List[ComplexValue]], meta: Optional[ResourceMeta], roles: Optional[List[ComplexValue]], schemas: Optional[List[GroupSchema]]]) -> Group + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + group = w.groups.create(display_name=f'sdk-{time.time_ns()}') + + # cleanup + w.groups.delete(id=group.id) + + Create a new group. + + Creates a group in the Databricks account with a unique name, using the supplied group details. + + :param display_name: str (optional) + String that represents a human-readable group name + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the group. See [assigning entitlements] for a full list of supported + values. + + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + :param groups: List[:class:`ComplexValue`] (optional) + :param id: str (optional) + Databricks group ID + :param members: List[:class:`ComplexValue`] (optional) + :param meta: :class:`ResourceMeta` (optional) + Container for the group identifier. Workspace local versus account. + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`GroupSchema`] (optional) + The schema of the group. + + :returns: :class:`Group` + + + .. py:method:: delete(id: str) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + group = w.groups.create(display_name=f'sdk-{time.time_ns()}') + + w.groups.delete(id=group.id) + + Delete a group. + + Deletes a group from the Databricks account. + + :param id: str + Unique ID for a group in the Databricks account. + + + + + .. py:method:: get(id: str) -> Group + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + group = w.groups.create(display_name=f'sdk-{time.time_ns()}') + + fetch = w.groups.get(id=group.id) + + # cleanup + w.groups.delete(id=group.id) + + Get group details. + + Gets the information for a specific group in the Databricks account. + + :param id: str + Unique ID for a group in the Databricks account. + + :returns: :class:`Group` + + + .. 
py:method:: list( [, attributes: Optional[str], count: Optional[int], excluded_attributes: Optional[str], filter: Optional[str], sort_by: Optional[str], sort_order: Optional[ListSortOrder], start_index: Optional[int]]) -> Iterator[Group] + + List group details. + + Gets all details of the groups associated with the Databricks account. + + :param attributes: str (optional) + Comma-separated list of attributes to return in response. + :param count: int (optional) + Desired number of results per page. Default is 10000. + :param excluded_attributes: str (optional) + Comma-separated list of attributes to exclude in response. + :param filter: str (optional) + Query by which the results have to be filtered. Supported operators are equals(`eq`), + contains(`co`), starts with(`sw`) and not equals(`ne`). Additionally, simple expressions can be + formed using logical operators - `and` and `or`. The [SCIM RFC] has more details but we currently + only support simple expressions. + + [SCIM RFC]: https://tools.ietf.org/html/rfc7644#section-3.4.2.2 + :param sort_by: str (optional) + Attribute to sort the results. + :param sort_order: :class:`ListSortOrder` (optional) + The order to sort the results. + :param start_index: int (optional) + Specifies the index of the first result. First item is number 1. + + :returns: Iterator over :class:`Group` + + + .. py:method:: patch(id: str [, operations: Optional[List[Patch]], schemas: Optional[List[PatchSchema]]]) + + Update group details. + + Partially updates the details of a group. + + :param id: str + Unique ID for a group in the Databricks account. + :param operations: List[:class:`Patch`] (optional) + :param schemas: List[:class:`PatchSchema`] (optional) + The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. + + + + + .. py:method:: update(id: str [, display_name: Optional[str], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], members: Optional[List[ComplexValue]], meta: Optional[ResourceMeta], roles: Optional[List[ComplexValue]], schemas: Optional[List[GroupSchema]]]) + + Replace a group. + + Updates the details of a group by replacing the entire group entity. + + :param id: str + Databricks group ID + :param display_name: str (optional) + String that represents a human-readable group name + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the group. See [assigning entitlements] for a full list of supported + values. + + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + :param groups: List[:class:`ComplexValue`] (optional) + :param members: List[:class:`ComplexValue`] (optional) + :param meta: :class:`ResourceMeta` (optional) + Container for the group identifier. Workspace local versus account. + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`GroupSchema`] (optional) + The schema of the group. + + + \ No newline at end of file diff --git a/docs/account/iam/index.rst b/docs/account/iam/index.rst new file mode 100644 index 000000000..1939a1a1e --- /dev/null +++ b/docs/account/iam/index.rst @@ -0,0 +1,14 @@ + +Identity and Access Management +============================== + +Manage users, service principals, groups and their permissions in Accounts and Workspaces + +.. 
toctree:: + :maxdepth: 1 + + access_control + groups + service_principals + users + workspace_assignment \ No newline at end of file diff --git a/docs/account/iam/service_principals.rst b/docs/account/iam/service_principals.rst new file mode 100644 index 000000000..baef75be9 --- /dev/null +++ b/docs/account/iam/service_principals.rst @@ -0,0 +1,241 @@ +``a.service_principals``: Account Service Principals +==================================================== +.. currentmodule:: databricks.sdk.service.iam + +.. py:class:: AccountServicePrincipalsAPI + + Identities for use with jobs, automated tools, and systems such as scripts, apps, and CI/CD platforms. + Databricks recommends creating service principals to run production jobs or modify production data. If all + processes that act on production data run with service principals, interactive users do not need any + write, delete, or modify privileges in production. This eliminates the risk of a user overwriting + production data by accident. + + .. py:method:: create( [, active: Optional[bool], application_id: Optional[str], display_name: Optional[str], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], id: Optional[str], roles: Optional[List[ComplexValue]], schemas: Optional[List[ServicePrincipalSchema]]]) -> ServicePrincipal + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import iam + + w = WorkspaceClient() + + groups = w.groups.group_display_name_to_id_map(iam.ListGroupsRequest()) + + spn = w.service_principals.create(display_name=f'sdk-{time.time_ns()}', + groups=[iam.ComplexValue(value=groups["admins"])]) + + # cleanup + w.service_principals.delete(id=spn.id) + + Create a service principal. + + Creates a new service principal in the Databricks account. + + :param active: bool (optional) + If this user is active + :param application_id: str (optional) + UUID relating to the service principal + :param display_name: str (optional) + String that represents a concatenation of given and family names. + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the service principal. See [assigning entitlements] for a full list of + supported values. + + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + :param groups: List[:class:`ComplexValue`] (optional) + :param id: str (optional) + Databricks service principal ID. + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`ServicePrincipalSchema`] (optional) + The schema of the List response. + + :returns: :class:`ServicePrincipal` + + + .. py:method:: delete(id: str) + + Delete a service principal. + + Delete a single service principal in the Databricks account. + + :param id: str + Unique ID for a service principal in the Databricks account. + + + + + .. py:method:: get(id: str) -> ServicePrincipal + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.service_principals.create(display_name=f'sdk-{time.time_ns()}') + + by_id = w.service_principals.get(id=created.id) + + # cleanup + w.service_principals.delete(id=created.id) + + Get service principal details. + + Gets the details for a single service principal define in the Databricks account. 
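+
+        If only the display name of the service principal is known, the numeric SCIM ``id``
+        expected by this method can first be resolved with a ``list()`` filter (a minimal
+        sketch; the display name below is illustrative):
+
+        .. code-block::
+
+            from databricks.sdk import AccountClient
+
+            a = AccountClient()
+
+            # Resolve the SCIM id from a display name, then fetch the full record.
+            matches = list(a.service_principals.list(filter="displayName eq my-automation-sp"))
+            if matches:
+                details = a.service_principals.get(id=matches[0].id)
+                print(details.application_id)
+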
+ + :param id: str + Unique ID for a service principal in the Databricks account. + + :returns: :class:`ServicePrincipal` + + + .. py:method:: list( [, attributes: Optional[str], count: Optional[int], excluded_attributes: Optional[str], filter: Optional[str], sort_by: Optional[str], sort_order: Optional[ListSortOrder], start_index: Optional[int]]) -> Iterator[ServicePrincipal] + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + sp_create = a.service_principals.create(active=True, display_name=f'sdk-{time.time_ns()}') + + sp = a.service_principals.get(id=sp_create.id) + + sp_list = a.service_principals.list(filter="displayName eq %v" % (sp.display_name)) + + # cleanup + a.service_principals.delete(id=sp_create.id) + + List service principals. + + Gets the set of service principals associated with a Databricks account. + + :param attributes: str (optional) + Comma-separated list of attributes to return in response. + :param count: int (optional) + Desired number of results per page. Default is 10000. + :param excluded_attributes: str (optional) + Comma-separated list of attributes to exclude in response. + :param filter: str (optional) + Query by which the results have to be filtered. Supported operators are equals(`eq`), + contains(`co`), starts with(`sw`) and not equals(`ne`). Additionally, simple expressions can be + formed using logical operators - `and` and `or`. The [SCIM RFC] has more details but we currently + only support simple expressions. + + [SCIM RFC]: https://tools.ietf.org/html/rfc7644#section-3.4.2.2 + :param sort_by: str (optional) + Attribute to sort the results. + :param sort_order: :class:`ListSortOrder` (optional) + The order to sort the results. + :param start_index: int (optional) + Specifies the index of the first result. First item is number 1. + + :returns: Iterator over :class:`ServicePrincipal` + + + .. py:method:: patch(id: str [, operations: Optional[List[Patch]], schemas: Optional[List[PatchSchema]]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import iam + + a = AccountClient() + + sp_create = a.service_principals.create(active=True, display_name=f'sdk-{time.time_ns()}') + + sp = a.service_principals.get(id=sp_create.id) + + a.service_principals.patch(id=sp.id, + operations=[iam.Patch(op=iam.PatchOp.REPLACE, path="active", value="false")], + schemas=[iam.PatchSchema.URN_IETF_PARAMS_SCIM_API_MESSAGES_2_0_PATCH_OP]) + + # cleanup + a.service_principals.delete(id=sp_create.id) + + Update service principal details. + + Partially updates the details of a single service principal in the Databricks account. + + :param id: str + Unique ID for a service principal in the Databricks account. + :param operations: List[:class:`Patch`] (optional) + :param schemas: List[:class:`PatchSchema`] (optional) + The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. + + + + + .. py:method:: update(id: str [, active: Optional[bool], application_id: Optional[str], display_name: Optional[str], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], roles: Optional[List[ComplexValue]], schemas: Optional[List[ServicePrincipalSchema]]]) + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import iam + + w = WorkspaceClient() + + created = w.service_principals.create(display_name=f'sdk-{time.time_ns()}') + + w.service_principals.update(id=created.id, + display_name=f'sdk-{time.time_ns()}', + roles=[iam.ComplexValue(value="xyz")]) + + # cleanup + w.service_principals.delete(id=created.id) + + Replace service principal. + + Updates the details of a single service principal. + + This action replaces the existing service principal with the same name. + + :param id: str + Databricks service principal ID. + :param active: bool (optional) + If this user is active + :param application_id: str (optional) + UUID relating to the service principal + :param display_name: str (optional) + String that represents a concatenation of given and family names. + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the service principal. See [assigning entitlements] for a full list of + supported values. + + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + :param groups: List[:class:`ComplexValue`] (optional) + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`ServicePrincipalSchema`] (optional) + The schema of the List response. + + + \ No newline at end of file diff --git a/docs/account/iam/users.rst b/docs/account/iam/users.rst new file mode 100644 index 000000000..54a113542 --- /dev/null +++ b/docs/account/iam/users.rst @@ -0,0 +1,276 @@ +``a.users``: Account Users +========================== +.. currentmodule:: databricks.sdk.service.iam + +.. py:class:: AccountUsersAPI + + User identities recognized by Databricks and represented by email addresses. + + Databricks recommends using SCIM provisioning to sync users and groups automatically from your identity + provider to your Databricks account. SCIM streamlines onboarding a new employee or team by using your + identity provider to create users and groups in Databricks account and give them the proper level of + access. When a user leaves your organization or no longer needs access to Databricks account, admins can + terminate the user in your identity provider and that user’s account will also be removed from + Databricks account. This ensures a consistent offboarding process and prevents unauthorized users from + accessing sensitive data. + + .. py:method:: create( [, active: Optional[bool], display_name: Optional[str], emails: Optional[List[ComplexValue]], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], id: Optional[str], name: Optional[Name], roles: Optional[List[ComplexValue]], schemas: Optional[List[UserSchema]], user_name: Optional[str]]) -> User + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + user = a.users.create(display_name=f'sdk-{time.time_ns()}', user_name=f'sdk-{time.time_ns()}@example.com') + + # cleanup + a.users.delete(id=user.id) + + Create a new user. + + Creates a new user in the Databricks account. This new user will also be added to the Databricks + account. + + :param active: bool (optional) + If this user is active + :param display_name: str (optional) + String that represents a concatenation of given and family names. For example `John Smith`. 
This + field cannot be updated through the Workspace SCIM APIs when [identity federation is enabled]. Use + Account SCIM APIs to update `displayName`. + + [identity federation is enabled]: https://docs.databricks.com/administration-guide/users-groups/best-practices.html#enable-identity-federation + :param emails: List[:class:`ComplexValue`] (optional) + All the emails associated with the Databricks user. + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the user. See [assigning entitlements] for a full list of supported values. + + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + External ID is not currently supported. It is reserved for future use. + :param groups: List[:class:`ComplexValue`] (optional) + :param id: str (optional) + Databricks user ID. This is automatically set by Databricks. Any value provided by the client will + be ignored. + :param name: :class:`Name` (optional) + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`UserSchema`] (optional) + The schema of the user. + :param user_name: str (optional) + Email address of the Databricks user. + + :returns: :class:`User` + + + .. py:method:: delete(id: str) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + other_owner = w.users.create(user_name=f'sdk-{time.time_ns()}@example.com') + + w.users.delete(id=other_owner.id) + + Delete a user. + + Deletes a user. Deleting a user from a Databricks account also removes objects associated with the + user. + + :param id: str + Unique ID for a user in the Databricks account. + + + + + .. py:method:: get(id: str [, attributes: Optional[str], count: Optional[int], excluded_attributes: Optional[str], filter: Optional[str], sort_by: Optional[str], sort_order: Optional[GetSortOrder], start_index: Optional[int]]) -> User + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + user = a.users.create(display_name=f'sdk-{time.time_ns()}', user_name=f'sdk-{time.time_ns()}@example.com') + + by_id = a.users.get(id=user.id) + + # cleanup + a.users.delete(id=user.id) + + Get user details. + + Gets information for a specific user in Databricks account. + + :param id: str + Unique ID for a user in the Databricks account. + :param attributes: str (optional) + Comma-separated list of attributes to return in response. + :param count: int (optional) + Desired number of results per page. Default is 10000. + :param excluded_attributes: str (optional) + Comma-separated list of attributes to exclude in response. + :param filter: str (optional) + Query by which the results have to be filtered. Supported operators are equals(`eq`), + contains(`co`), starts with(`sw`) and not equals(`ne`). Additionally, simple expressions can be + formed using logical operators - `and` and `or`. The [SCIM RFC] has more details but we currently + only support simple expressions. + + [SCIM RFC]: https://tools.ietf.org/html/rfc7644#section-3.4.2.2 + :param sort_by: str (optional) + Attribute to sort the results. Multi-part paths are supported. For example, `userName`, + `name.givenName`, and `emails`. + :param sort_order: :class:`GetSortOrder` (optional) + The order to sort the results. + :param start_index: int (optional) + Specifies the index of the first result. 
First item is number 1. + + :returns: :class:`User` + + + .. py:method:: list( [, attributes: Optional[str], count: Optional[int], excluded_attributes: Optional[str], filter: Optional[str], sort_by: Optional[str], sort_order: Optional[ListSortOrder], start_index: Optional[int]]) -> Iterator[User] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import iam + + w = WorkspaceClient() + + all_users = w.users.list(attributes="id,userName", + sort_by="userName", + sort_order=iam.ListSortOrder.DESCENDING) + + List users. + + Gets details for all the users associated with a Databricks account. + + :param attributes: str (optional) + Comma-separated list of attributes to return in response. + :param count: int (optional) + Desired number of results per page. Default is 10000. + :param excluded_attributes: str (optional) + Comma-separated list of attributes to exclude in response. + :param filter: str (optional) + Query by which the results have to be filtered. Supported operators are equals(`eq`), + contains(`co`), starts with(`sw`) and not equals(`ne`). Additionally, simple expressions can be + formed using logical operators - `and` and `or`. The [SCIM RFC] has more details but we currently + only support simple expressions. + + [SCIM RFC]: https://tools.ietf.org/html/rfc7644#section-3.4.2.2 + :param sort_by: str (optional) + Attribute to sort the results. Multi-part paths are supported. For example, `userName`, + `name.givenName`, and `emails`. + :param sort_order: :class:`ListSortOrder` (optional) + The order to sort the results. + :param start_index: int (optional) + Specifies the index of the first result. First item is number 1. + + :returns: Iterator over :class:`User` + + + .. py:method:: patch(id: str [, operations: Optional[List[Patch]], schemas: Optional[List[PatchSchema]]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import iam + + w = WorkspaceClient() + + user = w.users.create(display_name=f'sdk-{time.time_ns()}', user_name=f'sdk-{time.time_ns()}@example.com') + + w.users.patch(id=user.id, + operations=[iam.Patch(op=iam.PatchOp.REPLACE, path="active", value="false")], + schemas=[iam.PatchSchema.URN_IETF_PARAMS_SCIM_API_MESSAGES_2_0_PATCH_OP]) + + Update user details. + + Partially updates a user resource by applying the supplied operations on specific user attributes. + + :param id: str + Unique ID for a user in the Databricks account. + :param operations: List[:class:`Patch`] (optional) + :param schemas: List[:class:`PatchSchema`] (optional) + The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. + + + + + .. py:method:: update(id: str [, active: Optional[bool], display_name: Optional[str], emails: Optional[List[ComplexValue]], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], name: Optional[Name], roles: Optional[List[ComplexValue]], schemas: Optional[List[UserSchema]], user_name: Optional[str]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + user = w.users.create(display_name=f'sdk-{time.time_ns()}', user_name=f'sdk-{time.time_ns()}@example.com') + + w.users.update(id=user.id, user_name=user.user_name, active=True) + + Replace a user. + + Replaces a user's information with the data supplied in request. + + :param id: str + Databricks user ID. 
This is automatically set by Databricks. Any value provided by the client will + be ignored. + :param active: bool (optional) + If this user is active + :param display_name: str (optional) + String that represents a concatenation of given and family names. For example `John Smith`. This + field cannot be updated through the Workspace SCIM APIs when [identity federation is enabled]. Use + Account SCIM APIs to update `displayName`. + + [identity federation is enabled]: https://docs.databricks.com/administration-guide/users-groups/best-practices.html#enable-identity-federation + :param emails: List[:class:`ComplexValue`] (optional) + All the emails associated with the Databricks user. + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the user. See [assigning entitlements] for a full list of supported values. + + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + External ID is not currently supported. It is reserved for future use. + :param groups: List[:class:`ComplexValue`] (optional) + :param name: :class:`Name` (optional) + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`UserSchema`] (optional) + The schema of the user. + :param user_name: str (optional) + Email address of the Databricks user. + + + \ No newline at end of file diff --git a/docs/account/iam/workspace_assignment.rst b/docs/account/iam/workspace_assignment.rst new file mode 100644 index 000000000..a09af197c --- /dev/null +++ b/docs/account/iam/workspace_assignment.rst @@ -0,0 +1,102 @@ +``a.workspace_assignment``: Workspace Assignment +================================================ +.. currentmodule:: databricks.sdk.service.iam + +.. py:class:: WorkspaceAssignmentAPI + + The Workspace Permission Assignment API allows you to manage workspace permissions for principals in your + account. + + .. py:method:: delete(workspace_id: int, principal_id: int) + + Delete permissions assignment. + + Deletes the workspace permissions assignment in a given account and workspace for the specified + principal. + + :param workspace_id: int + The workspace ID. + :param principal_id: int + The ID of the user, service principal, or group. + + + + + .. py:method:: get(workspace_id: int) -> WorkspacePermissions + + List workspace permissions. + + Get an array of workspace permissions for the specified account and workspace. + + :param workspace_id: int + The workspace ID. + + :returns: :class:`WorkspacePermissions` + + + .. py:method:: list(workspace_id: int) -> Iterator[PermissionAssignment] + + + Usage: + + .. code-block:: + + import os + + from databricks.sdk import AccountClient + + a = AccountClient() + + workspace_id = os.environ["TEST_WORKSPACE_ID"] + + all = a.workspace_assignment.list(list=workspace_id) + + Get permission assignments. + + Get the permission assignments for the specified Databricks account and Databricks workspace. + + :param workspace_id: int + The workspace ID for the account. + + :returns: Iterator over :class:`PermissionAssignment` + + + .. py:method:: update(workspace_id: int, principal_id: int, permissions: List[WorkspacePermission]) + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import iam + + a = AccountClient() + + spn = a.service_principals.create(display_name=f'sdk-{time.time_ns()}') + + spn_id = spn.id + + workspace_id = os.environ["DUMMY_WORKSPACE_ID"] + + a.workspace_assignment.update(workspace_id=workspace_id, + principal_id=spn_id, + permissions=[iam.WorkspacePermission.USER]) + + Create or update permissions assignment. + + Creates or updates the workspace permissions assignment in a given account and workspace for the + specified principal. + + :param workspace_id: int + The workspace ID. + :param principal_id: int + The ID of the user, service principal, or group. + :param permissions: List[:class:`WorkspacePermission`] + Array of permissions assignments to update on the workspace. + + + \ No newline at end of file diff --git a/docs/account/index.rst b/docs/account/index.rst new file mode 100644 index 000000000..8179cb395 --- /dev/null +++ b/docs/account/index.rst @@ -0,0 +1,15 @@ + +Account APIs +============ + +These APIs are available from AccountClient + +.. toctree:: + :maxdepth: 1 + + iam/index + catalog/index + settings/index + provisioning/index + billing/index + oauth2/index \ No newline at end of file diff --git a/docs/account/oauth2/custom_app_integration.rst b/docs/account/oauth2/custom_app_integration.rst new file mode 100644 index 000000000..382ce0bd0 --- /dev/null +++ b/docs/account/oauth2/custom_app_integration.rst @@ -0,0 +1,82 @@ +``a.custom_app_integration``: OAuth Custom App Integration +========================================================== +.. currentmodule:: databricks.sdk.service.oauth2 + +.. py:class:: CustomAppIntegrationAPI + + These APIs enable administrators to manage custom oauth app integrations, which is required for + adding/using Custom OAuth App Integration like Tableau Cloud for Databricks in AWS cloud. + + .. py:method:: create(name: str, redirect_urls: List[str] [, confidential: Optional[bool], scopes: Optional[List[str]], token_access_policy: Optional[TokenAccessPolicy]]) -> CreateCustomAppIntegrationOutput + + Create Custom OAuth App Integration. + + Create Custom OAuth App Integration. + + You can retrieve the custom oauth app integration via :method:CustomAppIntegration/get. + + :param name: str + name of the custom oauth app + :param redirect_urls: List[str] + List of oauth redirect urls + :param confidential: bool (optional) + indicates if an oauth client-secret should be generated + :param scopes: List[str] (optional) + OAuth scopes granted to the application. Supported scopes: all-apis, sql, offline_access, openid, + profile, email. + :param token_access_policy: :class:`TokenAccessPolicy` (optional) + Token access policy + + :returns: :class:`CreateCustomAppIntegrationOutput` + + + .. py:method:: delete(integration_id: str) + + Delete Custom OAuth App Integration. + + Delete an existing Custom OAuth App Integration. You can retrieve the custom oauth app integration via + :method:CustomAppIntegration/get. + + :param integration_id: str + The oauth app integration ID. + + + + + .. py:method:: get(integration_id: str) -> GetCustomAppIntegrationOutput + + Get OAuth Custom App Integration. + + Gets the Custom OAuth App Integration for the given integration id. + + :param integration_id: str + The oauth app integration ID. + + :returns: :class:`GetCustomAppIntegrationOutput` + + + .. py:method:: list() -> Iterator[GetCustomAppIntegrationOutput] + + Get custom oauth app integrations. 
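+
+        A minimal sketch of enumerating the account's integrations (the method takes no
+        arguments; the printed attribute names are assumed to mirror the create/get fields):
+
+        .. code-block::
+
+            from databricks.sdk import AccountClient
+
+            a = AccountClient()
+
+            # Iterate over every custom OAuth app integration in the account.
+            for integration in a.custom_app_integration.list():
+                # integration_id and name are assumed fields of GetCustomAppIntegrationOutput.
+                print(integration.integration_id, integration.name)
+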
+ + Get the list of custom oauth app integrations for the specified Databricks account + + :returns: Iterator over :class:`GetCustomAppIntegrationOutput` + + + .. py:method:: update(integration_id: str [, redirect_urls: Optional[List[str]], token_access_policy: Optional[TokenAccessPolicy]]) + + Updates Custom OAuth App Integration. + + Updates an existing custom OAuth App Integration. You can retrieve the custom oauth app integration + via :method:CustomAppIntegration/get. + + :param integration_id: str + The oauth app integration ID. + :param redirect_urls: List[str] (optional) + List of oauth redirect urls to be updated in the custom oauth app integration + :param token_access_policy: :class:`TokenAccessPolicy` (optional) + Token access policy to be updated in the custom oauth app integration + + + \ No newline at end of file diff --git a/docs/account/oauth2/index.rst b/docs/account/oauth2/index.rst new file mode 100644 index 000000000..a4663ef6b --- /dev/null +++ b/docs/account/oauth2/index.rst @@ -0,0 +1,13 @@ + +OAuth +===== + +Configure OAuth 2.0 application registrations for Databricks + +.. toctree:: + :maxdepth: 1 + + custom_app_integration + o_auth_published_apps + published_app_integration + service_principal_secrets \ No newline at end of file diff --git a/docs/account/oauth2/o_auth_published_apps.rst b/docs/account/oauth2/o_auth_published_apps.rst new file mode 100644 index 000000000..69aecb8ad --- /dev/null +++ b/docs/account/oauth2/o_auth_published_apps.rst @@ -0,0 +1,23 @@ +``a.o_auth_published_apps``: OAuth Published App +================================================ +.. currentmodule:: databricks.sdk.service.oauth2 + +.. py:class:: OAuthPublishedAppsAPI + + These APIs enable administrators to view all the available published OAuth applications in Databricks. + Administrators can add the published OAuth applications to their account through the OAuth Published App + Integration APIs. + + .. py:method:: list( [, page_size: Optional[int], page_token: Optional[str]]) -> Iterator[PublishedAppOutput] + + Get all the published OAuth apps. + + Get all the available published OAuth apps in Databricks. + + :param page_size: int (optional) + The max number of OAuth published apps to return. + :param page_token: str (optional) + A token that can be used to get the next page of results. + + :returns: Iterator over :class:`PublishedAppOutput` + \ No newline at end of file diff --git a/docs/account/oauth2/published_app_integration.rst b/docs/account/oauth2/published_app_integration.rst new file mode 100644 index 000000000..0488415cd --- /dev/null +++ b/docs/account/oauth2/published_app_integration.rst @@ -0,0 +1,73 @@ +``a.published_app_integration``: OAuth Published App Integration +================================================================ +.. currentmodule:: databricks.sdk.service.oauth2 + +.. py:class:: PublishedAppIntegrationAPI + + These APIs enable administrators to manage published oauth app integrations, which is required for + adding/using Published OAuth App Integration like Tableau Desktop for Databricks in AWS cloud. + + .. py:method:: create( [, app_id: Optional[str], token_access_policy: Optional[TokenAccessPolicy]]) -> CreatePublishedAppIntegrationOutput + + Create Published OAuth App Integration. + + Create Published OAuth App Integration. + + You can retrieve the published oauth app integration via :method:PublishedAppIntegration/get. + + :param app_id: str (optional) + app_id of the oauth published app integration. 
For example power-bi, tableau-deskop + :param token_access_policy: :class:`TokenAccessPolicy` (optional) + Token access policy + + :returns: :class:`CreatePublishedAppIntegrationOutput` + + + .. py:method:: delete(integration_id: str) + + Delete Published OAuth App Integration. + + Delete an existing Published OAuth App Integration. You can retrieve the published oauth app + integration via :method:PublishedAppIntegration/get. + + :param integration_id: str + The oauth app integration ID. + + + + + .. py:method:: get(integration_id: str) -> GetPublishedAppIntegrationOutput + + Get OAuth Published App Integration. + + Gets the Published OAuth App Integration for the given integration id. + + :param integration_id: str + The oauth app integration ID. + + :returns: :class:`GetPublishedAppIntegrationOutput` + + + .. py:method:: list() -> Iterator[GetPublishedAppIntegrationOutput] + + Get published oauth app integrations. + + Get the list of published oauth app integrations for the specified Databricks account + + :returns: Iterator over :class:`GetPublishedAppIntegrationOutput` + + + .. py:method:: update(integration_id: str [, token_access_policy: Optional[TokenAccessPolicy]]) + + Updates Published OAuth App Integration. + + Updates an existing published OAuth App Integration. You can retrieve the published oauth app + integration via :method:PublishedAppIntegration/get. + + :param integration_id: str + The oauth app integration ID. + :param token_access_policy: :class:`TokenAccessPolicy` (optional) + Token access policy to be updated in the published oauth app integration + + + \ No newline at end of file diff --git a/docs/account/oauth2/service_principal_secrets.rst b/docs/account/oauth2/service_principal_secrets.rst new file mode 100644 index 000000000..4249b9dea --- /dev/null +++ b/docs/account/oauth2/service_principal_secrets.rst @@ -0,0 +1,56 @@ +``a.service_principal_secrets``: Service Principal Secrets +========================================================== +.. currentmodule:: databricks.sdk.service.oauth2 + +.. py:class:: ServicePrincipalSecretsAPI + + These APIs enable administrators to manage service principal secrets. + + You can use the generated secrets to obtain OAuth access tokens for a service principal, which can then be + used to access Databricks Accounts and Workspace APIs. For more information, see [Authentication using + OAuth tokens for service principals], + + In addition, the generated secrets can be used to configure the Databricks Terraform Provider to + authenticate with the service principal. For more information, see [Databricks Terraform Provider]. + + [Authentication using OAuth tokens for service principals]: https://docs.databricks.com/dev-tools/authentication-oauth.html + [Databricks Terraform Provider]: https://github.com/databricks/terraform-provider-databricks/blob/master/docs/index.md#authenticating-with-service-principal + + .. py:method:: create(service_principal_id: int) -> CreateServicePrincipalSecretResponse + + Create service principal secret. + + Create a secret for the given service principal. + + :param service_principal_id: int + The service principal ID. + + :returns: :class:`CreateServicePrincipalSecretResponse` + + + .. py:method:: delete(service_principal_id: int, secret_id: str) + + Delete service principal secret. + + Delete a secret from the given service principal. + + :param service_principal_id: int + The service principal ID. + :param secret_id: str + The secret ID. + + + + + .. 
py:method:: list(service_principal_id: int) -> Iterator[SecretInfo] + + List service principal secrets. + + List all secrets associated with the given service principal. This operation only returns information + about the secrets themselves and does not include the secret values. + + :param service_principal_id: int + The service principal ID. + + :returns: Iterator over :class:`SecretInfo` + \ No newline at end of file diff --git a/docs/account/provisioning/credentials.rst b/docs/account/provisioning/credentials.rst new file mode 100644 index 000000000..5255a6a29 --- /dev/null +++ b/docs/account/provisioning/credentials.rst @@ -0,0 +1,123 @@ +``a.credentials``: Credential configurations +============================================ +.. currentmodule:: databricks.sdk.service.provisioning + +.. py:class:: CredentialsAPI + + These APIs manage credential configurations for this workspace. Databricks needs access to a cross-account + service IAM role in your AWS account so that Databricks can deploy clusters in the appropriate VPC for the + new workspace. A credential configuration encapsulates this role information, and its ID is used when + creating a new workspace. + + .. py:method:: create(credentials_name: str, aws_credentials: CreateCredentialAwsCredentials) -> Credential + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import provisioning + + a = AccountClient() + + role = a.credentials.create( + credentials_name=f'sdk-{time.time_ns()}', + aws_credentials=provisioning.CreateCredentialAwsCredentials(sts_role=provisioning.CreateCredentialStsRole( + role_arn=os.environ["TEST_CROSSACCOUNT_ARN"]))) + + # cleanup + a.credentials.delete(credentials_id=role.credentials_id) + + Create credential configuration. + + Creates a Databricks credential configuration that represents cloud cross-account credentials for a + specified account. Databricks uses this to set up network infrastructure properly to host Databricks + clusters. For your AWS IAM role, you need to trust the External ID (the Databricks Account API account + ID) in the returned credential object, and configure the required access policy. + + Save the response's `credentials_id` field, which is the ID for your new credential configuration + object. + + For information about how to create a new workspace with this API, see [Create a new workspace using + the Account API] + + [Create a new workspace using the Account API]: http://docs.databricks.com/administration-guide/account-api/new-workspace.html + + :param credentials_name: str + The human-readable name of the credential configuration object. + :param aws_credentials: :class:`CreateCredentialAwsCredentials` + + :returns: :class:`Credential` + + + .. py:method:: delete(credentials_id: str) + + Delete credential configuration. + + Deletes a Databricks credential configuration object for an account, both specified by ID. You cannot + delete a credential that is associated with any workspace. + + :param credentials_id: str + Databricks Account API credential configuration ID + + + + + .. py:method:: get(credentials_id: str) -> Credential + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import provisioning + + a = AccountClient() + + role = a.credentials.create( + credentials_name=f'sdk-{time.time_ns()}', + aws_credentials=provisioning.CreateCredentialAwsCredentials(sts_role=provisioning.CreateCredentialStsRole( + role_arn=os.environ["TEST_CROSSACCOUNT_ARN"]))) + + by_id = a.credentials.get(credentials_id=role.credentials_id) + + # cleanup + a.credentials.delete(credentials_id=role.credentials_id) + + Get credential configuration. + + Gets a Databricks credential configuration object for an account, both specified by ID. + + :param credentials_id: str + Databricks Account API credential configuration ID + + :returns: :class:`Credential` + + + .. py:method:: list() -> Iterator[Credential] + + + Usage: + + .. code-block:: + + from databricks.sdk import AccountClient + + a = AccountClient() + + configs = a.credentials.list() + + Get all credential configurations. + + Gets all Databricks credential configurations associated with an account specified by ID. + + :returns: Iterator over :class:`Credential` + \ No newline at end of file diff --git a/docs/account/provisioning/encryption_keys.rst b/docs/account/provisioning/encryption_keys.rst new file mode 100644 index 000000000..c711727c5 --- /dev/null +++ b/docs/account/provisioning/encryption_keys.rst @@ -0,0 +1,150 @@ +``a.encryption_keys``: Key configurations +========================================= +.. currentmodule:: databricks.sdk.service.provisioning + +.. py:class:: EncryptionKeysAPI + + These APIs manage encryption key configurations for this workspace (optional). A key configuration + encapsulates the AWS KMS key information and some information about how the key configuration can be used. + There are two possible uses for key configurations: + + * Managed services: A key configuration can be used to encrypt a workspace's notebook and secret data in + the control plane, as well as Databricks SQL queries and query history. * Storage: A key configuration can + be used to encrypt a workspace's DBFS and EBS data in the data plane. + + In both of these cases, the key configuration's ID is used when creating a new workspace. This Preview + feature is available if your account is on the E2 version of the platform. Updating a running workspace + with workspace storage encryption requires that the workspace is on the E2 version of the platform. If you + have an older workspace, it might not be on the E2 version of the platform. If you are not sure, contact + your Databricks representative. + + .. py:method:: create(use_cases: List[KeyUseCase] [, aws_key_info: Optional[CreateAwsKeyInfo], gcp_key_info: Optional[CreateGcpKeyInfo]]) -> CustomerManagedKey + + + Usage: + + .. code-block:: + + import os + + from databricks.sdk import AccountClient + from databricks.sdk.service import provisioning + + a = AccountClient() + + created = a.encryption_keys.create(aws_key_info=provisioning.CreateAwsKeyInfo( + key_arn=os.environ["TEST_MANAGED_KMS_KEY_ARN"], key_alias=os.environ["TEST_STORAGE_KMS_KEY_ALIAS"]), + use_cases=[provisioning.KeyUseCase.MANAGED_SERVICES]) + + # cleanup + a.encryption_keys.delete(customer_managed_key_id=created.customer_managed_key_id) + + Create encryption key configuration. + + Creates a customer-managed key configuration object for an account, specified by ID. This operation + uploads a reference to a customer-managed key to Databricks. 
If the key is assigned as a workspace's + customer-managed key for managed services, Databricks uses the key to encrypt the workspaces notebooks + and secrets in the control plane, in addition to Databricks SQL queries and query history. If it is + specified as a workspace's customer-managed key for workspace storage, the key encrypts the + workspace's root S3 bucket (which contains the workspace's root DBFS and system data) and, optionally, + cluster EBS volume data. + + **Important**: Customer-managed keys are supported only for some deployment types, subscription types, + and AWS regions that currently support creation of Databricks workspaces. + + This operation is available only if your account is on the E2 version of the platform or on a select + custom plan that allows multiple workspaces per account. + + :param use_cases: List[:class:`KeyUseCase`] + The cases that the key can be used for. + :param aws_key_info: :class:`CreateAwsKeyInfo` (optional) + :param gcp_key_info: :class:`CreateGcpKeyInfo` (optional) + + :returns: :class:`CustomerManagedKey` + + + .. py:method:: delete(customer_managed_key_id: str) + + Delete encryption key configuration. + + Deletes a customer-managed key configuration object for an account. You cannot delete a configuration + that is associated with a running workspace. + + :param customer_managed_key_id: str + Databricks encryption key configuration ID. + + + + + .. py:method:: get(customer_managed_key_id: str) -> CustomerManagedKey + + + Usage: + + .. code-block:: + + import os + + from databricks.sdk import AccountClient + from databricks.sdk.service import provisioning + + a = AccountClient() + + created = a.encryption_keys.create(aws_key_info=provisioning.CreateAwsKeyInfo( + key_arn=os.environ["TEST_MANAGED_KMS_KEY_ARN"], key_alias=os.environ["TEST_STORAGE_KMS_KEY_ALIAS"]), + use_cases=[provisioning.KeyUseCase.MANAGED_SERVICES]) + + by_id = a.encryption_keys.get(customer_managed_key_id=created.customer_managed_key_id) + + # cleanup + a.encryption_keys.delete(customer_managed_key_id=created.customer_managed_key_id) + + Get encryption key configuration. + + Gets a customer-managed key configuration object for an account, specified by ID. This operation + uploads a reference to a customer-managed key to Databricks. If assigned as a workspace's + customer-managed key for managed services, Databricks uses the key to encrypt the workspaces notebooks + and secrets in the control plane, in addition to Databricks SQL queries and query history. If it is + specified as a workspace's customer-managed key for storage, the key encrypts the workspace's root S3 + bucket (which contains the workspace's root DBFS and system data) and, optionally, cluster EBS volume + data. + + **Important**: Customer-managed keys are supported only for some deployment types, subscription types, + and AWS regions. + + This operation is available only if your account is on the E2 version of the platform.", + + :param customer_managed_key_id: str + Databricks encryption key configuration ID. + + :returns: :class:`CustomerManagedKey` + + + .. py:method:: list() -> Iterator[CustomerManagedKey] + + + Usage: + + .. code-block:: + + from databricks.sdk import AccountClient + + a = AccountClient() + + all = a.encryption_keys.list() + + Get all encryption key configurations. + + Gets all customer-managed key configuration objects for an account. 
If the key is specified as a + workspace's managed services customer-managed key, Databricks uses the key to encrypt the workspace's + notebooks and secrets in the control plane, in addition to Databricks SQL queries and query history. + If the key is specified as a workspace's storage customer-managed key, the key is used to encrypt the + workspace's root S3 bucket and optionally can encrypt cluster EBS volumes data in the data plane. + + **Important**: Customer-managed keys are supported only for some deployment types, subscription types, + and AWS regions. + + This operation is available only if your account is on the E2 version of the platform. + + :returns: Iterator over :class:`CustomerManagedKey` + \ No newline at end of file diff --git a/docs/account/provisioning/index.rst b/docs/account/provisioning/index.rst new file mode 100644 index 000000000..46a328b68 --- /dev/null +++ b/docs/account/provisioning/index.rst @@ -0,0 +1,16 @@ + +Provisioning +============ + +Resource management for secure Databricks Workspace deployment, cross-account IAM roles, storage, encryption, networking and private access. + +.. toctree:: + :maxdepth: 1 + + credentials + encryption_keys + networks + private_access + storage + vpc_endpoints + workspaces \ No newline at end of file diff --git a/docs/account/provisioning/networks.rst b/docs/account/provisioning/networks.rst new file mode 100644 index 000000000..e7491f202 --- /dev/null +++ b/docs/account/provisioning/networks.rst @@ -0,0 +1,123 @@ +``a.networks``: Network configurations +====================================== +.. currentmodule:: databricks.sdk.service.provisioning + +.. py:class:: NetworksAPI + + These APIs manage network configurations for customer-managed VPCs (optional). Its ID is used when + creating a new workspace if you use customer-managed VPCs. + + .. py:method:: create(network_name: str [, gcp_network_info: Optional[GcpNetworkInfo], security_group_ids: Optional[List[str]], subnet_ids: Optional[List[str]], vpc_endpoints: Optional[NetworkVpcEndpoints], vpc_id: Optional[str]]) -> Network + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + netw = a.networks.create(network_name=f'sdk-{time.time_ns()}', + vpc_id=hex(time.time_ns())[2:], + subnet_ids=[hex(time.time_ns())[2:], + hex(time.time_ns())[2:]], + security_group_ids=[hex(time.time_ns())[2:]]) + + Create network configuration. + + Creates a Databricks network configuration that represents an VPC and its resources. The VPC will be + used for new Databricks clusters. This requires a pre-existing VPC and subnets. + + :param network_name: str + The human-readable name of the network configuration. + :param gcp_network_info: :class:`GcpNetworkInfo` (optional) + The Google Cloud specific information for this network (for example, the VPC ID, subnet ID, and + secondary IP ranges). + :param security_group_ids: List[str] (optional) + IDs of one to five security groups associated with this network. Security group IDs **cannot** be + used in multiple network configurations. + :param subnet_ids: List[str] (optional) + IDs of at least two subnets associated with this network. Subnet IDs **cannot** be used in multiple + network configurations. + :param vpc_endpoints: :class:`NetworkVpcEndpoints` (optional) + If specified, contains the VPC endpoints used to allow cluster communication from this VPC over [AWS + PrivateLink]. 
+ + [AWS PrivateLink]: https://aws.amazon.com/privatelink/ + :param vpc_id: str (optional) + The ID of the VPC associated with this network. VPC IDs can be used in multiple network + configurations. + + :returns: :class:`Network` + + + .. py:method:: delete(network_id: str) + + Delete a network configuration. + + Deletes a Databricks network configuration, which represents a cloud VPC and its resources. You cannot + delete a network that is associated with a workspace. + + This operation is available only if your account is on the E2 version of the platform. + + :param network_id: str + Databricks Account API network configuration ID. + + + + + .. py:method:: get(network_id: str) -> Network + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + netw = a.networks.create(network_name=f'sdk-{time.time_ns()}', + vpc_id=hex(time.time_ns())[2:], + subnet_ids=[hex(time.time_ns())[2:], + hex(time.time_ns())[2:]], + security_group_ids=[hex(time.time_ns())[2:]]) + + by_id = a.networks.get(network_id=netw.network_id) + + Get a network configuration. + + Gets a Databricks network configuration, which represents a cloud VPC and its resources. + + :param network_id: str + Databricks Account API network configuration ID. + + :returns: :class:`Network` + + + .. py:method:: list() -> Iterator[Network] + + + Usage: + + .. code-block:: + + from databricks.sdk import AccountClient + + a = AccountClient() + + configs = a.networks.list() + + Get all network configurations. + + Gets a list of all Databricks network configurations for an account, specified by ID. + + This operation is available only if your account is on the E2 version of the platform. + + :returns: Iterator over :class:`Network` + \ No newline at end of file diff --git a/docs/account/provisioning/private_access.rst b/docs/account/provisioning/private_access.rst new file mode 100644 index 000000000..10022068e --- /dev/null +++ b/docs/account/provisioning/private_access.rst @@ -0,0 +1,226 @@ +``a.private_access``: Private Access Settings +============================================= +.. currentmodule:: databricks.sdk.service.provisioning + +.. py:class:: PrivateAccessAPI + + These APIs manage private access settings for this account. + + .. py:method:: create(private_access_settings_name: str, region: str [, allowed_vpc_endpoint_ids: Optional[List[str]], private_access_level: Optional[PrivateAccessLevel], public_access_enabled: Optional[bool]]) -> PrivateAccessSettings + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + created = a.private_access.create(private_access_settings_name=f'sdk-{time.time_ns()}', + region=os.environ["AWS_REGION"]) + + # cleanup + a.private_access.delete(private_access_settings_id=created.private_access_settings_id) + + Create private access settings. + + Creates a private access settings object, which specifies how your workspace is accessed over [AWS + PrivateLink]. To use AWS PrivateLink, a workspace must have a private access settings object + referenced by ID in the workspace's `private_access_settings_id` property. + + You can share one private access settings with multiple workspaces in a single account. However, + private access settings are specific to AWS regions, so only workspaces in the same AWS region can use + a given private access settings object. + + Before configuring PrivateLink, read the [Databricks article about PrivateLink]. 
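+
+        In addition to the minimal example above, an endpoint-scoped configuration can be
+        sketched as follows (assuming the ``ENDPOINT`` member of :class:`PrivateAccessLevel`;
+        the region variable and VPC endpoint ID are placeholders):
+
+        .. code-block::
+
+            import os
+            import time
+
+            from databricks.sdk import AccountClient
+            from databricks.sdk.service import provisioning
+
+            a = AccountClient()
+
+            # Only the registered VPC endpoints in the allow list may connect.
+            locked_down = a.private_access.create(
+                private_access_settings_name=f'sdk-{time.time_ns()}',
+                region=os.environ["AWS_REGION"],
+                private_access_level=provisioning.PrivateAccessLevel.ENDPOINT,
+                allowed_vpc_endpoint_ids=["<databricks-vpc-endpoint-id>"],
+                public_access_enabled=False)
+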
+ + [AWS PrivateLink]: https://aws.amazon.com/privatelink + [Databricks article about PrivateLink]: https://docs.databricks.com/administration-guide/cloud-configurations/aws/privatelink.html + + :param private_access_settings_name: str + The human-readable name of the private access settings object. + :param region: str + The cloud region for workspaces associated with this private access settings object. + :param allowed_vpc_endpoint_ids: List[str] (optional) + An array of Databricks VPC endpoint IDs. This is the Databricks ID that is returned when registering + the VPC endpoint configuration in your Databricks account. This is not the ID of the VPC endpoint in + AWS. + + Only used when `private_access_level` is set to `ENDPOINT`. This is an allow list of VPC endpoints + that in your account that can connect to your workspace over AWS PrivateLink. + + If hybrid access to your workspace is enabled by setting `public_access_enabled` to `true`, this + control only works for PrivateLink connections. To control how your workspace is accessed via public + internet, see [IP access lists]. + + [IP access lists]: https://docs.databricks.com/security/network/ip-access-list.html + :param private_access_level: :class:`PrivateAccessLevel` (optional) + The private access level controls which VPC endpoints can connect to the UI or API of any workspace + that attaches this private access settings object. * `ACCOUNT` level access (the default) allows + only VPC endpoints that are registered in your Databricks account connect to your workspace. * + `ENDPOINT` level access allows only specified VPC endpoints connect to your workspace. For details, + see `allowed_vpc_endpoint_ids`. + :param public_access_enabled: bool (optional) + Determines if the workspace can be accessed over public internet. For fully private workspaces, you + can optionally specify `false`, but only if you implement both the front-end and the back-end + PrivateLink connections. Otherwise, specify `true`, which means that public access is enabled. + + :returns: :class:`PrivateAccessSettings` + + + .. py:method:: delete(private_access_settings_id: str) + + Delete a private access settings object. + + Deletes a private access settings object, which determines how your workspace is accessed over [AWS + PrivateLink]. + + Before configuring PrivateLink, read the [Databricks article about PrivateLink].", + + [AWS PrivateLink]: https://aws.amazon.com/privatelink + [Databricks article about PrivateLink]: https://docs.databricks.com/administration-guide/cloud-configurations/aws/privatelink.html + + :param private_access_settings_id: str + Databricks Account API private access settings ID. + + + + + .. py:method:: get(private_access_settings_id: str) -> PrivateAccessSettings + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + created = a.private_access.create(private_access_settings_name=f'sdk-{time.time_ns()}', + region=os.environ["AWS_REGION"]) + + by_id = a.private_access.get(private_access_settings_id=created.private_access_settings_id) + + # cleanup + a.private_access.delete(private_access_settings_id=created.private_access_settings_id) + + Get a private access settings object. + + Gets a private access settings object, which specifies how your workspace is accessed over [AWS + PrivateLink]. 
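+
+        The returned object can be inspected to audit how a workspace is exposed (a sketch;
+        the attribute names are assumed to mirror the ``create()`` parameters and the settings
+        ID is a placeholder):
+
+        .. code-block::
+
+            from databricks.sdk import AccountClient
+
+            a = AccountClient()
+
+            settings = a.private_access.get(private_access_settings_id="<settings-id>")
+
+            # public_access_enabled and private_access_settings_name are assumed attributes.
+            if settings.public_access_enabled:
+                print(f"{settings.private_access_settings_name} also allows public internet access")
+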
+ + Before configuring PrivateLink, read the [Databricks article about PrivateLink].", + + [AWS PrivateLink]: https://aws.amazon.com/privatelink + [Databricks article about PrivateLink]: https://docs.databricks.com/administration-guide/cloud-configurations/aws/privatelink.html + + :param private_access_settings_id: str + Databricks Account API private access settings ID. + + :returns: :class:`PrivateAccessSettings` + + + .. py:method:: list() -> Iterator[PrivateAccessSettings] + + + Usage: + + .. code-block:: + + from databricks.sdk import AccountClient + + a = AccountClient() + + all = a.private_access.list() + + Get all private access settings objects. + + Gets a list of all private access settings objects for an account, specified by ID. + + :returns: Iterator over :class:`PrivateAccessSettings` + + + .. py:method:: replace(private_access_settings_id: str, private_access_settings_name: str, region: str [, allowed_vpc_endpoint_ids: Optional[List[str]], private_access_level: Optional[PrivateAccessLevel], public_access_enabled: Optional[bool]]) + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + created = a.private_access.create(private_access_settings_name=f'sdk-{time.time_ns()}', + region=os.environ["AWS_REGION"]) + + a.private_access.replace(private_access_settings_id=created.private_access_settings_id, + private_access_settings_name=f'sdk-{time.time_ns()}', + region=os.environ["AWS_REGION"]) + + # cleanup + a.private_access.delete(private_access_settings_id=created.private_access_settings_id) + + Replace private access settings. + + Updates an existing private access settings object, which specifies how your workspace is accessed + over [AWS PrivateLink]. To use AWS PrivateLink, a workspace must have a private access settings object + referenced by ID in the workspace's `private_access_settings_id` property. + + This operation completely overwrites your existing private access settings object attached to your + workspaces. All workspaces attached to the private access settings are affected by any change. If + `public_access_enabled`, `private_access_level`, or `allowed_vpc_endpoint_ids` are updated, effects of + these changes might take several minutes to propagate to the workspace API. + + You can share one private access settings object with multiple workspaces in a single account. + However, private access settings are specific to AWS regions, so only workspaces in the same AWS + region can use a given private access settings object. + + Before configuring PrivateLink, read the [Databricks article about PrivateLink]. + + [AWS PrivateLink]: https://aws.amazon.com/privatelink + [Databricks article about PrivateLink]: https://docs.databricks.com/administration-guide/cloud-configurations/aws/privatelink.html + + :param private_access_settings_id: str + Databricks Account API private access settings ID. + :param private_access_settings_name: str + The human-readable name of the private access settings object. + :param region: str + The cloud region for workspaces associated with this private access settings object. + :param allowed_vpc_endpoint_ids: List[str] (optional) + An array of Databricks VPC endpoint IDs. This is the Databricks ID that is returned when registering + the VPC endpoint configuration in your Databricks account. This is not the ID of the VPC endpoint in + AWS. + + Only used when `private_access_level` is set to `ENDPOINT`. 
This is an allow list of VPC endpoints + that in your account that can connect to your workspace over AWS PrivateLink. + + If hybrid access to your workspace is enabled by setting `public_access_enabled` to `true`, this + control only works for PrivateLink connections. To control how your workspace is accessed via public + internet, see [IP access lists]. + + [IP access lists]: https://docs.databricks.com/security/network/ip-access-list.html + :param private_access_level: :class:`PrivateAccessLevel` (optional) + The private access level controls which VPC endpoints can connect to the UI or API of any workspace + that attaches this private access settings object. * `ACCOUNT` level access (the default) allows + only VPC endpoints that are registered in your Databricks account connect to your workspace. * + `ENDPOINT` level access allows only specified VPC endpoints connect to your workspace. For details, + see `allowed_vpc_endpoint_ids`. + :param public_access_enabled: bool (optional) + Determines if the workspace can be accessed over public internet. For fully private workspaces, you + can optionally specify `false`, but only if you implement both the front-end and the back-end + PrivateLink connections. Otherwise, specify `true`, which means that public access is enabled. + + + \ No newline at end of file diff --git a/docs/account/provisioning/storage.rst b/docs/account/provisioning/storage.rst new file mode 100644 index 000000000..611a8cdc6 --- /dev/null +++ b/docs/account/provisioning/storage.rst @@ -0,0 +1,114 @@ +``a.storage``: Storage configurations +===================================== +.. currentmodule:: databricks.sdk.service.provisioning + +.. py:class:: StorageAPI + + These APIs manage storage configurations for this workspace. A root storage S3 bucket in your account is + required to store objects like cluster logs, notebook revisions, and job results. You can also use the + root storage S3 bucket for storage of non-production DBFS data. A storage configuration encapsulates this + bucket information, and its ID is used when creating a new workspace. + + .. py:method:: create(storage_configuration_name: str, root_bucket_info: RootBucketInfo) -> StorageConfiguration + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import provisioning + + a = AccountClient() + + storage = a.storage.create( + storage_configuration_name=f'sdk-{time.time_ns()}', + root_bucket_info=provisioning.RootBucketInfo(bucket_name=os.environ["TEST_ROOT_BUCKET"])) + + # cleanup + a.storage.delete(storage_configuration_id=storage.storage_configuration_id) + + Create new storage configuration. + + Creates new storage configuration for an account, specified by ID. Uploads a storage configuration + object that represents the root AWS S3 bucket in your account. Databricks stores related workspace + assets including DBFS, cluster logs, and job results. For the AWS S3 bucket, you need to configure the + required bucket policy. + + For information about how to create a new workspace with this API, see [Create a new workspace using + the Account API] + + [Create a new workspace using the Account API]: http://docs.databricks.com/administration-guide/account-api/new-workspace.html + + :param storage_configuration_name: str + The human-readable name of the storage configuration. + :param root_bucket_info: :class:`RootBucketInfo` + Root S3 bucket information. + + :returns: :class:`StorageConfiguration` + + + .. 
py:method:: delete(storage_configuration_id: str) + + Delete storage configuration. + + Deletes a Databricks storage configuration. You cannot delete a storage configuration that is + associated with any workspace. + + :param storage_configuration_id: str + Databricks Account API storage configuration ID. + + + + + .. py:method:: get(storage_configuration_id: str) -> StorageConfiguration + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import provisioning + + a = AccountClient() + + storage = a.storage.create(storage_configuration_name=f'sdk-{time.time_ns()}', + root_bucket_info=provisioning.RootBucketInfo(bucket_name=f'sdk-{time.time_ns()}')) + + by_id = a.storage.get(storage_configuration_id=storage.storage_configuration_id) + + Get storage configuration. + + Gets a Databricks storage configuration for an account, both specified by ID. + + :param storage_configuration_id: str + Databricks Account API storage configuration ID. + + :returns: :class:`StorageConfiguration` + + + .. py:method:: list() -> Iterator[StorageConfiguration] + + + Usage: + + .. code-block:: + + from databricks.sdk import AccountClient + + a = AccountClient() + + configs = a.storage.list() + + Get all storage configurations. + + Gets a list of all Databricks storage configurations for your account, specified by ID. + + :returns: Iterator over :class:`StorageConfiguration` + \ No newline at end of file diff --git a/docs/account/provisioning/vpc_endpoints.rst b/docs/account/provisioning/vpc_endpoints.rst new file mode 100644 index 000000000..d2622dc0f --- /dev/null +++ b/docs/account/provisioning/vpc_endpoints.rst @@ -0,0 +1,135 @@ +``a.vpc_endpoints``: VPC Endpoint Configurations +================================================ +.. currentmodule:: databricks.sdk.service.provisioning + +.. py:class:: VpcEndpointsAPI + + These APIs manage VPC endpoint configurations for this account. + + .. py:method:: create(vpc_endpoint_name: str [, aws_vpc_endpoint_id: Optional[str], gcp_vpc_endpoint_info: Optional[GcpVpcEndpointInfo], region: Optional[str]]) -> VpcEndpoint + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + created = a.vpc_endpoints.create(aws_vpc_endpoint_id=os.environ["TEST_RELAY_VPC_ENDPOINT"], + region=os.environ["AWS_REGION"], + vpc_endpoint_name=f'sdk-{time.time_ns()}') + + # cleanup + a.vpc_endpoints.delete(vpc_endpoint_id=created.vpc_endpoint_id) + + Create VPC endpoint configuration. + + Creates a VPC endpoint configuration, which represents a [VPC endpoint] object in AWS used to + communicate privately with Databricks over [AWS PrivateLink]. + + After you create the VPC endpoint configuration, the Databricks [endpoint service] automatically + accepts the VPC endpoint. + + Before configuring PrivateLink, read the [Databricks article about PrivateLink]. + + [AWS PrivateLink]: https://aws.amazon.com/privatelink + [Databricks article about PrivateLink]: https://docs.databricks.com/administration-guide/cloud-configurations/aws/privatelink.html + [VPC endpoint]: https://docs.aws.amazon.com/vpc/latest/privatelink/vpc-endpoints.html + [endpoint service]: https://docs.aws.amazon.com/vpc/latest/privatelink/privatelink-share-your-services.html + + :param vpc_endpoint_name: str + The human-readable name of the storage configuration. + :param aws_vpc_endpoint_id: str (optional) + The ID of the VPC endpoint object in AWS. 
+ :param gcp_vpc_endpoint_info: :class:`GcpVpcEndpointInfo` (optional) + The Google Cloud specific information for this Private Service Connect endpoint. + :param region: str (optional) + The AWS region in which this VPC endpoint object exists. + + :returns: :class:`VpcEndpoint` + + + .. py:method:: delete(vpc_endpoint_id: str) + + Delete VPC endpoint configuration. + + Deletes a VPC endpoint configuration, which represents an [AWS VPC endpoint] that can communicate + privately with Databricks over [AWS PrivateLink]. + + Before configuring PrivateLink, read the [Databricks article about PrivateLink]. + + [AWS PrivateLink]: https://aws.amazon.com/privatelink + [AWS VPC endpoint]: https://docs.aws.amazon.com/vpc/latest/privatelink/concepts.html + [Databricks article about PrivateLink]: https://docs.databricks.com/administration-guide/cloud-configurations/aws/privatelink.html + + :param vpc_endpoint_id: str + Databricks VPC endpoint ID. + + + + + .. py:method:: get(vpc_endpoint_id: str) -> VpcEndpoint + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + created = a.vpc_endpoints.create(aws_vpc_endpoint_id=os.environ["TEST_RELAY_VPC_ENDPOINT"], + region=os.environ["AWS_REGION"], + vpc_endpoint_name=f'sdk-{time.time_ns()}') + + by_id = a.vpc_endpoints.get(vpc_endpoint_id=created.vpc_endpoint_id) + + # cleanup + a.vpc_endpoints.delete(vpc_endpoint_id=created.vpc_endpoint_id) + + Get a VPC endpoint configuration. + + Gets a VPC endpoint configuration, which represents a [VPC endpoint] object in AWS used to communicate + privately with Databricks over [AWS PrivateLink]. + + [AWS PrivateLink]: https://aws.amazon.com/privatelink + [VPC endpoint]: https://docs.aws.amazon.com/vpc/latest/privatelink/concepts.html + + :param vpc_endpoint_id: str + Databricks VPC endpoint ID. + + :returns: :class:`VpcEndpoint` + + + .. py:method:: list() -> Iterator[VpcEndpoint] + + + Usage: + + .. code-block:: + + from databricks.sdk import AccountClient + + a = AccountClient() + + all = a.vpc_endpoints.list() + + Get all VPC endpoint configurations. + + Gets a list of all VPC endpoints for an account, specified by ID. + + Before configuring PrivateLink, read the [Databricks article about PrivateLink]. + + [Databricks article about PrivateLink]: https://docs.databricks.com/administration-guide/cloud-configurations/aws/privatelink.html + + :returns: Iterator over :class:`VpcEndpoint` + \ No newline at end of file diff --git a/docs/account/provisioning/workspaces.rst b/docs/account/provisioning/workspaces.rst new file mode 100644 index 000000000..41f46f881 --- /dev/null +++ b/docs/account/provisioning/workspaces.rst @@ -0,0 +1,424 @@ +``a.workspaces``: Workspaces +============================ +.. currentmodule:: databricks.sdk.service.provisioning + +.. py:class:: WorkspacesAPI + + These APIs manage workspaces for this account. A Databricks workspace is an environment for accessing all + of your Databricks assets. The workspace organizes objects (notebooks, libraries, and experiments) into + folders, and provides access to data and computational resources such as clusters and jobs. + + These endpoints are available if your account is on the E2 version of the platform or on a select custom + plan that allows multiple workspaces per account. + + .. 
py:method:: create(workspace_name: str [, aws_region: Optional[str], cloud: Optional[str], cloud_resource_container: Optional[CloudResourceContainer], credentials_id: Optional[str], custom_tags: Optional[Dict[str, str]], deployment_name: Optional[str], gcp_managed_network_config: Optional[GcpManagedNetworkConfig], gke_config: Optional[GkeConfig], location: Optional[str], managed_services_customer_managed_key_id: Optional[str], network_id: Optional[str], pricing_tier: Optional[PricingTier], private_access_settings_id: Optional[str], storage_configuration_id: Optional[str], storage_customer_managed_key_id: Optional[str]]) -> Wait[Workspace] + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import provisioning + + a = AccountClient() + + storage = a.storage.create( + storage_configuration_name=f'sdk-{time.time_ns()}', + root_bucket_info=provisioning.RootBucketInfo(bucket_name=os.environ["TEST_ROOT_BUCKET"])) + + role = a.credentials.create( + credentials_name=f'sdk-{time.time_ns()}', + aws_credentials=provisioning.CreateCredentialAwsCredentials(sts_role=provisioning.CreateCredentialStsRole( + role_arn=os.environ["TEST_CROSSACCOUNT_ARN"]))) + + created = a.workspaces.create(workspace_name=f'sdk-{time.time_ns()}', + aws_region=os.environ["AWS_REGION"], + credentials_id=role.credentials_id, + storage_configuration_id=storage.storage_configuration_id).result() + + # cleanup + a.storage.delete(storage_configuration_id=storage.storage_configuration_id) + a.credentials.delete(credentials_id=role.credentials_id) + a.workspaces.delete(workspace_id=created.workspace_id) + + Create a new workspace. + + Creates a new workspace. + + **Important**: This operation is asynchronous. A response with HTTP status code 200 means the request + has been accepted and is in progress, but does not mean that the workspace deployed successfully and + is running. The initial workspace status is typically `PROVISIONING`. Use the workspace ID + (`workspace_id`) field in the response to identify the new workspace and make repeated `GET` requests + with the workspace ID and check its status. The workspace becomes available when the status changes to + `RUNNING`. + + :param workspace_name: str + The workspace's human-readable name. + :param aws_region: str (optional) + The AWS region of the workspace's data plane. + :param cloud: str (optional) + The cloud provider which the workspace uses. For Google Cloud workspaces, always set this field to + `gcp`. + :param cloud_resource_container: :class:`CloudResourceContainer` (optional) + The general workspace configurations that are specific to cloud providers. + :param credentials_id: str (optional) + ID of the workspace's credential configuration object. + :param custom_tags: Dict[str,str] (optional) + The custom tags key-value pairing that is attached to this workspace. The key-value pair is a string + of utf-8 characters. The value can be an empty string, with maximum length of 255 characters. The + key can be of maximum length of 127 characters, and cannot be empty. + :param deployment_name: str (optional) + The deployment name defines part of the subdomain for the workspace. The workspace URL for the web + application and REST APIs is `.cloud.databricks.com`. For example, if the + deployment name is `abcsales`, your workspace URL will be `https://abcsales.cloud.databricks.com`. + Hyphens are allowed. This property supports only the set of characters that are allowed in a + subdomain. 
+ + To set this value, you must have a deployment name prefix. Contact your Databricks account team to + add an account deployment name prefix to your account. + + Workspace deployment names follow the account prefix and a hyphen. For example, if your account's + deployment prefix is `acme` and the workspace deployment name is `workspace-1`, the JSON response + for the `deployment_name` field becomes `acme-workspace-1`. The workspace URL would be + `acme-workspace-1.cloud.databricks.com`. + + You can also set the `deployment_name` to the reserved keyword `EMPTY` if you want the deployment + name to only include the deployment prefix. For example, if your account's deployment prefix is + `acme` and the workspace deployment name is `EMPTY`, the `deployment_name` becomes `acme` only and + the workspace URL is `acme.cloud.databricks.com`. + + This value must be unique across all non-deleted deployments across all AWS regions. + + If a new workspace omits this property, the server generates a unique deployment name for you with + the pattern `dbc-xxxxxxxx-xxxx`. + :param gcp_managed_network_config: :class:`GcpManagedNetworkConfig` (optional) + The network settings for the workspace. The configurations are only for Databricks-managed VPCs. It + is ignored if you specify a customer-managed VPC in the `network_id` field.", All the IP range + configurations must be mutually exclusive. An attempt to create a workspace fails if Databricks + detects an IP range overlap. + + Specify custom IP ranges in CIDR format. The IP ranges for these fields must not overlap, and all IP + addresses must be entirely within the following ranges: `10.0.0.0/8`, `100.64.0.0/10`, + `172.16.0.0/12`, `192.168.0.0/16`, and `240.0.0.0/4`. + + The sizes of these IP ranges affect the maximum number of nodes for the workspace. + + **Important**: Confirm the IP ranges used by your Databricks workspace before creating the + workspace. You cannot change them after your workspace is deployed. If the IP address ranges for + your Databricks are too small, IP exhaustion can occur, causing your Databricks jobs to fail. To + determine the address range sizes that you need, Databricks provides a calculator as a Microsoft + Excel spreadsheet. See [calculate subnet sizes for a new workspace]. + + [calculate subnet sizes for a new workspace]: https://docs.gcp.databricks.com/administration-guide/cloud-configurations/gcp/network-sizing.html + :param gke_config: :class:`GkeConfig` (optional) + The configurations for the GKE cluster of a Databricks workspace. + :param location: str (optional) + The Google Cloud region of the workspace data plane in your Google account. For example, `us-east4`. + :param managed_services_customer_managed_key_id: str (optional) + The ID of the workspace's managed services encryption key configuration object. This is used to help + protect and control access to the workspace's notebooks, secrets, Databricks SQL queries, and query + history. The provided key configuration object property `use_cases` must contain `MANAGED_SERVICES`. + :param network_id: str (optional) + :param pricing_tier: :class:`PricingTier` (optional) + The pricing tier of the workspace. For pricing tier information, see [AWS Pricing]. + + [AWS Pricing]: https://databricks.com/product/aws-pricing + :param private_access_settings_id: str (optional) + ID of the workspace's private access settings object. Only used for PrivateLink. 
This ID must be + specified for customers using [AWS PrivateLink] for either front-end (user-to-workspace connection), + back-end (data plane to control plane connection), or both connection types. + + Before configuring PrivateLink, read the [Databricks article about PrivateLink].", + + [AWS PrivateLink]: https://aws.amazon.com/privatelink/ + [Databricks article about PrivateLink]: https://docs.databricks.com/administration-guide/cloud-configurations/aws/privatelink.html + :param storage_configuration_id: str (optional) + The ID of the workspace's storage configuration object. + :param storage_customer_managed_key_id: str (optional) + The ID of the workspace's storage encryption key configuration object. This is used to encrypt the + workspace's root S3 bucket (root DBFS and system data) and, optionally, cluster EBS volumes. The + provided key configuration object property `use_cases` must contain `STORAGE`. + + :returns: + Long-running operation waiter for :class:`Workspace`. + See :method:wait_get_workspace_running for more details. + + + .. py:method:: create_and_wait(workspace_name: str [, aws_region: Optional[str], cloud: Optional[str], cloud_resource_container: Optional[CloudResourceContainer], credentials_id: Optional[str], custom_tags: Optional[Dict[str, str]], deployment_name: Optional[str], gcp_managed_network_config: Optional[GcpManagedNetworkConfig], gke_config: Optional[GkeConfig], location: Optional[str], managed_services_customer_managed_key_id: Optional[str], network_id: Optional[str], pricing_tier: Optional[PricingTier], private_access_settings_id: Optional[str], storage_configuration_id: Optional[str], storage_customer_managed_key_id: Optional[str], timeout: datetime.timedelta = 0:20:00]) -> Workspace + + + .. py:method:: delete(workspace_id: int) + + Delete a workspace. + + Terminates and deletes a Databricks workspace. From an API perspective, deletion is immediate. + However, it might take a few minutes for all workspaces resources to be deleted, depending on the size + and number of workspace resources. + + This operation is available only if your account is on the E2 version of the platform or on a select + custom plan that allows multiple workspaces per account. + + :param workspace_id: int + Workspace ID. + + + + + .. py:method:: get(workspace_id: int) -> Workspace + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import provisioning + + a = AccountClient() + + storage = a.storage.create( + storage_configuration_name=f'sdk-{time.time_ns()}', + root_bucket_info=provisioning.RootBucketInfo(bucket_name=os.environ["TEST_ROOT_BUCKET"])) + + role = a.credentials.create( + credentials_name=f'sdk-{time.time_ns()}', + aws_credentials=provisioning.CreateCredentialAwsCredentials(sts_role=provisioning.CreateCredentialStsRole( + role_arn=os.environ["TEST_CROSSACCOUNT_ARN"]))) + + created = a.workspaces.create(workspace_name=f'sdk-{time.time_ns()}', + aws_region=os.environ["AWS_REGION"], + credentials_id=role.credentials_id, + storage_configuration_id=storage.storage_configuration_id).result() + + by_id = a.workspaces.get(workspace_id=created.workspace_id) + + # cleanup + a.storage.delete(storage_configuration_id=storage.storage_configuration_id) + a.credentials.delete(credentials_id=role.credentials_id) + a.workspaces.delete(workspace_id=created.workspace_id) + + Get a workspace. + + Gets information including status for a Databricks workspace, specified by ID. 
In the response, the + `workspace_status` field indicates the current status. After initial workspace creation (which is + asynchronous), make repeated `GET` requests with the workspace ID and check its status. The workspace + becomes available when the status changes to `RUNNING`. + + For information about how to create a new workspace with this API **including error handling**, see + [Create a new workspace using the Account API]. + + This operation is available only if your account is on the E2 version of the platform or on a select + custom plan that allows multiple workspaces per account. + + [Create a new workspace using the Account API]: http://docs.databricks.com/administration-guide/account-api/new-workspace.html + + :param workspace_id: int + Workspace ID. + + :returns: :class:`Workspace` + + + .. py:method:: list() -> Iterator[Workspace] + + + Usage: + + .. code-block:: + + from databricks.sdk import AccountClient + + a = AccountClient() + + all = a.workspaces.list() + + Get all workspaces. + + Gets a list of all workspaces associated with an account, specified by ID. + + This operation is available only if your account is on the E2 version of the platform or on a select + custom plan that allows multiple workspaces per account. + + :returns: Iterator over :class:`Workspace` + + + .. py:method:: update(workspace_id: int [, aws_region: Optional[str], credentials_id: Optional[str], custom_tags: Optional[Dict[str, str]], managed_services_customer_managed_key_id: Optional[str], network_connectivity_config_id: Optional[str], network_id: Optional[str], storage_configuration_id: Optional[str], storage_customer_managed_key_id: Optional[str]]) -> Wait[Workspace] + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import provisioning + + a = AccountClient() + + storage = a.storage.create( + storage_configuration_name=f'sdk-{time.time_ns()}', + root_bucket_info=provisioning.RootBucketInfo(bucket_name=os.environ["TEST_ROOT_BUCKET"])) + + role = a.credentials.create( + credentials_name=f'sdk-{time.time_ns()}', + aws_credentials=provisioning.CreateCredentialAwsCredentials(sts_role=provisioning.CreateCredentialStsRole( + role_arn=os.environ["TEST_CROSSACCOUNT_ARN"]))) + + update_role = a.credentials.create( + credentials_name=f'sdk-{time.time_ns()}', + aws_credentials=provisioning.CreateCredentialAwsCredentials(sts_role=provisioning.CreateCredentialStsRole( + role_arn=os.environ["TEST_CROSSACCOUNT_ARN"]))) + + created = a.workspaces.create(workspace_name=f'sdk-{time.time_ns()}', + aws_region=os.environ["AWS_REGION"], + credentials_id=role.credentials_id, + storage_configuration_id=storage.storage_configuration_id).result() + + _ = a.workspaces.update(workspace_id=created.workspace_id, credentials_id=update_role.credentials_id).result() + + # cleanup + a.storage.delete(storage_configuration_id=storage.storage_configuration_id) + a.credentials.delete(credentials_id=role.credentials_id) + a.credentials.delete(credentials_id=update_role.credentials_id) + a.workspaces.delete(workspace_id=created.workspace_id) + + Update workspace configuration. + + Updates a workspace configuration for either a running workspace or a failed workspace. The elements + that can be updated varies between these two use cases. + + ### Update a failed workspace You can update a Databricks workspace configuration for failed workspace + deployment for some fields, but not all fields. 
For a failed workspace, this request supports updates + to the following fields only: - Credential configuration ID - Storage configuration ID - Network + configuration ID. Used only to add or change a network configuration for a customer-managed VPC. For a + failed workspace only, you can convert a workspace with Databricks-managed VPC to use a + customer-managed VPC by adding this ID. You cannot downgrade a workspace with a customer-managed VPC + to be a Databricks-managed VPC. You can update the network configuration for a failed or running + workspace to add PrivateLink support, though you must also add a private access settings object. - Key + configuration ID for managed services (control plane storage, such as notebook source and Databricks + SQL queries). Used only if you use customer-managed keys for managed services. - Key configuration ID + for workspace storage (root S3 bucket and, optionally, EBS volumes). Used only if you use + customer-managed keys for workspace storage. **Important**: If the workspace was ever in the running + state, even if briefly before becoming a failed workspace, you cannot add a new key configuration ID + for workspace storage. - Private access settings ID to add PrivateLink support. You can add or update + the private access settings ID to upgrade a workspace to add support for front-end, back-end, or both + types of connectivity. You cannot remove (downgrade) any existing front-end or back-end PrivateLink + support on a workspace. - Custom tags. Given you provide an empty custom tags, the update would not be + applied. + + After calling the `PATCH` operation to update the workspace configuration, make repeated `GET` + requests with the workspace ID and check the workspace status. The workspace is successful if the + status changes to `RUNNING`. + + For information about how to create a new workspace with this API **including error handling**, see + [Create a new workspace using the Account API]. + + ### Update a running workspace You can update a Databricks workspace configuration for running + workspaces for some fields, but not all fields. For a running workspace, this request supports + updating the following fields only: - Credential configuration ID + + - Network configuration ID. Used only if you already use a customer-managed VPC. You cannot convert a + running workspace from a Databricks-managed VPC to a customer-managed VPC. You can use a network + configuration update in this API for a failed or running workspace to add support for PrivateLink, + although you also need to add a private access settings object. + + - Key configuration ID for managed services (control plane storage, such as notebook source and + Databricks SQL queries). Databricks does not directly encrypt the data with the customer-managed key + (CMK). Databricks uses both the CMK and the Databricks managed key (DMK) that is unique to your + workspace to encrypt the Data Encryption Key (DEK). Databricks uses the DEK to encrypt your + workspace's managed services persisted data. If the workspace does not already have a CMK for managed + services, adding this ID enables managed services encryption for new or updated data. Existing managed + services data that existed before adding the key remains not encrypted with the DEK until it is + modified. If the workspace already has customer-managed keys for managed services, this request + rotates (changes) the CMK keys and the DEK is re-encrypted with the DMK and the new CMK. 
- Key + configuration ID for workspace storage (root S3 bucket and, optionally, EBS volumes). You can set this + only if the workspace does not already have a customer-managed key configuration for workspace + storage. - Private access settings ID to add PrivateLink support. You can add or update the private + access settings ID to upgrade a workspace to add support for front-end, back-end, or both types of + connectivity. You cannot remove (downgrade) any existing front-end or back-end PrivateLink support on + a workspace. - Custom tags. Given you provide an empty custom tags, the update would not be applied. + + **Important**: To update a running workspace, your workspace must have no running compute resources + that run in your workspace's VPC in the Classic data plane. For example, stop all all-purpose + clusters, job clusters, pools with running clusters, and Classic SQL warehouses. If you do not + terminate all cluster instances in the workspace before calling this API, the request will fail. + + ### Wait until changes take effect. After calling the `PATCH` operation to update the workspace + configuration, make repeated `GET` requests with the workspace ID and check the workspace status and + the status of the fields. * For workspaces with a Databricks-managed VPC, the workspace status becomes + `PROVISIONING` temporarily (typically under 20 minutes). If the workspace update is successful, the + workspace status changes to `RUNNING`. Note that you can also check the workspace status in the + [Account Console]. However, you cannot use or create clusters for another 20 minutes after that status + change. This results in a total of up to 40 minutes in which you cannot create clusters. If you create + or use clusters before this time interval elapses, clusters do not launch successfully, fail, or could + cause other unexpected behavior. + + * For workspaces with a customer-managed VPC, the workspace status stays at status `RUNNING` and the + VPC change happens immediately. A change to the storage customer-managed key configuration ID might + take a few minutes to update, so continue to check the workspace until you observe that it has been + updated. If the update fails, the workspace might revert silently to its original configuration. After + the workspace has been updated, you cannot use or create clusters for another 20 minutes. If you + create or use clusters before this time interval elapses, clusters do not launch successfully, fail, + or could cause other unexpected behavior. + + If you update the _storage_ customer-managed key configurations, it takes 20 minutes for the changes + to fully take effect. During the 20 minute wait, it is important that you stop all REST API calls to + the DBFS API. If you are modifying _only the managed services key configuration_, you can omit the 20 + minute wait. + + **Important**: Customer-managed keys and customer-managed VPCs are supported by only some deployment + types and subscription types. If you have questions about availability, contact your Databricks + representative. + + This operation is available only if your account is on the E2 version of the platform or on a select + custom plan that allows multiple workspaces per account. + + [Account Console]: https://docs.databricks.com/administration-guide/account-settings-e2/account-console-e2.html + [Create a new workspace using the Account API]: http://docs.databricks.com/administration-guide/account-api/new-workspace.html + + :param workspace_id: int + Workspace ID. 
+ :param aws_region: str (optional) + The AWS region of the workspace's data plane (for example, `us-west-2`). This parameter is available + only for updating failed workspaces. + :param credentials_id: str (optional) + ID of the workspace's credential configuration object. This parameter is available for updating both + failed and running workspaces. + :param custom_tags: Dict[str,str] (optional) + The custom tags key-value pairing that is attached to this workspace. The key-value pair is a string + of utf-8 characters. The value can be an empty string, with maximum length of 255 characters. The + key can be of maximum length of 127 characters, and cannot be empty. + :param managed_services_customer_managed_key_id: str (optional) + The ID of the workspace's managed services encryption key configuration object. This parameter is + available only for updating failed workspaces. + :param network_connectivity_config_id: str (optional) + The ID of the network connectivity configuration object, which is the parent resource of this + private endpoint rule object. + :param network_id: str (optional) + The ID of the workspace's network configuration object. Used only if you already use a + customer-managed VPC. For failed workspaces only, you can switch from a Databricks-managed VPC to a + customer-managed VPC by updating the workspace to add a network configuration ID. + :param storage_configuration_id: str (optional) + The ID of the workspace's storage configuration object. This parameter is available only for + updating failed workspaces. + :param storage_customer_managed_key_id: str (optional) + The ID of the key configuration object for workspace storage. This parameter is available for + updating both failed and running workspaces. + + :returns: + Long-running operation waiter for :class:`Workspace`. + See :method:wait_get_workspace_running for more details. + + + .. py:method:: update_and_wait(workspace_id: int [, aws_region: Optional[str], credentials_id: Optional[str], custom_tags: Optional[Dict[str, str]], managed_services_customer_managed_key_id: Optional[str], network_connectivity_config_id: Optional[str], network_id: Optional[str], storage_configuration_id: Optional[str], storage_customer_managed_key_id: Optional[str], timeout: datetime.timedelta = 0:20:00]) -> Workspace + + + .. py:method:: wait_get_workspace_running(workspace_id: int, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[Workspace], None]]) -> Workspace diff --git a/docs/account/settings/index.rst b/docs/account/settings/index.rst new file mode 100644 index 000000000..ece6a3255 --- /dev/null +++ b/docs/account/settings/index.rst @@ -0,0 +1,12 @@ + +Settings +======== + +Manage security settings for Accounts and Workspaces + +.. toctree:: + :maxdepth: 1 + + ip_access_lists + network_connectivity + settings \ No newline at end of file diff --git a/docs/account/settings/ip_access_lists.rst b/docs/account/settings/ip_access_lists.rst new file mode 100644 index 000000000..135dd2b55 --- /dev/null +++ b/docs/account/settings/ip_access_lists.rst @@ -0,0 +1,227 @@ +``a.ip_access_lists``: Account IP Access Lists +============================================== +.. currentmodule:: databricks.sdk.service.settings + +.. py:class:: AccountIpAccessListsAPI + + The Accounts IP Access List API enables account admins to configure IP access lists for access to the + account console. + + Account IP Access Lists affect web application access and REST API access to the account console and + account APIs. 
If the feature is disabled for the account, all access is allowed for this account. There is + support for allow lists (inclusion) and block lists (exclusion). + + When a connection is attempted: 1. **First, all block lists are checked.** If the connection IP address + matches any block list, the connection is rejected. 2. **If the connection was not rejected by block + lists**, the IP address is compared with the allow lists. + + If there is at least one allow list for the account, the connection is allowed only if the IP address + matches an allow list. If there are no allow lists for the account, all IP addresses are allowed. + + For all allow lists and block lists combined, the account supports a maximum of 1000 IP/CIDR values, where + one CIDR counts as a single value. + + After changes to the account-level IP access lists, it can take a few minutes for changes to take effect. + + .. py:method:: create(label: str, list_type: ListType [, ip_addresses: Optional[List[str]]]) -> CreateIpAccessListResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import settings + + w = WorkspaceClient() + + created = w.ip_access_lists.create(label=f'sdk-{time.time_ns()}', + ip_addresses=["1.0.0.0/16"], + list_type=settings.ListType.BLOCK) + + # cleanup + w.ip_access_lists.delete(ip_access_list_id=created.ip_access_list.list_id) + + Create access list. + + Creates an IP access list for the account. + + A list can be an allow list or a block list. See the top of this file for a description of how the + server treats allow lists and block lists at runtime. + + When creating or updating an IP access list: + + * For all allow lists and block lists combined, the API supports a maximum of 1000 IP/CIDR values, + where one CIDR counts as a single value. Attempts to exceed that number return error 400 with + `error_code` value `QUOTA_EXCEEDED`. * If the new list would block the calling user's current IP, + error 400 is returned with `error_code` value `INVALID_STATE`. + + It can take a few minutes for the changes to take effect. + + :param label: str + Label for the IP access list. This **cannot** be empty. + :param list_type: :class:`ListType` + Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. IP addresses in the block list are excluded even if they are included in an allow list. + :param ip_addresses: List[str] (optional) + + :returns: :class:`CreateIpAccessListResponse` + + + .. py:method:: delete(ip_access_list_id: str) + + Delete access list. + + Deletes an IP access list, specified by its list ID. + + :param ip_access_list_id: str + The ID for the corresponding IP access list + + + + + .. py:method:: get(ip_access_list_id: str) -> GetIpAccessListResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import settings + + w = WorkspaceClient() + + created = w.ip_access_lists.create(label=f'sdk-{time.time_ns()}', + ip_addresses=["1.0.0.0/16"], + list_type=settings.ListType.BLOCK) + + by_id = w.ip_access_lists.get(ip_access_list_id=created.ip_access_list.list_id) + + # cleanup + w.ip_access_lists.delete(ip_access_list_id=created.ip_access_list.list_id) + + Get IP access list. + + Gets an IP access list, specified by its list ID. 
+ + :param ip_access_list_id: str + The ID for the corresponding IP access list + + :returns: :class:`GetIpAccessListResponse` + + + .. py:method:: list() -> Iterator[IpAccessListInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.ip_access_lists.list() + + Get access lists. + + Gets all IP access lists for the specified account. + + :returns: Iterator over :class:`IpAccessListInfo` + + + .. py:method:: replace(ip_access_list_id: str, label: str, list_type: ListType, enabled: bool [, ip_addresses: Optional[List[str]]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import settings + + w = WorkspaceClient() + + created = w.ip_access_lists.create(label=f'sdk-{time.time_ns()}', + ip_addresses=["1.0.0.0/16"], + list_type=settings.ListType.BLOCK) + + w.ip_access_lists.replace(ip_access_list_id=created.ip_access_list.list_id, + label=f'sdk-{time.time_ns()}', + ip_addresses=["1.0.0.0/24"], + list_type=settings.ListType.BLOCK, + enabled=False) + + # cleanup + w.ip_access_lists.delete(ip_access_list_id=created.ip_access_list.list_id) + + Replace access list. + + Replaces an IP access list, specified by its ID. + + A list can include allow lists and block lists. See the top of this file for a description of how the + server treats allow lists and block lists at run time. When replacing an IP access list: * For all + allow lists and block lists combined, the API supports a maximum of 1000 IP/CIDR values, where one + CIDR counts as a single value. Attempts to exceed that number return error 400 with `error_code` value + `QUOTA_EXCEEDED`. * If the resulting list would block the calling user's current IP, error 400 is + returned with `error_code` value `INVALID_STATE`. It can take a few minutes for the changes to take + effect. + + :param ip_access_list_id: str + The ID for the corresponding IP access list + :param label: str + Label for the IP access list. This **cannot** be empty. + :param list_type: :class:`ListType` + Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. IP addresses in the block list are excluded even if they are included in an allow list. + :param enabled: bool + Specifies whether this IP access list is enabled. + :param ip_addresses: List[str] (optional) + + + + + .. py:method:: update(ip_access_list_id: str [, enabled: Optional[bool], ip_addresses: Optional[List[str]], label: Optional[str], list_type: Optional[ListType]]) + + Update access list. + + Updates an existing IP access list, specified by its ID. + + A list can include allow lists and block lists. See the top of this file for a description of how the + server treats allow lists and block lists at run time. + + When updating an IP access list: + + * For all allow lists and block lists combined, the API supports a maximum of 1000 IP/CIDR values, + where one CIDR counts as a single value. Attempts to exceed that number return error 400 with + `error_code` value `QUOTA_EXCEEDED`. * If the updated list would block the calling user's current IP, + error 400 is returned with `error_code` value `INVALID_STATE`. + + It can take a few minutes for the changes to take effect. + + :param ip_access_list_id: str + The ID for the corresponding IP access list + :param enabled: bool (optional) + Specifies whether this IP access list is enabled. 
+ :param ip_addresses: List[str] (optional) + :param label: str (optional) + Label for the IP access list. This **cannot** be empty. + :param list_type: :class:`ListType` (optional) + Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. IP addresses in the block list are excluded even if they are included in an allow list. + + + \ No newline at end of file diff --git a/docs/account/settings/network_connectivity.rst b/docs/account/settings/network_connectivity.rst new file mode 100644 index 000000000..979491e4f --- /dev/null +++ b/docs/account/settings/network_connectivity.rst @@ -0,0 +1,146 @@ +``a.network_connectivity``: Network Connectivity +================================================ +.. currentmodule:: databricks.sdk.service.settings + +.. py:class:: NetworkConnectivityAPI + + These APIs provide configurations for the network connectivity of your workspaces for serverless compute + resources. This API provides stable subnets for your workspace so that you can configure your firewalls on + your Azure Storage accounts to allow access from Databricks. You can also use the API to provision private + endpoints for Databricks to privately connect serverless compute resources to your Azure resources using + Azure Private Link. See [configure serverless secure connectivity]. + + [configure serverless secure connectivity]: https://learn.microsoft.com/azure/databricks/security/network/serverless-network-security + + .. py:method:: create_network_connectivity_configuration(name: str, region: str) -> NetworkConnectivityConfiguration + + Create a network connectivity configuration. + + Creates a network connectivity configuration (NCC), which provides stable Azure service subnets when + accessing your Azure Storage accounts. You can also use a network connectivity configuration to create + Databricks-managed private endpoints so that Databricks serverless compute resources privately access + your resources. + + **IMPORTANT**: After you create the network connectivity configuration, you must assign one or more + workspaces to the new network connectivity configuration. You can share one network connectivity + configuration with multiple workspaces from the same Azure region within the same Databricks account. + See [configure serverless secure connectivity]. + + [configure serverless secure connectivity]: https://learn.microsoft.com/azure/databricks/security/network/serverless-network-security + + :param name: str + The name of the network connectivity configuration. The name can contain alphanumeric characters, + hyphens, and underscores. The length must be between 3 and 30 characters. The name must match the + regular expression `^[0-9a-zA-Z-_]{3,30}$`. + :param region: str + The Azure region for this network connectivity configuration. Only workspaces in the same Azure + region can be attached to this network connectivity configuration. + + :returns: :class:`NetworkConnectivityConfiguration` + + + .. py:method:: create_private_endpoint_rule(network_connectivity_config_id: str, resource_id: str, group_id: CreatePrivateEndpointRuleRequestGroupId) -> NccAzurePrivateEndpointRule + + Create a private endpoint rule. + + Create a private endpoint rule for the specified network connectivity config object. Once the object + is created, Databricks asynchronously provisions a new Azure private endpoint to your specified Azure + resource. 
+ + **IMPORTANT**: You must use Azure portal or other Azure tools to approve the private endpoint to + complete the connection. To get information about the private endpoint that was created, make a `GET` request + on the new private endpoint rule. See [serverless private link]. + + [serverless private link]: https://learn.microsoft.com/azure/databricks/security/network/serverless-network-security/serverless-private-link + + :param network_connectivity_config_id: str + Your Network Connectivity Configuration ID. + :param resource_id: str + The Azure resource ID of the target resource. + :param group_id: :class:`CreatePrivateEndpointRuleRequestGroupId` + The sub-resource type (group ID) of the target resource. Note that to connect to workspace root + storage (root DBFS), you need two endpoints, one for `blob` and one for `dfs`. + + :returns: :class:`NccAzurePrivateEndpointRule` + + + .. py:method:: delete_network_connectivity_configuration(network_connectivity_config_id: str) + + Delete a network connectivity configuration. + + Deletes a network connectivity configuration. + + :param network_connectivity_config_id: str + Your Network Connectivity Configuration ID. + + + + + .. py:method:: delete_private_endpoint_rule(network_connectivity_config_id: str, private_endpoint_rule_id: str) -> NccAzurePrivateEndpointRule + + Delete a private endpoint rule. + + Initiates deleting a private endpoint rule. The private endpoint will be deactivated and will be + purged after seven days of deactivation. When a private endpoint is in the deactivated state, the + `deactivated` field is set to `true` and the private endpoint is not available to your serverless + compute resources. + + :param network_connectivity_config_id: str + Your Network Connectivity Configuration ID. + :param private_endpoint_rule_id: str + Your private endpoint rule ID. + + :returns: :class:`NccAzurePrivateEndpointRule` + + + .. py:method:: get_network_connectivity_configuration(network_connectivity_config_id: str) -> NetworkConnectivityConfiguration + + Get a network connectivity configuration. + + Gets a network connectivity configuration. + + :param network_connectivity_config_id: str + Your Network Connectivity Configuration ID. + + :returns: :class:`NetworkConnectivityConfiguration` + + + .. py:method:: get_private_endpoint_rule(network_connectivity_config_id: str, private_endpoint_rule_id: str) -> NccAzurePrivateEndpointRule + + Get a private endpoint rule. + + Gets the private endpoint rule. + + :param network_connectivity_config_id: str + Your Network Connectivity Configuration ID. + :param private_endpoint_rule_id: str + Your private endpoint rule ID. + + :returns: :class:`NccAzurePrivateEndpointRule` + + + .. py:method:: list_network_connectivity_configurations( [, page_token: Optional[str]]) -> Iterator[NetworkConnectivityConfiguration] + + List network connectivity configurations. + + Gets an array of network connectivity configurations. + + :param page_token: str (optional) + Pagination token to go to next page based on previous query. + + :returns: Iterator over :class:`NetworkConnectivityConfiguration` + + + .. py:method:: list_private_endpoint_rules(network_connectivity_config_id: str [, page_token: Optional[str]]) -> Iterator[NccAzurePrivateEndpointRule] + + List private endpoint rules. + + Gets an array of private endpoint rules. + + :param network_connectivity_config_id: str + Your Network Connectivity Configuration ID. + :param page_token: str (optional) + Pagination token to go to next page based on previous query. 
+ + :returns: Iterator over :class:`NccAzurePrivateEndpointRule` + \ No newline at end of file diff --git a/docs/account/settings/settings.rst b/docs/account/settings/settings.rst new file mode 100644 index 000000000..7f9d44534 --- /dev/null +++ b/docs/account/settings/settings.rst @@ -0,0 +1,58 @@ +``a.settings``: Personal Compute Enablement +=========================================== +.. currentmodule:: databricks.sdk.service.settings + +.. py:class:: AccountSettingsAPI + + The Personal Compute enablement setting lets you control which users can use the Personal Compute default + policy to create compute resources. By default all users in all workspaces have access (ON), but you can + change the setting to instead let individual workspaces configure access control (DELEGATE). + + There is only one instance of this setting per account. Since this setting has a default value, this + setting is present on all accounts even though it's never set on a given account. Deletion reverts the + value of the setting back to the default value. + + .. py:method:: delete_personal_compute_setting(etag: str) -> DeletePersonalComputeSettingResponse + + Delete Personal Compute setting. + + Reverts back the Personal Compute setting value to default (ON) + + :param etag: str + etag used for versioning. The response is at least as fresh as the eTag provided. This is used for + optimistic concurrency control as a way to help prevent simultaneous writes of a setting overwriting + each other. It is strongly suggested that systems make use of the etag in the read -> delete pattern + to perform setting deletions in order to avoid race conditions. That is, get an etag from a GET + request, and pass it with the DELETE request to identify the rule set version you are deleting. + + :returns: :class:`DeletePersonalComputeSettingResponse` + + + .. py:method:: read_personal_compute_setting(etag: str) -> PersonalComputeSetting + + Get Personal Compute setting. + + Gets the value of the Personal Compute setting. + + :param etag: str + etag used for versioning. The response is at least as fresh as the eTag provided. This is used for + optimistic concurrency control as a way to help prevent simultaneous writes of a setting overwriting + each other. It is strongly suggested that systems make use of the etag in the read -> delete pattern + to perform setting deletions in order to avoid race conditions. That is, get an etag from a GET + request, and pass it with the DELETE request to identify the rule set version you are deleting. + + :returns: :class:`PersonalComputeSetting` + + + .. py:method:: update_personal_compute_setting( [, allow_missing: Optional[bool], setting: Optional[PersonalComputeSetting]]) -> PersonalComputeSetting + + Update Personal Compute setting. + + Updates the value of the Personal Compute setting. + + :param allow_missing: bool (optional) + This should always be set to true for Settings RPCs. Added for AIP compliance. + :param setting: :class:`PersonalComputeSetting` (optional) + + :returns: :class:`PersonalComputeSetting` + \ No newline at end of file diff --git a/docs/authentication.md b/docs/authentication.md index d5d0ef84f..4307ac3e6 100644 --- a/docs/authentication.md +++ b/docs/authentication.md @@ -84,13 +84,13 @@ The Databricks SDK for Python picks up an Azure CLI token, if you've previously To authenticate as an Azure Active Directory (Azure AD) service principal, you must provide one of the following. 
See also [Add a service principal to your Azure Databricks account](https://learn.microsoft.com/azure/databricks/administration-guide/users-groups/service-principals#add-sp-account): -- `azure_resource_id`, `azure_client_secret`, `azure_client_id`, and `azure_tenant_id`; or their environment variable or `.databrickscfg` file field equivalents. -- `azure_resource_id` and `azure_use_msi`; or their environment variable or `.databrickscfg` file field equivalents. +- `azure_workspace_resource_id`, `azure_client_secret`, `azure_client_id`, and `azure_tenant_id`; or their environment variable or `.databrickscfg` file field equivalents. +- `azure_workspace_resource_id` and `azure_use_msi`; or their environment variable or `.databrickscfg` file field equivalents. | Argument | Description | Environment variable | |-----------------------|-------------|----------------------| -| `azure_resource_id` | _(String)_ The Azure Resource Manager ID for the Azure Databricks workspace, which is exchanged for a Databricks host URL. | `DATABRICKS_AZURE_RESOURCE_ID` | -| `azure_use_msi` | _(Boolean)_ `true` to use Azure Managed Service Identity passwordless authentication flow for service principals. _This feature is not yet implemented in the Databricks SDK for Python._ | `ARM_USE_MSI` | +| `azure_workspace_resource_id` | _(String)_ The Azure Resource Manager ID for the Azure Databricks workspace, which is exchanged for a Databricks host URL. | `DATABRICKS_AZURE_RESOURCE_ID` | +| `azure_use_msi` | _(Boolean)_ `true` to use Azure Managed Service Identity passwordless authentication flow for service principals. | `ARM_USE_MSI` | | `azure_client_secret` | _(String)_ The Azure AD service principal's client secret. | `ARM_CLIENT_SECRET` | | `azure_client_id` | _(String)_ The Azure AD service principal's application ID. | `ARM_CLIENT_ID` | | `azure_tenant_id` | _(String)_ The Azure AD service principal's tenant ID. | `ARM_TENANT_ID` | diff --git a/docs/autogen/billing.rst b/docs/autogen/billing.rst deleted file mode 100644 index dbc17aa98..000000000 --- a/docs/autogen/billing.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``billing``: Billing -==================== - -Configure different aspects of Databricks billing and usage. - -.. automodule:: databricks.sdk.service.billing - :members: - :undoc-members: diff --git a/docs/autogen/catalog.rst b/docs/autogen/catalog.rst deleted file mode 100644 index f61dc413f..000000000 --- a/docs/autogen/catalog.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``catalog``: Unity Catalog -========================== - -Configure data governance with Unity Catalog for metastores, catalogs, schemas, tables, external locations, and storage credentials - -.. automodule:: databricks.sdk.service.catalog - :members: - :undoc-members: diff --git a/docs/autogen/compute.rst b/docs/autogen/compute.rst deleted file mode 100644 index 26ff959a5..000000000 --- a/docs/autogen/compute.rst +++ /dev/null @@ -1,14 +0,0 @@ - -``compute``: Compute -==================== - -Use and configure compute for Databricks - -.. automodule:: databricks.sdk.service.compute - :members: - :undoc-members: - -.. automodule:: databricks.sdk.mixins.compute - :members: - :inherited-members: - :undoc-members: diff --git a/docs/autogen/files.rst b/docs/autogen/files.rst deleted file mode 100644 index e0aa0dbb6..000000000 --- a/docs/autogen/files.rst +++ /dev/null @@ -1,14 +0,0 @@ - -``files``: File Management -========================== - -Manage files on Databricks in a filesystem-like interface - -.. 
automodule:: databricks.sdk.service.files - :members: - :undoc-members: - -.. automodule:: databricks.sdk.mixins.files - :members: - :inherited-members: - :undoc-members: diff --git a/docs/autogen/iam.rst b/docs/autogen/iam.rst deleted file mode 100644 index da047eccf..000000000 --- a/docs/autogen/iam.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``iam``: Identity and Access Management -======================================= - -Manage users, service principals, groups and their permissions in Accounts and Workspaces - -.. automodule:: databricks.sdk.service.iam - :members: - :undoc-members: diff --git a/docs/autogen/jobs.rst b/docs/autogen/jobs.rst deleted file mode 100644 index cb406749c..000000000 --- a/docs/autogen/jobs.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``jobs``: Jobs -============== - -Schedule automated jobs on Databricks Workspaces - -.. automodule:: databricks.sdk.service.jobs - :members: - :undoc-members: diff --git a/docs/autogen/ml.rst b/docs/autogen/ml.rst deleted file mode 100644 index 6e51ec501..000000000 --- a/docs/autogen/ml.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``ml``: Machine Learning -======================== - -Create and manage experiments, features, and other machine learning artifacts - -.. automodule:: databricks.sdk.service.ml - :members: - :undoc-members: diff --git a/docs/autogen/oauth2.rst b/docs/autogen/oauth2.rst deleted file mode 100644 index d210688ec..000000000 --- a/docs/autogen/oauth2.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``oauth2``: OAuth -================= - -Configure OAuth 2.0 application registrations for Databricks - -.. automodule:: databricks.sdk.service.oauth2 - :members: - :undoc-members: diff --git a/docs/autogen/pipelines.rst b/docs/autogen/pipelines.rst deleted file mode 100644 index 091042bd5..000000000 --- a/docs/autogen/pipelines.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``pipelines``: Delta Live Tables -================================ - -Manage pipelines, runs, and other Delta Live Table resources - -.. automodule:: databricks.sdk.service.pipelines - :members: - :undoc-members: diff --git a/docs/autogen/provisioning.rst b/docs/autogen/provisioning.rst deleted file mode 100644 index c17a200fa..000000000 --- a/docs/autogen/provisioning.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``provisioning``: Provisioning -============================== - -Resource management for secure Databricks Workspace deployment, cross-account IAM roles, storage, encryption, networking and private access. - -.. automodule:: databricks.sdk.service.provisioning - :members: - :undoc-members: diff --git a/docs/autogen/serving.rst b/docs/autogen/serving.rst deleted file mode 100644 index 73f3e5aef..000000000 --- a/docs/autogen/serving.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``serving``: Real-time Serving -============================== - -Use real-time inference for machine learning - -.. automodule:: databricks.sdk.service.serving - :members: - :undoc-members: diff --git a/docs/autogen/settings.rst b/docs/autogen/settings.rst deleted file mode 100644 index f0d41a1e6..000000000 --- a/docs/autogen/settings.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``settings``: Settings -====================== - -Manage security settings for Accounts and Workspaces - -.. 
automodule:: databricks.sdk.service.settings - :members: - :undoc-members: diff --git a/docs/autogen/sharing.rst b/docs/autogen/sharing.rst deleted file mode 100644 index cef34fcd8..000000000 --- a/docs/autogen/sharing.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``sharing``: Delta Sharing -========================== - -Configure data sharing with Unity Catalog for providers, recipients, and shares - -.. automodule:: databricks.sdk.service.sharing - :members: - :undoc-members: diff --git a/docs/autogen/sql.rst b/docs/autogen/sql.rst deleted file mode 100644 index 1f816cb6f..000000000 --- a/docs/autogen/sql.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``sql``: Databricks SQL -======================= - -Manage Databricks SQL assets, including warehouses, dashboards, queries and query history, and alerts - -.. automodule:: databricks.sdk.service.sql - :members: - :undoc-members: diff --git a/docs/autogen/workspace.rst b/docs/autogen/workspace.rst deleted file mode 100644 index 36fcad4b4..000000000 --- a/docs/autogen/workspace.rst +++ /dev/null @@ -1,14 +0,0 @@ - -``workspace``: Databricks Workspace -=================================== - -Manage workspace-level entities that include notebooks, Git checkouts, and secrets - -.. automodule:: databricks.sdk.service.workspace - :members: - :undoc-members: - -.. automodule:: databricks.sdk.mixins.workspace - :members: - :inherited-members: - :undoc-members: diff --git a/docs/clients.rst b/docs/clients.rst deleted file mode 100644 index 359c3ee3b..000000000 --- a/docs/clients.rst +++ /dev/null @@ -1,11 +0,0 @@ - -SDK Clients -=========== - -.. autoclass:: databricks.sdk.WorkspaceClient - :members: - :undoc-members: - -.. autoclass:: databricks.sdk.AccountClient - :members: - :undoc-members: diff --git a/docs/clients/account.rst b/docs/clients/account.rst new file mode 100644 index 000000000..7862cd978 --- /dev/null +++ b/docs/clients/account.rst @@ -0,0 +1,6 @@ +Account Client +============== + +.. autoclass:: databricks.sdk.AccountClient + :members: + :undoc-members: diff --git a/docs/clients/workspace.rst b/docs/clients/workspace.rst new file mode 100644 index 000000000..31bad0ec9 --- /dev/null +++ b/docs/clients/workspace.rst @@ -0,0 +1,6 @@ +Workspace Client +================ + +.. autoclass:: databricks.sdk.WorkspaceClient + :members: + :undoc-members: diff --git a/docs/conf.py b/docs/conf.py index 5bab1f4ba..d184be022 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -18,7 +18,8 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'myst_parser', 'enum_tools.autoenum' + 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'myst_parser', + 'db_sphinx_ext' ] templates_path = ['_templates'] @@ -44,3 +45,5 @@ autodoc_default_options = { 'member-order': 'bysource', } + +toc_object_entries = False diff --git a/docs/db_sphinx_ext.py b/docs/db_sphinx_ext.py new file mode 100644 index 000000000..71fc0010a --- /dev/null +++ b/docs/db_sphinx_ext.py @@ -0,0 +1,9 @@ +def remove_class_signature(app, what, name, obj, options, signature, return_annotation): + if what == "class": + # Set the signature to None for classes. Otherwise, there is duplication of the dataclass parameters and + # documentation, and there is far too much visual noise. 
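+        # Returning None here suppresses the duplicated class signature; members listed via :members: are still rendered.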
+ return (None, return_annotation) + return (signature, return_annotation) + +def setup(app): + app.connect('autodoc-process-signature', remove_class_signature) diff --git a/docs/dbdataclasses/billing.rst b/docs/dbdataclasses/billing.rst new file mode 100644 index 000000000..ac99067b1 --- /dev/null +++ b/docs/dbdataclasses/billing.rst @@ -0,0 +1,36 @@ +Billing +======= + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.billing`` module. + +.. py:currentmodule:: databricks.sdk.service.billing +.. autoclass:: Budget + :members: +.. autoclass:: BudgetAlert + :members: +.. autoclass:: BudgetList + :members: +.. autoclass:: BudgetWithStatus + :members: +.. autoclass:: BudgetWithStatusStatusDailyItem + :members: +.. autoclass:: CreateLogDeliveryConfigurationParams + :members: +.. autoclass:: DownloadResponse + :members: +.. autoclass:: LogDeliveryConfiguration + :members: +.. autoclass:: LogDeliveryStatus + :members: +.. autoclass:: UpdateLogDeliveryConfigurationStatusRequest + :members: +.. autoclass:: WrappedBudget + :members: +.. autoclass:: WrappedBudgetWithStatus + :members: +.. autoclass:: WrappedCreateLogDeliveryConfiguration + :members: +.. autoclass:: WrappedLogDeliveryConfiguration + :members: +.. autoclass:: WrappedLogDeliveryConfigurations + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/catalog.rst b/docs/dbdataclasses/catalog.rst new file mode 100644 index 000000000..c4cc0a5b4 --- /dev/null +++ b/docs/dbdataclasses/catalog.rst @@ -0,0 +1,214 @@ +Unity Catalog +============= + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.catalog`` module. + +.. py:currentmodule:: databricks.sdk.service.catalog +.. autoclass:: AccountsCreateMetastore + :members: +.. autoclass:: AccountsCreateMetastoreAssignment + :members: +.. autoclass:: AccountsCreateStorageCredential + :members: +.. autoclass:: AccountsMetastoreAssignment + :members: +.. autoclass:: AccountsMetastoreInfo + :members: +.. autoclass:: AccountsStorageCredentialInfo + :members: +.. autoclass:: AccountsUpdateMetastore + :members: +.. autoclass:: AccountsUpdateMetastoreAssignment + :members: +.. autoclass:: AccountsUpdateStorageCredential + :members: +.. autoclass:: ArtifactAllowlistInfo + :members: +.. autoclass:: ArtifactMatcher + :members: +.. autoclass:: AwsIamRole + :members: +.. autoclass:: AzureManagedIdentity + :members: +.. autoclass:: AzureServicePrincipal + :members: +.. autoclass:: CatalogInfo + :members: +.. autoclass:: CloudflareApiToken + :members: +.. autoclass:: ColumnInfo + :members: +.. autoclass:: ColumnMask + :members: +.. autoclass:: ConnectionInfo + :members: +.. autoclass:: CreateCatalog + :members: +.. autoclass:: CreateConnection + :members: +.. autoclass:: CreateExternalLocation + :members: +.. autoclass:: CreateFunction + :members: +.. autoclass:: CreateFunctionRequest + :members: +.. autoclass:: CreateMetastore + :members: +.. autoclass:: CreateMetastoreAssignment + :members: +.. autoclass:: CreateRegisteredModelRequest + :members: +.. autoclass:: CreateSchema + :members: +.. autoclass:: CreateStorageCredential + :members: +.. autoclass:: CreateTableConstraint + :members: +.. autoclass:: CreateVolumeRequestContent + :members: +.. autoclass:: CurrentWorkspaceBindings + :members: +.. autoclass:: DatabricksGcpServiceAccountResponse + :members: +.. autoclass:: DeltaRuntimePropertiesKvPairs + :members: +.. 
autoclass:: Dependency + :members: +.. autoclass:: DependencyList + :members: +.. autoclass:: EffectivePermissionsList + :members: +.. autoclass:: EffectivePredictiveOptimizationFlag + :members: +.. autoclass:: EffectivePrivilege + :members: +.. autoclass:: EffectivePrivilegeAssignment + :members: +.. autoclass:: EncryptionDetails + :members: +.. autoclass:: ExternalLocationInfo + :members: +.. autoclass:: ForeignKeyConstraint + :members: +.. autoclass:: FunctionDependency + :members: +.. autoclass:: FunctionInfo + :members: +.. autoclass:: FunctionParameterInfo + :members: +.. autoclass:: FunctionParameterInfos + :members: +.. autoclass:: GetMetastoreSummaryResponse + :members: +.. autoclass:: ListAccountMetastoreAssignmentsResponse + :members: +.. autoclass:: ListCatalogsResponse + :members: +.. autoclass:: ListConnectionsResponse + :members: +.. autoclass:: ListExternalLocationsResponse + :members: +.. autoclass:: ListFunctionsResponse + :members: +.. autoclass:: ListMetastoresResponse + :members: +.. autoclass:: ListModelVersionsResponse + :members: +.. autoclass:: ListRegisteredModelsResponse + :members: +.. autoclass:: ListSchemasResponse + :members: +.. autoclass:: ListStorageCredentialsResponse + :members: +.. autoclass:: ListSystemSchemasResponse + :members: +.. autoclass:: ListTableSummariesResponse + :members: +.. autoclass:: ListTablesResponse + :members: +.. autoclass:: ListVolumesResponseContent + :members: +.. autoclass:: MetastoreAssignment + :members: +.. autoclass:: MetastoreInfo + :members: +.. autoclass:: ModelVersionInfo + :members: +.. autoclass:: NamedTableConstraint + :members: +.. autoclass:: PermissionsChange + :members: +.. autoclass:: PermissionsList + :members: +.. autoclass:: PrimaryKeyConstraint + :members: +.. autoclass:: PrivilegeAssignment + :members: +.. autoclass:: ProvisioningInfo + :members: +.. autoclass:: RegisteredModelAlias + :members: +.. autoclass:: RegisteredModelInfo + :members: +.. autoclass:: SchemaInfo + :members: +.. autoclass:: SetArtifactAllowlist + :members: +.. autoclass:: SetRegisteredModelAliasRequest + :members: +.. autoclass:: SseEncryptionDetails + :members: +.. autoclass:: StorageCredentialInfo + :members: +.. autoclass:: SystemSchemaInfo + :members: +.. autoclass:: TableConstraint + :members: +.. autoclass:: TableDependency + :members: +.. autoclass:: TableInfo + :members: +.. autoclass:: TableRowFilter + :members: +.. autoclass:: TableSummary + :members: +.. autoclass:: UpdateCatalog + :members: +.. autoclass:: UpdateConnection + :members: +.. autoclass:: UpdateExternalLocation + :members: +.. autoclass:: UpdateFunction + :members: +.. autoclass:: UpdateMetastore + :members: +.. autoclass:: UpdateMetastoreAssignment + :members: +.. autoclass:: UpdateModelVersionRequest + :members: +.. autoclass:: UpdatePermissions + :members: +.. autoclass:: UpdateRegisteredModelRequest + :members: +.. autoclass:: UpdateSchema + :members: +.. autoclass:: UpdateStorageCredential + :members: +.. autoclass:: UpdateVolumeRequestContent + :members: +.. autoclass:: UpdateWorkspaceBindings + :members: +.. autoclass:: UpdateWorkspaceBindingsParameters + :members: +.. autoclass:: ValidateStorageCredential + :members: +.. autoclass:: ValidateStorageCredentialResponse + :members: +.. autoclass:: ValidationResult + :members: +.. autoclass:: VolumeInfo + :members: +.. autoclass:: WorkspaceBinding + :members: +.. 
autoclass:: WorkspaceBindingsResponse + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/compute.rst b/docs/dbdataclasses/compute.rst new file mode 100644 index 000000000..f22e7fa83 --- /dev/null +++ b/docs/dbdataclasses/compute.rst @@ -0,0 +1,252 @@ +Compute +======= + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.compute`` module. + +.. py:currentmodule:: databricks.sdk.service.compute +.. autoclass:: AddInstanceProfile + :members: +.. autoclass:: AutoScale + :members: +.. autoclass:: AwsAttributes + :members: +.. autoclass:: AzureAttributes + :members: +.. autoclass:: CancelCommand + :members: +.. autoclass:: ChangeClusterOwner + :members: +.. autoclass:: ClientsTypes + :members: +.. autoclass:: CloudProviderNodeInfo + :members: +.. autoclass:: ClusterAccessControlRequest + :members: +.. autoclass:: ClusterAccessControlResponse + :members: +.. autoclass:: ClusterAttributes + :members: +.. autoclass:: ClusterDetails + :members: +.. autoclass:: ClusterEvent + :members: +.. autoclass:: ClusterLibraryStatuses + :members: +.. autoclass:: ClusterLogConf + :members: +.. autoclass:: ClusterPermission + :members: +.. autoclass:: ClusterPermissions + :members: +.. autoclass:: ClusterPermissionsDescription + :members: +.. autoclass:: ClusterPermissionsRequest + :members: +.. autoclass:: ClusterPolicyAccessControlRequest + :members: +.. autoclass:: ClusterPolicyAccessControlResponse + :members: +.. autoclass:: ClusterPolicyPermission + :members: +.. autoclass:: ClusterPolicyPermissions + :members: +.. autoclass:: ClusterPolicyPermissionsDescription + :members: +.. autoclass:: ClusterPolicyPermissionsRequest + :members: +.. autoclass:: ClusterSize + :members: +.. autoclass:: ClusterSpec + :members: +.. autoclass:: Command + :members: +.. autoclass:: CommandStatusResponse + :members: +.. autoclass:: ComputeSpec + :members: +.. autoclass:: ContextStatusResponse + :members: +.. autoclass:: CreateCluster + :members: +.. autoclass:: CreateClusterResponse + :members: +.. autoclass:: CreateContext + :members: +.. autoclass:: CreateInstancePool + :members: +.. autoclass:: CreateInstancePoolResponse + :members: +.. autoclass:: CreatePolicy + :members: +.. autoclass:: CreatePolicyResponse + :members: +.. autoclass:: CreateResponse + :members: +.. autoclass:: Created + :members: +.. autoclass:: DataPlaneEventDetails + :members: +.. autoclass:: DbfsStorageInfo + :members: +.. autoclass:: DeleteCluster + :members: +.. autoclass:: DeleteInstancePool + :members: +.. autoclass:: DeletePolicy + :members: +.. autoclass:: DestroyContext + :members: +.. autoclass:: DiskSpec + :members: +.. autoclass:: DiskType + :members: +.. autoclass:: DockerBasicAuth + :members: +.. autoclass:: DockerImage + :members: +.. autoclass:: EditCluster + :members: +.. autoclass:: EditInstancePool + :members: +.. autoclass:: EditPolicy + :members: +.. autoclass:: EventDetails + :members: +.. autoclass:: GcpAttributes + :members: +.. autoclass:: GetClusterPermissionLevelsResponse + :members: +.. autoclass:: GetClusterPolicyPermissionLevelsResponse + :members: +.. autoclass:: GetEvents + :members: +.. autoclass:: GetEventsResponse + :members: +.. autoclass:: GetInstancePool + :members: +.. autoclass:: GetInstancePoolPermissionLevelsResponse + :members: +.. autoclass:: GetSparkVersionsResponse + :members: +.. autoclass:: GlobalInitScriptCreateRequest + :members: +.. autoclass:: GlobalInitScriptDetails + :members: +.. 
autoclass:: GlobalInitScriptDetailsWithContent + :members: +.. autoclass:: GlobalInitScriptUpdateRequest + :members: +.. autoclass:: InitScriptEventDetails + :members: +.. autoclass:: InitScriptExecutionDetails + :members: +.. autoclass:: InitScriptInfo + :members: +.. autoclass:: InitScriptInfoAndExecutionDetails + :members: +.. autoclass:: InstallLibraries + :members: +.. autoclass:: InstancePoolAccessControlRequest + :members: +.. autoclass:: InstancePoolAccessControlResponse + :members: +.. autoclass:: InstancePoolAndStats + :members: +.. autoclass:: InstancePoolAwsAttributes + :members: +.. autoclass:: InstancePoolAzureAttributes + :members: +.. autoclass:: InstancePoolGcpAttributes + :members: +.. autoclass:: InstancePoolPermission + :members: +.. autoclass:: InstancePoolPermissions + :members: +.. autoclass:: InstancePoolPermissionsDescription + :members: +.. autoclass:: InstancePoolPermissionsRequest + :members: +.. autoclass:: InstancePoolStats + :members: +.. autoclass:: InstancePoolStatus + :members: +.. autoclass:: InstanceProfile + :members: +.. autoclass:: Library + :members: +.. autoclass:: LibraryFullStatus + :members: +.. autoclass:: ListAllClusterLibraryStatusesResponse + :members: +.. autoclass:: ListAvailableZonesResponse + :members: +.. autoclass:: ListClustersResponse + :members: +.. autoclass:: ListGlobalInitScriptsResponse + :members: +.. autoclass:: ListInstancePools + :members: +.. autoclass:: ListInstanceProfilesResponse + :members: +.. autoclass:: ListNodeTypesResponse + :members: +.. autoclass:: ListPoliciesResponse + :members: +.. autoclass:: ListPolicyFamiliesResponse + :members: +.. autoclass:: LocalFileInfo + :members: +.. autoclass:: LogAnalyticsInfo + :members: +.. autoclass:: LogSyncStatus + :members: +.. autoclass:: MavenLibrary + :members: +.. autoclass:: NodeInstanceType + :members: +.. autoclass:: NodeType + :members: +.. autoclass:: PendingInstanceError + :members: +.. autoclass:: PermanentDeleteCluster + :members: +.. autoclass:: PinCluster + :members: +.. autoclass:: Policy + :members: +.. autoclass:: PolicyFamily + :members: +.. autoclass:: PythonPyPiLibrary + :members: +.. autoclass:: RCranLibrary + :members: +.. autoclass:: RemoveInstanceProfile + :members: +.. autoclass:: ResizeCluster + :members: +.. autoclass:: RestartCluster + :members: +.. autoclass:: Results + :members: +.. autoclass:: S3StorageInfo + :members: +.. autoclass:: SparkNode + :members: +.. autoclass:: SparkNodeAwsAttributes + :members: +.. autoclass:: SparkVersion + :members: +.. autoclass:: StartCluster + :members: +.. autoclass:: TerminationReason + :members: +.. autoclass:: UninstallLibraries + :members: +.. autoclass:: UnpinCluster + :members: +.. autoclass:: VolumesStorageInfo + :members: +.. autoclass:: WorkloadType + :members: +.. autoclass:: WorkspaceStorageInfo + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/dashboards.rst b/docs/dbdataclasses/dashboards.rst new file mode 100644 index 000000000..3b24ec751 --- /dev/null +++ b/docs/dbdataclasses/dashboards.rst @@ -0,0 +1,8 @@ +Dashboards +========== + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.dashboards`` module. + +.. py:currentmodule:: databricks.sdk.service.dashboards +.. 
autoclass:: PublishRequest + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/files.rst b/docs/dbdataclasses/files.rst new file mode 100644 index 000000000..b7c1f1914 --- /dev/null +++ b/docs/dbdataclasses/files.rst @@ -0,0 +1,30 @@ +File Management +=============== + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.files`` module. + +.. py:currentmodule:: databricks.sdk.service.files +.. autoclass:: AddBlock + :members: +.. autoclass:: Close + :members: +.. autoclass:: Create + :members: +.. autoclass:: CreateResponse + :members: +.. autoclass:: Delete + :members: +.. autoclass:: DownloadResponse + :members: +.. autoclass:: FileInfo + :members: +.. autoclass:: ListStatusResponse + :members: +.. autoclass:: MkDirs + :members: +.. autoclass:: Move + :members: +.. autoclass:: Put + :members: +.. autoclass:: ReadResponse + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/iam.rst b/docs/dbdataclasses/iam.rst new file mode 100644 index 000000000..6f76e3176 --- /dev/null +++ b/docs/dbdataclasses/iam.rst @@ -0,0 +1,80 @@ +Identity and Access Management +============================== + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.iam`` module. + +.. py:currentmodule:: databricks.sdk.service.iam +.. autoclass:: AccessControlRequest + :members: +.. autoclass:: AccessControlResponse + :members: +.. autoclass:: ComplexValue + :members: +.. autoclass:: GetAssignableRolesForResourceResponse + :members: +.. autoclass:: GetPasswordPermissionLevelsResponse + :members: +.. autoclass:: GetPermissionLevelsResponse + :members: +.. autoclass:: GrantRule + :members: +.. autoclass:: Group + :members: +.. autoclass:: ListGroupsResponse + :members: +.. autoclass:: ListServicePrincipalResponse + :members: +.. autoclass:: ListUsersResponse + :members: +.. autoclass:: Name + :members: +.. autoclass:: ObjectPermissions + :members: +.. autoclass:: PartialUpdate + :members: +.. autoclass:: PasswordAccessControlRequest + :members: +.. autoclass:: PasswordAccessControlResponse + :members: +.. autoclass:: PasswordPermission + :members: +.. autoclass:: PasswordPermissions + :members: +.. autoclass:: PasswordPermissionsDescription + :members: +.. autoclass:: PasswordPermissionsRequest + :members: +.. autoclass:: Patch + :members: +.. autoclass:: Permission + :members: +.. autoclass:: PermissionAssignment + :members: +.. autoclass:: PermissionAssignments + :members: +.. autoclass:: PermissionOutput + :members: +.. autoclass:: PermissionsDescription + :members: +.. autoclass:: PermissionsRequest + :members: +.. autoclass:: PrincipalOutput + :members: +.. autoclass:: ResourceMeta + :members: +.. autoclass:: Role + :members: +.. autoclass:: RuleSetResponse + :members: +.. autoclass:: RuleSetUpdateRequest + :members: +.. autoclass:: ServicePrincipal + :members: +.. autoclass:: UpdateRuleSetRequest + :members: +.. autoclass:: UpdateWorkspaceAssignments + :members: +.. autoclass:: User + :members: +.. autoclass:: WorkspacePermissions + :members: \ No newline at end of file diff --git a/docs/autogen/reference.rst b/docs/dbdataclasses/index.rst similarity index 78% rename from docs/autogen/reference.rst rename to docs/dbdataclasses/index.rst index 5d5b020f1..f35ac7c51 100644 --- a/docs/autogen/reference.rst +++ b/docs/dbdataclasses/index.rst @@ -1,22 +1,24 @@ -Reference -========= +Dataclasses +=========== .. 
toctree:: :maxdepth: 1 - billing - catalog + workspace compute - files - iam jobs - ml - oauth2 pipelines - provisioning + files + ml serving - settings - sharing + iam sql - workspace + catalog + sharing + settings + provisioning + billing + oauth2 + vectorsearch + dashboards \ No newline at end of file diff --git a/docs/dbdataclasses/jobs.rst b/docs/dbdataclasses/jobs.rst new file mode 100644 index 000000000..48cca1cfe --- /dev/null +++ b/docs/dbdataclasses/jobs.rst @@ -0,0 +1,208 @@ +Jobs +==== + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.jobs`` module. + +.. py:currentmodule:: databricks.sdk.service.jobs +.. autoclass:: BaseJob + :members: +.. autoclass:: BaseRun + :members: +.. autoclass:: CancelAllRuns + :members: +.. autoclass:: CancelRun + :members: +.. autoclass:: ClusterInstance + :members: +.. autoclass:: ClusterSpec + :members: +.. autoclass:: ConditionTask + :members: +.. autoclass:: Continuous + :members: +.. autoclass:: CreateJob + :members: +.. autoclass:: CreateResponse + :members: +.. autoclass:: CronSchedule + :members: +.. autoclass:: DbtOutput + :members: +.. autoclass:: DbtTask + :members: +.. autoclass:: DeleteJob + :members: +.. autoclass:: DeleteRun + :members: +.. autoclass:: ExportRunOutput + :members: +.. autoclass:: FileArrivalTriggerConfiguration + :members: +.. autoclass:: GetJobPermissionLevelsResponse + :members: +.. autoclass:: GitSnapshot + :members: +.. autoclass:: GitSource + :members: +.. autoclass:: Job + :members: +.. autoclass:: JobAccessControlRequest + :members: +.. autoclass:: JobAccessControlResponse + :members: +.. autoclass:: JobCluster + :members: +.. autoclass:: JobCompute + :members: +.. autoclass:: JobDeployment + :members: +.. autoclass:: JobEmailNotifications + :members: +.. autoclass:: JobNotificationSettings + :members: +.. autoclass:: JobParameter + :members: +.. autoclass:: JobParameterDefinition + :members: +.. autoclass:: JobPermission + :members: +.. autoclass:: JobPermissions + :members: +.. autoclass:: JobPermissionsDescription + :members: +.. autoclass:: JobPermissionsRequest + :members: +.. autoclass:: JobRunAs + :members: +.. autoclass:: JobSettings + :members: +.. autoclass:: JobSource + :members: +.. autoclass:: JobsHealthRule + :members: +.. autoclass:: JobsHealthRules + :members: +.. autoclass:: ListJobsResponse + :members: +.. autoclass:: ListRunsResponse + :members: +.. autoclass:: NotebookOutput + :members: +.. autoclass:: NotebookTask + :members: +.. autoclass:: PipelineParams + :members: +.. autoclass:: PipelineTask + :members: +.. autoclass:: PythonWheelTask + :members: +.. autoclass:: QueueSettings + :members: +.. autoclass:: RepairHistoryItem + :members: +.. autoclass:: RepairRun + :members: +.. autoclass:: RepairRunResponse + :members: +.. autoclass:: ResetJob + :members: +.. autoclass:: ResolvedConditionTaskValues + :members: +.. autoclass:: ResolvedDbtTaskValues + :members: +.. autoclass:: ResolvedNotebookTaskValues + :members: +.. autoclass:: ResolvedParamPairValues + :members: +.. autoclass:: ResolvedPythonWheelTaskValues + :members: +.. autoclass:: ResolvedRunJobTaskValues + :members: +.. autoclass:: ResolvedStringParamsValues + :members: +.. autoclass:: ResolvedValues + :members: +.. autoclass:: Run + :members: +.. autoclass:: RunConditionTask + :members: +.. autoclass:: RunJobOutput + :members: +.. autoclass:: RunJobTask + :members: +.. autoclass:: RunNow + :members: +.. autoclass:: RunNowResponse + :members: +.. 
autoclass:: RunOutput + :members: +.. autoclass:: RunParameters + :members: +.. autoclass:: RunState + :members: +.. autoclass:: RunTask + :members: +.. autoclass:: SparkJarTask + :members: +.. autoclass:: SparkPythonTask + :members: +.. autoclass:: SparkSubmitTask + :members: +.. autoclass:: SqlAlertOutput + :members: +.. autoclass:: SqlDashboardOutput + :members: +.. autoclass:: SqlDashboardWidgetOutput + :members: +.. autoclass:: SqlOutput + :members: +.. autoclass:: SqlOutputError + :members: +.. autoclass:: SqlQueryOutput + :members: +.. autoclass:: SqlStatementOutput + :members: +.. autoclass:: SqlTask + :members: +.. autoclass:: SqlTaskAlert + :members: +.. autoclass:: SqlTaskDashboard + :members: +.. autoclass:: SqlTaskFile + :members: +.. autoclass:: SqlTaskQuery + :members: +.. autoclass:: SqlTaskSubscription + :members: +.. autoclass:: SubmitRun + :members: +.. autoclass:: SubmitRunResponse + :members: +.. autoclass:: SubmitTask + :members: +.. autoclass:: Task + :members: +.. autoclass:: TaskDependency + :members: +.. autoclass:: TaskEmailNotifications + :members: +.. autoclass:: TaskNotificationSettings + :members: +.. autoclass:: TriggerEvaluation + :members: +.. autoclass:: TriggerHistory + :members: +.. autoclass:: TriggerInfo + :members: +.. autoclass:: TriggerSettings + :members: +.. autoclass:: UpdateJob + :members: +.. autoclass:: ViewItem + :members: +.. autoclass:: Webhook + :members: +.. autoclass:: WebhookNotifications + :members: +.. autoclass:: WebhookNotificationsOnDurationWarningThresholdExceededItem + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/ml.rst b/docs/dbdataclasses/ml.rst new file mode 100644 index 000000000..3764a9251 --- /dev/null +++ b/docs/dbdataclasses/ml.rst @@ -0,0 +1,228 @@ +Machine Learning +================ + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.ml`` module. + +.. py:currentmodule:: databricks.sdk.service.ml +.. autoclass:: Activity + :members: +.. autoclass:: ApproveTransitionRequest + :members: +.. autoclass:: ApproveTransitionRequestResponse + :members: +.. autoclass:: CommentObject + :members: +.. autoclass:: CreateComment + :members: +.. autoclass:: CreateCommentResponse + :members: +.. autoclass:: CreateExperiment + :members: +.. autoclass:: CreateExperimentResponse + :members: +.. autoclass:: CreateModelRequest + :members: +.. autoclass:: CreateModelResponse + :members: +.. autoclass:: CreateModelVersionRequest + :members: +.. autoclass:: CreateModelVersionResponse + :members: +.. autoclass:: CreateRegistryWebhook + :members: +.. autoclass:: CreateRun + :members: +.. autoclass:: CreateRunResponse + :members: +.. autoclass:: CreateTransitionRequest + :members: +.. autoclass:: CreateTransitionRequestResponse + :members: +.. autoclass:: CreateWebhookResponse + :members: +.. autoclass:: Dataset + :members: +.. autoclass:: DatasetInput + :members: +.. autoclass:: DeleteExperiment + :members: +.. autoclass:: DeleteRun + :members: +.. autoclass:: DeleteRuns + :members: +.. autoclass:: DeleteRunsResponse + :members: +.. autoclass:: DeleteTag + :members: +.. autoclass:: Experiment + :members: +.. autoclass:: ExperimentAccessControlRequest + :members: +.. autoclass:: ExperimentAccessControlResponse + :members: +.. autoclass:: ExperimentPermission + :members: +.. autoclass:: ExperimentPermissions + :members: +.. autoclass:: ExperimentPermissionsDescription + :members: +.. autoclass:: ExperimentPermissionsRequest + :members: +.. 
autoclass:: ExperimentTag + :members: +.. autoclass:: FileInfo + :members: +.. autoclass:: GetExperimentPermissionLevelsResponse + :members: +.. autoclass:: GetExperimentResponse + :members: +.. autoclass:: GetLatestVersionsRequest + :members: +.. autoclass:: GetLatestVersionsResponse + :members: +.. autoclass:: GetMetricHistoryResponse + :members: +.. autoclass:: GetModelResponse + :members: +.. autoclass:: GetModelVersionDownloadUriResponse + :members: +.. autoclass:: GetModelVersionResponse + :members: +.. autoclass:: GetRegisteredModelPermissionLevelsResponse + :members: +.. autoclass:: GetRunResponse + :members: +.. autoclass:: HttpUrlSpec + :members: +.. autoclass:: HttpUrlSpecWithoutSecret + :members: +.. autoclass:: InputTag + :members: +.. autoclass:: JobSpec + :members: +.. autoclass:: JobSpecWithoutSecret + :members: +.. autoclass:: ListArtifactsResponse + :members: +.. autoclass:: ListExperimentsResponse + :members: +.. autoclass:: ListModelsResponse + :members: +.. autoclass:: ListRegistryWebhooks + :members: +.. autoclass:: ListTransitionRequestsResponse + :members: +.. autoclass:: LogBatch + :members: +.. autoclass:: LogInputs + :members: +.. autoclass:: LogMetric + :members: +.. autoclass:: LogModel + :members: +.. autoclass:: LogParam + :members: +.. autoclass:: Metric + :members: +.. autoclass:: Model + :members: +.. autoclass:: ModelDatabricks + :members: +.. autoclass:: ModelTag + :members: +.. autoclass:: ModelVersion + :members: +.. autoclass:: ModelVersionDatabricks + :members: +.. autoclass:: ModelVersionTag + :members: +.. autoclass:: Param + :members: +.. autoclass:: RegisteredModelAccessControlRequest + :members: +.. autoclass:: RegisteredModelAccessControlResponse + :members: +.. autoclass:: RegisteredModelPermission + :members: +.. autoclass:: RegisteredModelPermissions + :members: +.. autoclass:: RegisteredModelPermissionsDescription + :members: +.. autoclass:: RegisteredModelPermissionsRequest + :members: +.. autoclass:: RegistryWebhook + :members: +.. autoclass:: RejectTransitionRequest + :members: +.. autoclass:: RejectTransitionRequestResponse + :members: +.. autoclass:: RenameModelRequest + :members: +.. autoclass:: RenameModelResponse + :members: +.. autoclass:: RestoreExperiment + :members: +.. autoclass:: RestoreRun + :members: +.. autoclass:: RestoreRuns + :members: +.. autoclass:: RestoreRunsResponse + :members: +.. autoclass:: Run + :members: +.. autoclass:: RunData + :members: +.. autoclass:: RunInfo + :members: +.. autoclass:: RunInputs + :members: +.. autoclass:: RunTag + :members: +.. autoclass:: SearchExperiments + :members: +.. autoclass:: SearchExperimentsResponse + :members: +.. autoclass:: SearchModelVersionsResponse + :members: +.. autoclass:: SearchModelsResponse + :members: +.. autoclass:: SearchRuns + :members: +.. autoclass:: SearchRunsResponse + :members: +.. autoclass:: SetExperimentTag + :members: +.. autoclass:: SetModelTagRequest + :members: +.. autoclass:: SetModelVersionTagRequest + :members: +.. autoclass:: SetTag + :members: +.. autoclass:: TestRegistryWebhook + :members: +.. autoclass:: TestRegistryWebhookRequest + :members: +.. autoclass:: TestRegistryWebhookResponse + :members: +.. autoclass:: TransitionModelVersionStageDatabricks + :members: +.. autoclass:: TransitionRequest + :members: +.. autoclass:: TransitionStageResponse + :members: +.. autoclass:: UpdateComment + :members: +.. autoclass:: UpdateCommentResponse + :members: +.. autoclass:: UpdateExperiment + :members: +.. autoclass:: UpdateModelRequest + :members: +.. 
autoclass:: UpdateModelVersionRequest + :members: +.. autoclass:: UpdateRegistryWebhook + :members: +.. autoclass:: UpdateRun + :members: +.. autoclass:: UpdateRunResponse + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/oauth2.rst b/docs/dbdataclasses/oauth2.rst new file mode 100644 index 000000000..f455f0f17 --- /dev/null +++ b/docs/dbdataclasses/oauth2.rst @@ -0,0 +1,38 @@ +OAuth +===== + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.oauth2`` module. + +.. py:currentmodule:: databricks.sdk.service.oauth2 +.. autoclass:: CreateCustomAppIntegration + :members: +.. autoclass:: CreateCustomAppIntegrationOutput + :members: +.. autoclass:: CreatePublishedAppIntegration + :members: +.. autoclass:: CreatePublishedAppIntegrationOutput + :members: +.. autoclass:: CreateServicePrincipalSecretResponse + :members: +.. autoclass:: GetCustomAppIntegrationOutput + :members: +.. autoclass:: GetCustomAppIntegrationsOutput + :members: +.. autoclass:: GetPublishedAppIntegrationOutput + :members: +.. autoclass:: GetPublishedAppIntegrationsOutput + :members: +.. autoclass:: GetPublishedAppsOutput + :members: +.. autoclass:: ListServicePrincipalSecretsResponse + :members: +.. autoclass:: PublishedAppOutput + :members: +.. autoclass:: SecretInfo + :members: +.. autoclass:: TokenAccessPolicy + :members: +.. autoclass:: UpdateCustomAppIntegration + :members: +.. autoclass:: UpdatePublishedAppIntegration + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/pipelines.rst b/docs/dbdataclasses/pipelines.rst new file mode 100644 index 000000000..6a9b03f20 --- /dev/null +++ b/docs/dbdataclasses/pipelines.rst @@ -0,0 +1,78 @@ +Delta Live Tables +================= + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.pipelines`` module. + +.. py:currentmodule:: databricks.sdk.service.pipelines +.. autoclass:: CreatePipeline + :members: +.. autoclass:: CreatePipelineResponse + :members: +.. autoclass:: CronTrigger + :members: +.. autoclass:: DataPlaneId + :members: +.. autoclass:: EditPipeline + :members: +.. autoclass:: ErrorDetail + :members: +.. autoclass:: FileLibrary + :members: +.. autoclass:: Filters + :members: +.. autoclass:: GetPipelinePermissionLevelsResponse + :members: +.. autoclass:: GetPipelineResponse + :members: +.. autoclass:: GetUpdateResponse + :members: +.. autoclass:: ListPipelineEventsResponse + :members: +.. autoclass:: ListPipelinesResponse + :members: +.. autoclass:: ListUpdatesResponse + :members: +.. autoclass:: NotebookLibrary + :members: +.. autoclass:: Notifications + :members: +.. autoclass:: Origin + :members: +.. autoclass:: PipelineAccessControlRequest + :members: +.. autoclass:: PipelineAccessControlResponse + :members: +.. autoclass:: PipelineCluster + :members: +.. autoclass:: PipelineEvent + :members: +.. autoclass:: PipelineLibrary + :members: +.. autoclass:: PipelinePermission + :members: +.. autoclass:: PipelinePermissions + :members: +.. autoclass:: PipelinePermissionsDescription + :members: +.. autoclass:: PipelinePermissionsRequest + :members: +.. autoclass:: PipelineSpec + :members: +.. autoclass:: PipelineStateInfo + :members: +.. autoclass:: PipelineTrigger + :members: +.. autoclass:: Sequencing + :members: +.. autoclass:: SerializedException + :members: +.. autoclass:: StackFrame + :members: +.. autoclass:: StartUpdate + :members: +.. autoclass:: StartUpdateResponse + :members: +.. 
autoclass:: UpdateInfo + :members: +.. autoclass:: UpdateStateInfo + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/provisioning.rst b/docs/dbdataclasses/provisioning.rst new file mode 100644 index 000000000..558881a00 --- /dev/null +++ b/docs/dbdataclasses/provisioning.rst @@ -0,0 +1,74 @@ +Provisioning +============ + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.provisioning`` module. + +.. py:currentmodule:: databricks.sdk.service.provisioning +.. autoclass:: AwsCredentials + :members: +.. autoclass:: AwsKeyInfo + :members: +.. autoclass:: AzureWorkspaceInfo + :members: +.. autoclass:: CloudResourceContainer + :members: +.. autoclass:: CreateAwsKeyInfo + :members: +.. autoclass:: CreateCredentialAwsCredentials + :members: +.. autoclass:: CreateCredentialRequest + :members: +.. autoclass:: CreateCredentialStsRole + :members: +.. autoclass:: CreateCustomerManagedKeyRequest + :members: +.. autoclass:: CreateGcpKeyInfo + :members: +.. autoclass:: CreateNetworkRequest + :members: +.. autoclass:: CreateStorageConfigurationRequest + :members: +.. autoclass:: CreateVpcEndpointRequest + :members: +.. autoclass:: CreateWorkspaceRequest + :members: +.. autoclass:: Credential + :members: +.. autoclass:: CustomerFacingGcpCloudResourceContainer + :members: +.. autoclass:: CustomerManagedKey + :members: +.. autoclass:: GcpKeyInfo + :members: +.. autoclass:: GcpManagedNetworkConfig + :members: +.. autoclass:: GcpNetworkInfo + :members: +.. autoclass:: GcpVpcEndpointInfo + :members: +.. autoclass:: GkeConfig + :members: +.. autoclass:: Network + :members: +.. autoclass:: NetworkHealth + :members: +.. autoclass:: NetworkVpcEndpoints + :members: +.. autoclass:: NetworkWarning + :members: +.. autoclass:: PrivateAccessSettings + :members: +.. autoclass:: RootBucketInfo + :members: +.. autoclass:: StorageConfiguration + :members: +.. autoclass:: StsRole + :members: +.. autoclass:: UpdateWorkspaceRequest + :members: +.. autoclass:: UpsertPrivateAccessSettingsRequest + :members: +.. autoclass:: VpcEndpoint + :members: +.. autoclass:: Workspace + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/serving.rst b/docs/dbdataclasses/serving.rst new file mode 100644 index 000000000..8828e5956 --- /dev/null +++ b/docs/dbdataclasses/serving.rst @@ -0,0 +1,128 @@ +Real-time Serving +================= + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.serving`` module. + +.. py:currentmodule:: databricks.sdk.service.serving +.. autoclass:: Ai21LabsConfig + :members: +.. autoclass:: AnthropicConfig + :members: +.. autoclass:: AppEvents + :members: +.. autoclass:: AppManifest + :members: +.. autoclass:: AppServiceStatus + :members: +.. autoclass:: AutoCaptureConfigInput + :members: +.. autoclass:: AutoCaptureConfigOutput + :members: +.. autoclass:: AutoCaptureState + :members: +.. autoclass:: AwsBedrockConfig + :members: +.. autoclass:: BuildLogsResponse + :members: +.. autoclass:: ChatMessage + :members: +.. autoclass:: CohereConfig + :members: +.. autoclass:: CreateServingEndpoint + :members: +.. autoclass:: DatabricksModelServingConfig + :members: +.. autoclass:: DataframeSplitInput + :members: +.. autoclass:: DeleteAppResponse + :members: +.. autoclass:: DeployAppRequest + :members: +.. autoclass:: DeploymentStatus + :members: +.. autoclass:: EmbeddingsV1ResponseEmbeddingElement + :members: +.. 
autoclass:: EndpointCoreConfigInput + :members: +.. autoclass:: EndpointCoreConfigOutput + :members: +.. autoclass:: EndpointCoreConfigSummary + :members: +.. autoclass:: EndpointPendingConfig + :members: +.. autoclass:: EndpointState + :members: +.. autoclass:: EndpointTag + :members: +.. autoclass:: ExternalModel + :members: +.. autoclass:: ExternalModelConfig + :members: +.. autoclass:: ExternalModelUsageElement + :members: +.. autoclass:: FoundationModel + :members: +.. autoclass:: GetAppResponse + :members: +.. autoclass:: GetServingEndpointPermissionLevelsResponse + :members: +.. autoclass:: ListAppEventsResponse + :members: +.. autoclass:: ListAppsResponse + :members: +.. autoclass:: ListEndpointsResponse + :members: +.. autoclass:: OpenAiConfig + :members: +.. autoclass:: PaLmConfig + :members: +.. autoclass:: PatchServingEndpointTags + :members: +.. autoclass:: PayloadTable + :members: +.. autoclass:: PutResponse + :members: +.. autoclass:: QueryEndpointInput + :members: +.. autoclass:: QueryEndpointResponse + :members: +.. autoclass:: RateLimit + :members: +.. autoclass:: Route + :members: +.. autoclass:: ServedEntityInput + :members: +.. autoclass:: ServedEntityOutput + :members: +.. autoclass:: ServedEntitySpec + :members: +.. autoclass:: ServedModelInput + :members: +.. autoclass:: ServedModelOutput + :members: +.. autoclass:: ServedModelSpec + :members: +.. autoclass:: ServedModelState + :members: +.. autoclass:: ServerLogsResponse + :members: +.. autoclass:: ServingEndpoint + :members: +.. autoclass:: ServingEndpointAccessControlRequest + :members: +.. autoclass:: ServingEndpointAccessControlResponse + :members: +.. autoclass:: ServingEndpointDetailed + :members: +.. autoclass:: ServingEndpointPermission + :members: +.. autoclass:: ServingEndpointPermissions + :members: +.. autoclass:: ServingEndpointPermissionsDescription + :members: +.. autoclass:: ServingEndpointPermissionsRequest + :members: +.. autoclass:: TrafficConfig + :members: +.. autoclass:: V1ResponseChoiceElement + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/settings.rst b/docs/dbdataclasses/settings.rst new file mode 100644 index 000000000..66cbb7b33 --- /dev/null +++ b/docs/dbdataclasses/settings.rst @@ -0,0 +1,96 @@ +Settings +======== + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.settings`` module. + +.. py:currentmodule:: databricks.sdk.service.settings +.. autoclass:: CreateIpAccessList + :members: +.. autoclass:: CreateIpAccessListResponse + :members: +.. autoclass:: CreateNetworkConnectivityConfigRequest + :members: +.. autoclass:: CreateOboTokenRequest + :members: +.. autoclass:: CreateOboTokenResponse + :members: +.. autoclass:: CreatePrivateEndpointRuleRequest + :members: +.. autoclass:: CreateTokenRequest + :members: +.. autoclass:: CreateTokenResponse + :members: +.. autoclass:: DefaultNamespaceSetting + :members: +.. autoclass:: DeleteDefaultWorkspaceNamespaceResponse + :members: +.. autoclass:: DeletePersonalComputeSettingResponse + :members: +.. autoclass:: ExchangeToken + :members: +.. autoclass:: ExchangeTokenRequest + :members: +.. autoclass:: ExchangeTokenResponse + :members: +.. autoclass:: FetchIpAccessListResponse + :members: +.. autoclass:: GetIpAccessListResponse + :members: +.. autoclass:: GetIpAccessListsResponse + :members: +.. autoclass:: GetTokenPermissionLevelsResponse + :members: +.. autoclass:: IpAccessListInfo + :members: +.. 
autoclass:: ListIpAccessListResponse + :members: +.. autoclass:: ListNccAzurePrivateEndpointRulesResponse + :members: +.. autoclass:: ListNetworkConnectivityConfigurationsResponse + :members: +.. autoclass:: ListPublicTokensResponse + :members: +.. autoclass:: ListTokensResponse + :members: +.. autoclass:: NccAzurePrivateEndpointRule + :members: +.. autoclass:: NccAzureServiceEndpointRule + :members: +.. autoclass:: NccEgressConfig + :members: +.. autoclass:: NccEgressDefaultRules + :members: +.. autoclass:: NccEgressTargetRules + :members: +.. autoclass:: NetworkConnectivityConfiguration + :members: +.. autoclass:: PartitionId + :members: +.. autoclass:: PersonalComputeMessage + :members: +.. autoclass:: PersonalComputeSetting + :members: +.. autoclass:: PublicTokenInfo + :members: +.. autoclass:: ReplaceIpAccessList + :members: +.. autoclass:: RevokeTokenRequest + :members: +.. autoclass:: StringMessage + :members: +.. autoclass:: TokenAccessControlRequest + :members: +.. autoclass:: TokenAccessControlResponse + :members: +.. autoclass:: TokenInfo + :members: +.. autoclass:: TokenPermission + :members: +.. autoclass:: TokenPermissions + :members: +.. autoclass:: TokenPermissionsDescription + :members: +.. autoclass:: TokenPermissionsRequest + :members: +.. autoclass:: UpdateIpAccessList + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/sharing.rst b/docs/dbdataclasses/sharing.rst new file mode 100644 index 000000000..f9897ef5c --- /dev/null +++ b/docs/dbdataclasses/sharing.rst @@ -0,0 +1,88 @@ +Delta Sharing +============= + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.sharing`` module. + +.. py:currentmodule:: databricks.sdk.service.sharing +.. autoclass:: CentralCleanRoomInfo + :members: +.. autoclass:: CleanRoomAssetInfo + :members: +.. autoclass:: CleanRoomCatalog + :members: +.. autoclass:: CleanRoomCatalogUpdate + :members: +.. autoclass:: CleanRoomCollaboratorInfo + :members: +.. autoclass:: CleanRoomInfo + :members: +.. autoclass:: CleanRoomNotebookInfo + :members: +.. autoclass:: CleanRoomTableInfo + :members: +.. autoclass:: ColumnInfo + :members: +.. autoclass:: ColumnMask + :members: +.. autoclass:: CreateCleanRoom + :members: +.. autoclass:: CreateProvider + :members: +.. autoclass:: CreateRecipient + :members: +.. autoclass:: CreateShare + :members: +.. autoclass:: GetRecipientSharePermissionsResponse + :members: +.. autoclass:: IpAccessList + :members: +.. autoclass:: ListCleanRoomsResponse + :members: +.. autoclass:: ListProviderSharesResponse + :members: +.. autoclass:: ListProvidersResponse + :members: +.. autoclass:: ListRecipientsResponse + :members: +.. autoclass:: ListSharesResponse + :members: +.. autoclass:: Partition + :members: +.. autoclass:: PartitionValue + :members: +.. autoclass:: PrivilegeAssignment + :members: +.. autoclass:: ProviderInfo + :members: +.. autoclass:: ProviderShare + :members: +.. autoclass:: RecipientInfo + :members: +.. autoclass:: RecipientProfile + :members: +.. autoclass:: RecipientTokenInfo + :members: +.. autoclass:: RetrieveTokenResponse + :members: +.. autoclass:: RotateRecipientToken + :members: +.. autoclass:: SecurablePropertiesKvPairs + :members: +.. autoclass:: ShareInfo + :members: +.. autoclass:: ShareToPrivilegeAssignment + :members: +.. autoclass:: SharedDataObject + :members: +.. autoclass:: SharedDataObjectUpdate + :members: +.. autoclass:: UpdateCleanRoom + :members: +.. autoclass:: UpdateProvider + :members: +.. 
autoclass:: UpdateRecipient + :members: +.. autoclass:: UpdateShare + :members: +.. autoclass:: UpdateSharePermissions + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/sql.rst b/docs/dbdataclasses/sql.rst new file mode 100644 index 000000000..50f070ca9 --- /dev/null +++ b/docs/dbdataclasses/sql.rst @@ -0,0 +1,146 @@ +Databricks SQL +============== + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.sql`` module. + +.. py:currentmodule:: databricks.sdk.service.sql +.. autoclass:: AccessControl + :members: +.. autoclass:: Alert + :members: +.. autoclass:: AlertOptions + :members: +.. autoclass:: AlertQuery + :members: +.. autoclass:: BaseChunkInfo + :members: +.. autoclass:: Channel + :members: +.. autoclass:: ChannelInfo + :members: +.. autoclass:: ColumnInfo + :members: +.. autoclass:: CreateAlert + :members: +.. autoclass:: CreateWarehouseRequest + :members: +.. autoclass:: CreateWarehouseResponse + :members: +.. autoclass:: CreateWidget + :members: +.. autoclass:: Dashboard + :members: +.. autoclass:: DashboardEditContent + :members: +.. autoclass:: DashboardOptions + :members: +.. autoclass:: DashboardPostContent + :members: +.. autoclass:: DataSource + :members: +.. autoclass:: EditAlert + :members: +.. autoclass:: EditWarehouseRequest + :members: +.. autoclass:: EndpointConfPair + :members: +.. autoclass:: EndpointHealth + :members: +.. autoclass:: EndpointInfo + :members: +.. autoclass:: EndpointTagPair + :members: +.. autoclass:: EndpointTags + :members: +.. autoclass:: ExecuteStatementRequest + :members: +.. autoclass:: ExecuteStatementResponse + :members: +.. autoclass:: ExternalLink + :members: +.. autoclass:: GetResponse + :members: +.. autoclass:: GetStatementResponse + :members: +.. autoclass:: GetWarehousePermissionLevelsResponse + :members: +.. autoclass:: GetWarehouseResponse + :members: +.. autoclass:: GetWorkspaceWarehouseConfigResponse + :members: +.. autoclass:: ListQueriesResponse + :members: +.. autoclass:: ListResponse + :members: +.. autoclass:: ListWarehousesResponse + :members: +.. autoclass:: OdbcParams + :members: +.. autoclass:: Parameter + :members: +.. autoclass:: Query + :members: +.. autoclass:: QueryEditContent + :members: +.. autoclass:: QueryFilter + :members: +.. autoclass:: QueryInfo + :members: +.. autoclass:: QueryList + :members: +.. autoclass:: QueryMetrics + :members: +.. autoclass:: QueryOptions + :members: +.. autoclass:: QueryPostContent + :members: +.. autoclass:: RepeatedEndpointConfPairs + :members: +.. autoclass:: ResultData + :members: +.. autoclass:: ResultManifest + :members: +.. autoclass:: ResultSchema + :members: +.. autoclass:: ServiceError + :members: +.. autoclass:: SetResponse + :members: +.. autoclass:: SetWorkspaceWarehouseConfigRequest + :members: +.. autoclass:: StatementParameterListItem + :members: +.. autoclass:: StatementStatus + :members: +.. autoclass:: Success + :members: +.. autoclass:: TerminationReason + :members: +.. autoclass:: TimeRange + :members: +.. autoclass:: TransferOwnershipObjectId + :members: +.. autoclass:: User + :members: +.. autoclass:: Visualization + :members: +.. autoclass:: WarehouseAccessControlRequest + :members: +.. autoclass:: WarehouseAccessControlResponse + :members: +.. autoclass:: WarehousePermission + :members: +.. autoclass:: WarehousePermissions + :members: +.. autoclass:: WarehousePermissionsDescription + :members: +.. autoclass:: WarehousePermissionsRequest + :members: +.. 
autoclass:: WarehouseTypePair + :members: +.. autoclass:: Widget + :members: +.. autoclass:: WidgetOptions + :members: +.. autoclass:: WidgetPosition + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/vectorsearch.rst b/docs/dbdataclasses/vectorsearch.rst new file mode 100644 index 000000000..5f48bf4b4 --- /dev/null +++ b/docs/dbdataclasses/vectorsearch.rst @@ -0,0 +1,60 @@ +Vector Search +============= + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.vectorsearch`` module. + +.. py:currentmodule:: databricks.sdk.service.vectorsearch +.. autoclass:: ColumnInfo + :members: +.. autoclass:: CreateEndpoint + :members: +.. autoclass:: CreateVectorIndexRequest + :members: +.. autoclass:: CreateVectorIndexResponse + :members: +.. autoclass:: DeleteDataResult + :members: +.. autoclass:: DeleteDataVectorIndexRequest + :members: +.. autoclass:: DeleteDataVectorIndexResponse + :members: +.. autoclass:: DeltaSyncVectorIndexSpecRequest + :members: +.. autoclass:: DeltaSyncVectorIndexSpecResponse + :members: +.. autoclass:: DirectAccessVectorIndexSpec + :members: +.. autoclass:: EmbeddingConfig + :members: +.. autoclass:: EmbeddingSourceColumn + :members: +.. autoclass:: EmbeddingVectorColumn + :members: +.. autoclass:: EndpointInfo + :members: +.. autoclass:: EndpointStatus + :members: +.. autoclass:: ListEndpointResponse + :members: +.. autoclass:: ListVectorIndexesResponse + :members: +.. autoclass:: MiniVectorIndex + :members: +.. autoclass:: QueryVectorIndexRequest + :members: +.. autoclass:: QueryVectorIndexResponse + :members: +.. autoclass:: ResultData + :members: +.. autoclass:: ResultManifest + :members: +.. autoclass:: UpsertDataResult + :members: +.. autoclass:: UpsertDataVectorIndexRequest + :members: +.. autoclass:: UpsertDataVectorIndexResponse + :members: +.. autoclass:: VectorIndex + :members: +.. autoclass:: VectorIndexStatus + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/workspace.rst b/docs/dbdataclasses/workspace.rst new file mode 100644 index 000000000..4fa00e31b --- /dev/null +++ b/docs/dbdataclasses/workspace.rst @@ -0,0 +1,96 @@ +Workspace +========= + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.workspace`` module. + +.. py:currentmodule:: databricks.sdk.service.workspace +.. autoclass:: AclItem + :members: +.. autoclass:: AzureKeyVaultSecretScopeMetadata + :members: +.. autoclass:: CreateCredentials + :members: +.. autoclass:: CreateCredentialsResponse + :members: +.. autoclass:: CreateRepo + :members: +.. autoclass:: CreateScope + :members: +.. autoclass:: CredentialInfo + :members: +.. autoclass:: Delete + :members: +.. autoclass:: DeleteAcl + :members: +.. autoclass:: DeleteScope + :members: +.. autoclass:: DeleteSecret + :members: +.. autoclass:: ExportResponse + :members: +.. autoclass:: GetCredentialsResponse + :members: +.. autoclass:: GetRepoPermissionLevelsResponse + :members: +.. autoclass:: GetSecretResponse + :members: +.. autoclass:: GetWorkspaceObjectPermissionLevelsResponse + :members: +.. autoclass:: Import + :members: +.. autoclass:: ListAclsResponse + :members: +.. autoclass:: ListReposResponse + :members: +.. autoclass:: ListResponse + :members: +.. autoclass:: ListScopesResponse + :members: +.. autoclass:: ListSecretsResponse + :members: +.. autoclass:: Mkdirs + :members: +.. autoclass:: ObjectInfo + :members: +.. autoclass:: PutAcl + :members: +.. 
autoclass:: PutSecret + :members: +.. autoclass:: RepoAccessControlRequest + :members: +.. autoclass:: RepoAccessControlResponse + :members: +.. autoclass:: RepoInfo + :members: +.. autoclass:: RepoPermission + :members: +.. autoclass:: RepoPermissions + :members: +.. autoclass:: RepoPermissionsDescription + :members: +.. autoclass:: RepoPermissionsRequest + :members: +.. autoclass:: SecretMetadata + :members: +.. autoclass:: SecretScope + :members: +.. autoclass:: SparseCheckout + :members: +.. autoclass:: SparseCheckoutUpdate + :members: +.. autoclass:: UpdateCredentials + :members: +.. autoclass:: UpdateRepo + :members: +.. autoclass:: WorkspaceObjectAccessControlRequest + :members: +.. autoclass:: WorkspaceObjectAccessControlResponse + :members: +.. autoclass:: WorkspaceObjectPermission + :members: +.. autoclass:: WorkspaceObjectPermissions + :members: +.. autoclass:: WorkspaceObjectPermissionsDescription + :members: +.. autoclass:: WorkspaceObjectPermissionsRequest + :members: \ No newline at end of file diff --git a/docs/gen-client-docs.py b/docs/gen-client-docs.py index 948da61b7..23ab1a5e6 100644 --- a/docs/gen-client-docs.py +++ b/docs/gen-client-docs.py @@ -1,8 +1,21 @@ #!env python3 +import collections +import dbdataclasses +import inspect +import json import os.path -from dataclasses import dataclass +import subprocess +import importlib +from dataclasses import dataclass, is_dataclass +from enum import Enum +from pathlib import Path +from typing import Optional, Any, get_args + +from databricks.sdk import AccountClient, WorkspaceClient +from databricks.sdk.core import credentials_provider __dir__ = os.path.dirname(__file__) +__examples__ = Path(f'{__dir__}/../examples').absolute() @dataclass @@ -12,9 +25,157 @@ class Package: description: str +@dataclass +class Tag: + name: str + service: str + is_account: bool + package: Package + + +@dataclass +class TypedArgument: + name: str + tpe: Optional[str] + default: Optional[Any] + + def __str__(self): + ret = self.name + if self.tpe is not None: + ret += f': {self.tpe}' + elif self.default is not None: + tpe = type(self.default) + if tpe.__module__ == 'builtins': + ret += f': {tpe.__name__}' + else: + ret += f': {tpe.__module__}.{tpe.__name__}' + if self.default is not None: + ret += f' = {self.default}' + return ret + + +@dataclass +class MethodDoc: + method_name: str + doc: Optional[str] + required_args: list[TypedArgument] + kwonly_args: list[TypedArgument] + return_type: Optional[str] + + def argspec(self): + args = ', '.join([str(x) for x in self.required_args]) + if len(self.kwonly_args) > 0: + other = ', '.join([str(x) for x in self.kwonly_args]) + args = f'{args} [, {other}]' + return args + + def as_rst(self, usage) -> str: + ret_annotation = f' -> {self.return_type}' if self.return_type is not None else '' + out = ['', f' .. py:method:: {self.method_name}({self.argspec()}){ret_annotation}', ''] + if usage != '': + out.append(usage) + if self.doc is not None: + out.append(f' {self.doc}') + return "\n".join(out) + + +@dataclass +class ServiceDoc: + client_prefix: str + service_name: str + class_name: str + methods: list[MethodDoc] + doc: str + tag: Tag + + def as_rst(self) -> str: + if not self.doc: + self.doc = '' + title = f'``{self.client_prefix}.{self.service_name}``: {self.tag.name}' + out = [ + title, '=' * len(title), + f'.. currentmodule:: databricks.sdk.service.{self.tag.package.name}', '', + f'.. 
py:class:: {self.class_name}', '', f' {self.doc}' + ] + for m in self.methods: + usage = self.usage_example(m) + rst = m.as_rst(usage) + if not rst: + continue + out.append(rst) + + return "\n".join(out) + + def usage_example(self, m): + out = [] + example_root, example_files = self.examples() + for potential_example in example_files: + if not potential_example.startswith(m.method_name): + continue + out.append("") + out.append(" Usage:") + out.append("") + out.append(" .. code-block::") + out.append("") + with (example_root / potential_example).open('r') as f: + for line in f.readlines(): + line = line.rstrip("\n") + out.append(f' {line}') + out.append("") + return "\n".join(out) + return "" + + def examples(self): + try: + root = __examples__ / self.service_name + return root, os.listdir(root) + except: + return None, [] + +@dataclass +class DataclassesDoc: + package: Package + dataclasses: list[str] + + def as_rst(self) -> str: + title = f'{self.package.label}' + out = [ + title, '=' * len(title), '', + f'These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.{self.package.name}`` module.', + '', + f'.. py:currentmodule:: databricks.sdk.service.{self.package.name}', + ] + for d in self.dataclasses: + out.append(self.dataclass_rst(d)) + return "\n".join(out) + + def dataclass_rst(self, cls) -> str: + mod = importlib.import_module(f'databricks.sdk.service.{self.package.name}') + clss = getattr(mod, cls) + if issubclass(clss, Enum): + out = [ + f'.. py:class:: {cls}', + '', + ] + if clss.__doc__ is not None: + out.append(f' {clss.__doc__}') + out.append('') + for v in clss.__members__.keys(): + out.append(f' .. py:attribute:: {v}') + out.append(f' :value: "{v}"') + out.append('') + else: + out = [ + f'.. 
autoclass:: {cls}', + ' :members:' + '' + ] + return "\n".join(out) + + class Generator: packages = [ - Package("workspace", "Databricks Workspace", + Package("workspace", "Workspace", "Manage workspace-level entities that include notebooks, Git checkouts, and secrets"), Package("compute", "Compute", "Use and configure compute for Databricks"), Package("jobs", "Jobs", "Schedule automated jobs on Databricks Workspaces"), @@ -42,50 +203,200 @@ class Generator: "Resource management for secure Databricks Workspace deployment, cross-account IAM roles, " + "storage, encryption, networking and private access."), Package("billing", "Billing", "Configure different aspects of Databricks billing and usage."), - Package("oauth2", "OAuth", "Configure OAuth 2.0 application registrations for Databricks") + Package("oauth2", "OAuth", "Configure OAuth 2.0 application registrations for Databricks"), + Package("vectorsearch", "Vector Search", "Create and query Vector Search indexes"), + Package("dashboards", "Dashboards", "Manage Lakeview dashboards"), ] - def write_reference(self): + def __init__(self): + self.mapping = self._load_mapping() + + def _openapi_spec(self) -> str: + if 'DATABRICKS_OPENAPI_SPEC' in os.environ: + with open(os.environ['DATABRICKS_OPENAPI_SPEC']) as f: + return f.read() + with open(f'{__dir__}/../.codegen/_openapi_sha') as f: + sha = f.read().strip() + return subprocess.check_output(['deco', 'openapi', 'get', sha]).decode('utf-8') + + def _load_mapping(self) -> dict[str, Tag]: + mapping = {} + pkgs = {p.name: p for p in self.packages} + spec = json.loads(self._openapi_spec()) + for tag in spec['tags']: + t = Tag(name=tag['name'], + service=tag['x-databricks-service'], + is_account=tag.get('x-databricks-is-accounts', False), + package=pkgs[tag['x-databricks-package']]) + mapping[tag['name']] = t + return mapping + + @staticmethod + def _get_type_from_annotations(annotations, name): + tpe = annotations.get(name) + if len(get_args(tpe)) > 0: + tpe = get_args(tpe)[0] + if isinstance(tpe, type): + tpe = tpe.__name__ + return tpe + + @staticmethod + def _to_typed_args(argspec: inspect.FullArgSpec, required: bool) -> list[TypedArgument]: + annotations = argspec.annotations if argspec.annotations is not None else {} + if required: + argslist = argspec.args[1:] + defaults = {} + for i, x in enumerate(argspec.defaults if argspec.defaults is not None else []): + defaults[argslist[i - len(argspec.defaults)]] = x + else: + argslist = argspec.kwonlyargs + defaults = argspec.kwonlydefaults + out = [] + for arg in argslist: + tpe = Generator._get_type_from_annotations(annotations, arg) + out.append(TypedArgument(name=arg, tpe=tpe, default=defaults.get(arg))) + return out + + def class_methods(self, inst) -> list[MethodDoc]: + method_docs = [] + for name in dir(inst): + if name[0] == '_': + # private members + continue + instance_attr = getattr(inst, name) + if not callable(instance_attr): + continue + args = inspect.getfullargspec(instance_attr) + method_docs.append( + MethodDoc(method_name=name, + required_args=self._to_typed_args(args, required=True), + kwonly_args=self._to_typed_args(args, required=False), + doc=instance_attr.__doc__, + return_type=Generator._get_type_from_annotations(args.annotations, 'return'))) + return method_docs + + def service_docs(self, client_inst) -> list[ServiceDoc]: + client_prefix = 'w' if isinstance(client_inst, WorkspaceClient) else 'a' + ignore_client_fields = ('config', 'dbutils', 'api_client', 'files') + all = [] + for service_name, service_inst in 
inspect.getmembers(client_inst): + if service_name.startswith('_'): + continue + if service_name in ignore_client_fields: + continue + class_doc = service_inst.__doc__ + class_name = service_inst.__class__.__name__ + all.append( + ServiceDoc(client_prefix=client_prefix, + service_name=service_name, + class_name=class_name, + doc=class_doc, + tag=self._get_tag_name(service_inst.__class__.__name__, service_name), + methods=self.class_methods(service_inst))) + return all + + @staticmethod + def _should_document(obj): + return is_dataclass(obj) or (type(obj) == type and issubclass(obj, Enum)) + + @staticmethod + def _make_folder_if_not_exists(folder): + if not os.path.exists(folder): + os.makedirs(folder) + + def write_dataclass_docs(self): + self._make_folder_if_not_exists(f'{__dir__}/dbdataclasses') for pkg in self.packages: - self._write_client_package_doc(pkg) - self._write_reference_toc() + module = importlib.import_module(f'databricks.sdk.service.{pkg.name}') + all_members = [name for name, _ in inspect.getmembers(module, predicate=self._should_document)] + doc = DataclassesDoc(package=pkg, dataclasses=sorted(all_members)) + with open(f'{__dir__}/dbdataclasses/{pkg.name}.rst', 'w') as f: + f.write(doc.as_rst()) + all = "\n ".join([f'{p.name}' for p in self.packages]) + with open(f'{__dir__}/dbdataclasses/index.rst', 'w') as f: + f.write(f''' +Dataclasses +=========== - def _write_client_package_doc(self, pkg: Package): - title = f'``{pkg.name}``: {pkg.label}' - has_mixin = os.path.exists(f'{__dir__}/../databricks/sdk/mixins/{pkg.name}.py') - with open(f'{__dir__}/autogen/{pkg.name}.rst', 'w') as f: +.. toctree:: + :maxdepth: 1 + + {all}''') + + def _get_tag_name(self, class_name, service_name) -> Tag: + if class_name[-3:] == 'Ext': + # ClustersExt, DbfsExt, WorkspaceExt, but not ExternalLocations + class_name = class_name.replace('Ext', 'API') + class_name = class_name[:-3] + for tag_name, t in self.mapping.items(): + if t.service == class_name: + return t + raise KeyError(f'Cannot find {class_name} / {service_name} tag') + + def load_client(self, client, folder, label, description): + client_services = [] + package_to_services = collections.defaultdict(list) + service_docs = self.service_docs(client) + for svc in service_docs: + client_services.append(svc.service_name) + package = svc.tag.package.name + package_to_services[package].append(svc.service_name) + self._make_folder_if_not_exists(f'{__dir__}/{folder}/{package}') + with open(f'{__dir__}/{folder}/{package}/{svc.service_name}.rst', 'w') as f: + f.write(svc.as_rst()) + ordered_packages = [] + for pkg in self.packages: + if pkg.name not in package_to_services: + continue + ordered_packages.append(pkg.name) + self._write_client_package_doc(folder, pkg, package_to_services[pkg.name]) + self._write_client_packages(folder, label, description, ordered_packages) + + def _write_client_packages(self, folder: str, label: str, description: str, packages: list[str]): + """Writes out the top-level index for the APIs supported by a client.""" + self._make_folder_if_not_exists(f'{__dir__}/{folder}') + with open(f'{__dir__}/{folder}/index.rst', 'w') as f: + all = "\n ".join([f'{name}/index' for name in packages]) f.write(f''' -{title} -{'=' * len(title)} +{label} +{'=' * len(label)} -{pkg.description} - -.. automodule:: databricks.sdk.service.{pkg.name} - :members: - :undoc-members: -''') - if has_mixin: - f.write(f''' -.. 
automodule:: databricks.sdk.mixins.{pkg.name} - :members: - :inherited-members: - :undoc-members: -''') - - def _write_reference_toc(self): - all = '\n'.join([f' {p.name}' for p in sorted(self.packages, key=lambda p: p.name)]) - with open(f'{__dir__}/autogen/reference.rst', 'w') as f: +{description} + +.. toctree:: + :maxdepth: 1 + + {all}''') + + def _write_client_package_doc(self, folder: str, pkg: Package, services: list[str]): + """Writes out the index for a single package supported by a client.""" + self._make_folder_if_not_exists(f'{__dir__}/{folder}/{pkg.name}') + with open(f'{__dir__}/{folder}/{pkg.name}/index.rst', 'w') as f: + all = "\n ".join(services) f.write(f''' -Reference -========= +{pkg.label} +{'=' * len(pkg.label)} + +{pkg.description} .. toctree:: :maxdepth: 1 - -{all} -''') + + {all}''') if __name__ == '__main__': + + @credentials_provider('noop', []) + def noop_credentials(_: any): + return lambda: {} + gen = Generator() - gen.write_reference() + + w = WorkspaceClient(credentials_provider=noop_credentials) + gen.load_client(w, 'workspace', 'Workspace APIs', 'These APIs are available from WorkspaceClient') + + a = AccountClient(credentials_provider=noop_credentials) + gen.load_client(a, 'account', 'Account APIs', 'These APIs are available from AccountClient') + + gen.write_dataclass_docs() diff --git a/docs/getting-started.md b/docs/getting-started.md index f0b0c6439..71f735c62 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -20,6 +20,41 @@ followed by dbutils.library.restartPython() ``` +## Usage Overview + +At its core, the SDK exposes two primary clients: `databricks.sdk.WorkspaceClient` and `databricks.sdk.AccountClient`. The `WorkspaceClient` is tailored for interacting with resources within the Databricks workspace, such as notebooks, jobs, and clusters, while the `AccountClient` focuses on account-level functionalities including user and group management, billing, and workspace provisioning and management. + +To use the SDK to call an API, first find the API in either the [Workspace API Reference](workspace/index.rst) or [Account API reference](account/index.rst). Then, on the appropriate client, call the corresponding method. All API calls have the form + +``` +w..() +``` +or +``` +a..(parameters) +``` + +For example, to list all SQL queries in the workspace, run: + +```python +# Authenticate as described above +from databricks.sdk import WorkspaceClient +w = WorkspaceClient() +for query in w.queries.list(): + print(f'query {query.name} was created at {query.created_at}') +``` + +To list all workspaces in the account, run: + +```python +# Authenticate as described above +from databricks.sdk import AccountClient +a = AccountClient() +for workspace in a.workspaces.list(): + print(f'workspace {workspace.workspace_name} was created at {workspace.creation_time}') +``` + + ## Authentication There are two primary entry points to the Databricks SDK: @@ -61,7 +96,7 @@ The Databricks SDK for Python makes use of Python's data classes and enums to re Specific data classes are organized into separate packages under `databricks.sdk.service`. For example, `databricks.sdk.service.jobs` has defintions for data classes & enums related to the Jobs API. -For more information, consult the [API Reference](autogen/reference.rst). +For more information, consult the [Dataclasses API Reference](dbdataclasses/index.rst). 
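As a concrete illustration, enum values (such as `catalog.Privilege`) and request dataclasses (such as `catalog.PermissionsChange`) are passed directly to client methods. A minimal sketch, where the table name and group name are placeholders you would replace with your own:

```python
from databricks.sdk import WorkspaceClient
from databricks.sdk.service import catalog

w = WorkspaceClient()

# Grant SELECT on a table by passing a dataclass and enum values to grants.update().
# "main.default.my_table" and "data-engineers" are placeholders.
w.grants.update(securable_type=catalog.SecurableType.TABLE,
                full_name="main.default.my_table",
                changes=[
                    catalog.PermissionsChange(add=[catalog.Privilege.SELECT],
                                              principal="data-engineers")
                ])
```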
## Examples diff --git a/docs/index.rst b/docs/index.rst index 581077432..a4873c43e 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,7 +8,7 @@ This SDK is supported for production use cases, but we do expect future releases We are keen to hear feedback from you on these SDKs. Please `file GitHub issues `_, and we will address them. .. toctree:: - :maxdepth: 2 + :maxdepth: 3 getting-started authentication @@ -17,7 +17,10 @@ We are keen to hear feedback from you on these SDKs. Please `file GitHub issues pagination logging dbutils - clients - autogen/reference + clients/workspace + workspace/index + clients/account + account/index + dbdataclasses/index diff --git a/docs/oauth.md b/docs/oauth.md index b5768522b..b04adbfac 100644 --- a/docs/oauth.md +++ b/docs/oauth.md @@ -55,11 +55,11 @@ for cl in clusters: It will launch a browser, prompting user to login with Azure credentials and give consent like described on the following screen: -![](./images/aad-approve-app.png) +![](images/aad-approve-app.png) After giving consent, the user can close the browser tab: -![](./images/external-browser-finish.png) +![](images/external-browser-finish.png) ### Public Client 3-legged OAuth flow on local machines @@ -316,4 +316,4 @@ custom_app = account_client.custom_app_integration.create( logging.info(f'Created new custom app: ' f'--client_id {custom_app.client_id} ' f'--client_secret {custom_app.client_secret}') -``` \ No newline at end of file +``` diff --git a/docs/workspace/catalog/artifact_allowlists.rst b/docs/workspace/catalog/artifact_allowlists.rst new file mode 100644 index 000000000..349bbbd0f --- /dev/null +++ b/docs/workspace/catalog/artifact_allowlists.rst @@ -0,0 +1,37 @@ +``w.artifact_allowlists``: Artifact Allowlists +============================================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: ArtifactAllowlistsAPI + + In Databricks Runtime 13.3 and above, you can add libraries and init scripts to the `allowlist` in UC so + that users can leverage these artifacts on compute configured with shared access mode. + + .. py:method:: get(artifact_type: ArtifactType) -> ArtifactAllowlistInfo + + Get an artifact allowlist. + + Get the artifact allowlist of a certain artifact type. The caller must be a metastore admin or have + the **MANAGE ALLOWLIST** privilege on the metastore. + + :param artifact_type: :class:`ArtifactType` + The artifact type of the allowlist. + + :returns: :class:`ArtifactAllowlistInfo` + + + .. py:method:: update(artifact_type: ArtifactType, artifact_matchers: List[ArtifactMatcher]) -> ArtifactAllowlistInfo + + Set an artifact allowlist. + + Set the artifact allowlist of a certain artifact type. The whole artifact allowlist is replaced with + the new allowlist. The caller must be a metastore admin or have the **MANAGE ALLOWLIST** privilege on + the metastore. + + :param artifact_type: :class:`ArtifactType` + The artifact type of the allowlist. + :param artifact_matchers: List[:class:`ArtifactMatcher`] + A list of allowed artifact match patterns. + + :returns: :class:`ArtifactAllowlistInfo` + \ No newline at end of file diff --git a/docs/workspace/catalog/catalogs.rst b/docs/workspace/catalog/catalogs.rst new file mode 100644 index 000000000..5592152bb --- /dev/null +++ b/docs/workspace/catalog/catalogs.rst @@ -0,0 +1,169 @@ +``w.catalogs``: Catalogs +======================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: CatalogsAPI + + A catalog is the first layer of Unity Catalog’s three-level namespace. 
It’s used to organize your data + assets. Users can see all catalogs on which they have been assigned the USE_CATALOG data permission. + + In Unity Catalog, admins and data stewards manage users and their access to data centrally across all of + the workspaces in a Databricks account. Users in different workspaces can share access to the same data, + depending on privileges granted centrally in Unity Catalog. + + .. py:method:: create(name: str [, comment: Optional[str], connection_name: Optional[str], options: Optional[Dict[str, str]], properties: Optional[Dict[str, str]], provider_name: Optional[str], share_name: Optional[str], storage_root: Optional[str]]) -> CatalogInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + # cleanup + w.catalogs.delete(name=created.name, force=True) + + Create a catalog. + + Creates a new catalog instance in the parent metastore if the caller is a metastore admin or has the + **CREATE_CATALOG** privilege. + + :param name: str + Name of catalog. + :param comment: str (optional) + User-provided free-form text description. + :param connection_name: str (optional) + The name of the connection to an external data source. + :param options: Dict[str,str] (optional) + A map of key-value properties attached to the securable. + :param properties: Dict[str,str] (optional) + A map of key-value properties attached to the securable. + :param provider_name: str (optional) + The name of delta sharing provider. + + A Delta Sharing catalog is a catalog that is based on a Delta share on a remote sharing server. + :param share_name: str (optional) + The name of the share under the share provider. + :param storage_root: str (optional) + Storage root URL for managed tables within catalog. + + :returns: :class:`CatalogInfo` + + + .. py:method:: delete(name: str [, force: Optional[bool]]) + + Delete a catalog. + + Deletes the catalog that matches the supplied name. The caller must be a metastore admin or the owner + of the catalog. + + :param name: str + The name of the catalog. + :param force: bool (optional) + Force deletion even if the catalog is not empty. + + + + + .. py:method:: get(name: str) -> CatalogInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + _ = w.catalogs.get(name=created.name) + + # cleanup + w.catalogs.delete(name=created.name, force=True) + + Get a catalog. + + Gets the specified catalog in a metastore. The caller must be a metastore admin, the owner of the + catalog, or a user that has the **USE_CATALOG** privilege set for their account. + + :param name: str + The name of the catalog. + + :returns: :class:`CatalogInfo` + + + .. py:method:: list() -> Iterator[CatalogInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.catalogs.list() + + List catalogs. + + Gets an array of catalogs in the metastore. If the caller is the metastore admin, all catalogs will be + retrieved. Otherwise, only catalogs owned by the caller (or for which the caller has the + **USE_CATALOG** privilege) will be retrieved. There is no guarantee of a specific ordering of the + elements in the array. + + :returns: Iterator over :class:`CatalogInfo` + + + .. 
py:method:: update(name: str [, comment: Optional[str], enable_predictive_optimization: Optional[EnablePredictiveOptimization], isolation_mode: Optional[IsolationMode], new_name: Optional[str], owner: Optional[str], properties: Optional[Dict[str, str]]]) -> CatalogInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + _ = w.catalogs.update(name=created.name, comment="updated") + + # cleanup + w.catalogs.delete(name=created.name, force=True) + + Update a catalog. + + Updates the catalog that matches the supplied name. The caller must be either the owner of the + catalog, or a metastore admin (when changing the owner field of the catalog). + + :param name: str + The name of the catalog. + :param comment: str (optional) + User-provided free-form text description. + :param enable_predictive_optimization: :class:`EnablePredictiveOptimization` (optional) + Whether predictive optimization should be enabled for this object and objects under it. + :param isolation_mode: :class:`IsolationMode` (optional) + Whether the current securable is accessible from all workspaces or a specific set of workspaces. + :param new_name: str (optional) + New name for the catalog. + :param owner: str (optional) + Username of current owner of catalog. + :param properties: Dict[str,str] (optional) + A map of key-value properties attached to the securable. + + :returns: :class:`CatalogInfo` + \ No newline at end of file diff --git a/docs/workspace/catalog/connections.rst b/docs/workspace/catalog/connections.rst new file mode 100644 index 000000000..6125db714 --- /dev/null +++ b/docs/workspace/catalog/connections.rst @@ -0,0 +1,208 @@ +``w.connections``: Connections +============================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: ConnectionsAPI + + Connections allow for creating a connection to an external data source. + + A connection is an abstraction of an external data source that can be connected from Databricks Compute. + Creating a connection object is the first step to managing external data sources within Unity Catalog, + with the second step being creating a data object (catalog, schema, or table) using the connection. Data + objects derived from a connection can be written to or read from similar to other Unity Catalog data + objects based on cloud storage. Users may create different types of connections with each connection + having a unique set of configuration options to support credential management and other settings. + + .. py:method:: create(name: str, connection_type: ConnectionType, options: Dict[str, str] [, comment: Optional[str], properties: Optional[Dict[str, str]], read_only: Optional[bool]]) -> ConnectionInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + conn_create = w.connections.create(comment="Go SDK Acceptance Test Connection", + connection_type=catalog.ConnectionType.DATABRICKS, + name=f'sdk-{time.time_ns()}', + options={ + "host": + "%s-fake-workspace.cloud.databricks.com" % (f'sdk-{time.time_ns()}'), + "httpPath": + "/sql/1.0/warehouses/%s" % (f'sdk-{time.time_ns()}'), + "personalAccessToken": + f'sdk-{time.time_ns()}', + }) + + # cleanup + w.connections.delete(name_arg=conn_create.name) + + Create a connection. + + Creates a new connection + + Creates a new connection to an external data source. 
It allows users to specify connection details and + configurations for interaction with the external server. + + :param name: str + Name of the connection. + :param connection_type: :class:`ConnectionType` + The type of connection. + :param options: Dict[str,str] + A map of key-value properties attached to the securable. + :param comment: str (optional) + User-provided free-form text description. + :param properties: Dict[str,str] (optional) + An object containing map of key-value properties attached to the connection. + :param read_only: bool (optional) + If the connection is read only. + + :returns: :class:`ConnectionInfo` + + + .. py:method:: delete(name_arg: str) + + Delete a connection. + + Deletes the connection that matches the supplied name. + + :param name_arg: str + The name of the connection to be deleted. + + + + + .. py:method:: get(name_arg: str) -> ConnectionInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + conn_create = w.connections.create(comment="Go SDK Acceptance Test Connection", + connection_type=catalog.ConnectionType.DATABRICKS, + name=f'sdk-{time.time_ns()}', + options={ + "host": + "%s-fake-workspace.cloud.databricks.com" % (f'sdk-{time.time_ns()}'), + "httpPath": + "/sql/1.0/warehouses/%s" % (f'sdk-{time.time_ns()}'), + "personalAccessToken": + f'sdk-{time.time_ns()}', + }) + + conn_update = w.connections.update(name=conn_create.name, + name_arg=conn_create.name, + options={ + "host": + "%s-fake-workspace.cloud.databricks.com" % (f'sdk-{time.time_ns()}'), + "httpPath": + "/sql/1.0/warehouses/%s" % (f'sdk-{time.time_ns()}'), + "personalAccessToken": + f'sdk-{time.time_ns()}', + }) + + conn = w.connections.get(name_arg=conn_update.name) + + # cleanup + w.connections.delete(name_arg=conn_create.name) + + Get a connection. + + Gets a connection from it's name. + + :param name_arg: str + Name of the connection. + + :returns: :class:`ConnectionInfo` + + + .. py:method:: list() -> Iterator[ConnectionInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + conn_list = w.connections.list() + + List connections. + + List all connections. + + :returns: Iterator over :class:`ConnectionInfo` + + + .. py:method:: update(name_arg: str, options: Dict[str, str] [, name: Optional[str], new_name: Optional[str], owner: Optional[str]]) -> ConnectionInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + conn_create = w.connections.create(comment="Go SDK Acceptance Test Connection", + connection_type=catalog.ConnectionType.DATABRICKS, + name=f'sdk-{time.time_ns()}', + options={ + "host": + "%s-fake-workspace.cloud.databricks.com" % (f'sdk-{time.time_ns()}'), + "httpPath": + "/sql/1.0/warehouses/%s" % (f'sdk-{time.time_ns()}'), + "personalAccessToken": + f'sdk-{time.time_ns()}', + }) + + conn_update = w.connections.update(name=conn_create.name, + name_arg=conn_create.name, + options={ + "host": + "%s-fake-workspace.cloud.databricks.com" % (f'sdk-{time.time_ns()}'), + "httpPath": + "/sql/1.0/warehouses/%s" % (f'sdk-{time.time_ns()}'), + "personalAccessToken": + f'sdk-{time.time_ns()}', + }) + + # cleanup + w.connections.delete(name_arg=conn_create.name) + + Update a connection. + + Updates the connection that matches the supplied name. + + :param name_arg: str + Name of the connection. 
+ :param options: Dict[str,str] + A map of key-value properties attached to the securable. + :param name: str (optional) + Name of the connection. + :param new_name: str (optional) + New name for the connection. + :param owner: str (optional) + Username of current owner of the connection. + + :returns: :class:`ConnectionInfo` + \ No newline at end of file diff --git a/docs/workspace/catalog/external_locations.rst b/docs/workspace/catalog/external_locations.rst new file mode 100644 index 000000000..e9e86fb41 --- /dev/null +++ b/docs/workspace/catalog/external_locations.rst @@ -0,0 +1,222 @@ +``w.external_locations``: External Locations +============================================ +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: ExternalLocationsAPI + + An external location is an object that combines a cloud storage path with a storage credential that + authorizes access to the cloud storage path. Each external location is subject to Unity Catalog + access-control policies that control which users and groups can access the credential. If a user does not + have access to an external location in Unity Catalog, the request fails and Unity Catalog does not attempt + to authenticate to your cloud tenant on the user’s behalf. + + Databricks recommends using external locations rather than using storage credentials directly. + + To create external locations, you must be a metastore admin or a user with the + **CREATE_EXTERNAL_LOCATION** privilege. + + .. py:method:: create(name: str, url: str, credential_name: str [, access_point: Optional[str], comment: Optional[str], encryption_details: Optional[EncryptionDetails], read_only: Optional[bool], skip_validation: Optional[bool]]) -> ExternalLocationInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + storage_credential = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"]), + comment="created via SDK") + + external_location = w.external_locations.create(name=f'sdk-{time.time_ns()}', + credential_name=storage_credential.name, + comment="created via SDK", + url="s3://" + os.environ["TEST_BUCKET"] + "/" + + f'sdk-{time.time_ns()}') + + # cleanup + w.storage_credentials.delete(name=storage_credential.name) + w.external_locations.delete(name=external_location.name) + + Create an external location. + + Creates a new external location entry in the metastore. The caller must be a metastore admin or have + the **CREATE_EXTERNAL_LOCATION** privilege on both the metastore and the associated storage + credential. + + :param name: str + Name of the external location. + :param url: str + Path URL of the external location. + :param credential_name: str + Name of the storage credential used with this location. + :param access_point: str (optional) + The AWS access point to use when accesing s3 for this external location. + :param comment: str (optional) + User-provided free-form text description. + :param encryption_details: :class:`EncryptionDetails` (optional) + Encryption options that apply to clients connecting to cloud storage. + :param read_only: bool (optional) + Indicates whether the external location is read-only. + :param skip_validation: bool (optional) + Skips validation of the storage credential associated with the external location. + + :returns: :class:`ExternalLocationInfo` + + + .. 
py:method:: delete(name: str [, force: Optional[bool]]) + + Delete an external location. + + Deletes the specified external location from the metastore. The caller must be the owner of the + external location. + + :param name: str + Name of the external location. + :param force: bool (optional) + Force deletion even if there are dependent external tables or mounts. + + + + + .. py:method:: get(name: str) -> ExternalLocationInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + credential = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"])) + + created = w.external_locations.create(name=f'sdk-{time.time_ns()}', + credential_name=credential.name, + url=f's3://{os.environ["TEST_BUCKET"]}/sdk-{time.time_ns()}') + + _ = w.external_locations.get(get=created.name) + + # cleanup + w.storage_credentials.delete(delete=credential.name) + w.external_locations.delete(delete=created.name) + + Get an external location. + + Gets an external location from the metastore. The caller must be either a metastore admin, the owner + of the external location, or a user that has some privilege on the external location. + + :param name: str + Name of the external location. + + :returns: :class:`ExternalLocationInfo` + + + .. py:method:: list( [, max_results: Optional[int], page_token: Optional[str]]) -> Iterator[ExternalLocationInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + all = w.external_locations.list(catalog.ListExternalLocationsRequest()) + + List external locations. + + Gets an array of external locations (__ExternalLocationInfo__ objects) from the metastore. The caller + must be a metastore admin, the owner of the external location, or a user that has some privilege on + the external location. For unpaginated request, there is no guarantee of a specific ordering of the + elements in the array. For paginated request, elements are ordered by their name. + + :param max_results: int (optional) + Maximum number of external locations to return. If not set, all the external locations are returned + (not recommended). - when set to a value greater than 0, the page length is the minimum of this + value and a server configured value; - when set to 0, the page length is set to a server configured + value (recommended); - when set to a value less than 0, an invalid parameter error is returned; + :param page_token: str (optional) + Opaque pagination token to go to next page based on previous query. + + :returns: Iterator over :class:`ExternalLocationInfo` + + + .. py:method:: update(name: str [, access_point: Optional[str], comment: Optional[str], credential_name: Optional[str], encryption_details: Optional[EncryptionDetails], force: Optional[bool], new_name: Optional[str], owner: Optional[str], read_only: Optional[bool], skip_validation: Optional[bool], url: Optional[str]]) -> ExternalLocationInfo + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + credential = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"])) + + created = w.external_locations.create(name=f'sdk-{time.time_ns()}', + credential_name=credential.name, + url="s3://%s/%s" % (os.environ["TEST_BUCKET"], f'sdk-{time.time_ns()}')) + + _ = w.external_locations.update(name=created.name, + credential_name=credential.name, + url="s3://%s/%s" % (os.environ["TEST_BUCKET"], f'sdk-{time.time_ns()}')) + + # cleanup + w.storage_credentials.delete(name=credential.name) + w.external_locations.delete(name=created.name) + + Update an external location. + + Updates an external location in the metastore. The caller must be the owner of the external location, + or be a metastore admin. In the second case, the admin can only update the name of the external + location. + + :param name: str + Name of the external location. + :param access_point: str (optional) + The AWS access point to use when accesing s3 for this external location. + :param comment: str (optional) + User-provided free-form text description. + :param credential_name: str (optional) + Name of the storage credential used with this location. + :param encryption_details: :class:`EncryptionDetails` (optional) + Encryption options that apply to clients connecting to cloud storage. + :param force: bool (optional) + Force update even if changing url invalidates dependent external tables or mounts. + :param new_name: str (optional) + New name for the external location. + :param owner: str (optional) + The owner of the external location. + :param read_only: bool (optional) + Indicates whether the external location is read-only. + :param skip_validation: bool (optional) + Skips validation of the storage credential associated with the external location. + :param url: str (optional) + Path URL of the external location. + + :returns: :class:`ExternalLocationInfo` + \ No newline at end of file diff --git a/docs/workspace/catalog/functions.rst b/docs/workspace/catalog/functions.rst new file mode 100644 index 000000000..2cc572294 --- /dev/null +++ b/docs/workspace/catalog/functions.rst @@ -0,0 +1,110 @@ +``w.functions``: Functions +========================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: FunctionsAPI + + Functions implement User-Defined Functions (UDFs) in Unity Catalog. + + The function implementation can be any SQL expression or Query, and it can be invoked wherever a table + reference is allowed in a query. In Unity Catalog, a function resides at the same level as a table, so it + can be referenced with the form __catalog_name__.__schema_name__.__function_name__. + + .. py:method:: create(function_info: CreateFunction) -> FunctionInfo + + Create a function. + + Creates a new function + + The user must have the following permissions in order for the function to be created: - + **USE_CATALOG** on the function's parent catalog - **USE_SCHEMA** and **CREATE_FUNCTION** on the + function's parent schema + + :param function_info: :class:`CreateFunction` + Partial __FunctionInfo__ specifying the function to be created. + + :returns: :class:`FunctionInfo` + + + .. py:method:: delete(name: str [, force: Optional[bool]]) + + Delete a function. + + Deletes the function that matches the supplied name. 
For the deletion to succeed, the user must + satisfy one of the following conditions: - Is the owner of the function's parent catalog - Is the + owner of the function's parent schema and have the **USE_CATALOG** privilege on its parent catalog - + Is the owner of the function itself and have both the **USE_CATALOG** privilege on its parent catalog + and the **USE_SCHEMA** privilege on its parent schema + + :param name: str + The fully-qualified name of the function (of the form + __catalog_name__.__schema_name__.__function__name__). + :param force: bool (optional) + Force deletion even if the function is notempty. + + + + + .. py:method:: get(name: str) -> FunctionInfo + + Get a function. + + Gets a function from within a parent catalog and schema. For the fetch to succeed, the user must + satisfy one of the following requirements: - Is a metastore admin - Is an owner of the function's + parent catalog - Have the **USE_CATALOG** privilege on the function's parent catalog and be the owner + of the function - Have the **USE_CATALOG** privilege on the function's parent catalog, the + **USE_SCHEMA** privilege on the function's parent schema, and the **EXECUTE** privilege on the + function itself + + :param name: str + The fully-qualified name of the function (of the form + __catalog_name__.__schema_name__.__function__name__). + + :returns: :class:`FunctionInfo` + + + .. py:method:: list(catalog_name: str, schema_name: str [, max_results: Optional[int], page_token: Optional[str]]) -> Iterator[FunctionInfo] + + List functions. + + List functions within the specified parent catalog and schema. If the user is a metastore admin, all + functions are returned in the output list. Otherwise, the user must have the **USE_CATALOG** privilege + on the catalog and the **USE_SCHEMA** privilege on the schema, and the output list contains only + functions for which either the user has the **EXECUTE** privilege or the user is the owner. For + unpaginated request, there is no guarantee of a specific ordering of the elements in the array. For + paginated request, elements are ordered by their name. + + :param catalog_name: str + Name of parent catalog for functions of interest. + :param schema_name: str + Parent schema of functions. + :param max_results: int (optional) + Maximum number of functions to return. If not set, all the functions are returned (not recommended). + - when set to a value greater than 0, the page length is the minimum of this value and a server + configured value; - when set to 0, the page length is set to a server configured value + (recommended); - when set to a value less than 0, an invalid parameter error is returned; + :param page_token: str (optional) + Opaque pagination token to go to next page based on previous query. + + :returns: Iterator over :class:`FunctionInfo` + + + .. py:method:: update(name: str [, owner: Optional[str]]) -> FunctionInfo + + Update a function. + + Updates the function that matches the supplied name. Only the owner of the function can be updated. If + the user is not a metastore admin, the user must be a member of the group that is the new function + owner. - Is a metastore admin - Is the owner of the function's parent catalog - Is the owner of the + function's parent schema and has the **USE_CATALOG** privilege on its parent catalog - Is the owner of + the function itself and has the **USE_CATALOG** privilege on its parent catalog as well as the + **USE_SCHEMA** privilege on the function's parent schema. 
+ + :param name: str + The fully-qualified name of the function (of the form + __catalog_name__.__schema_name__.__function__name__). + :param owner: str (optional) + Username of current owner of function. + + :returns: :class:`FunctionInfo` + \ No newline at end of file diff --git a/docs/workspace/catalog/grants.rst b/docs/workspace/catalog/grants.rst new file mode 100644 index 000000000..8def7ff83 --- /dev/null +++ b/docs/workspace/catalog/grants.rst @@ -0,0 +1,176 @@ +``w.grants``: Grants +==================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: GrantsAPI + + In Unity Catalog, data is secure by default. Initially, users have no access to data in a metastore. + Access can be granted by either a metastore admin, the owner of an object, or the owner of the catalog or + schema that contains the object. Securable objects in Unity Catalog are hierarchical and privileges are + inherited downward. + + Securable objects in Unity Catalog are hierarchical and privileges are inherited downward. This means that + granting a privilege on the catalog automatically grants the privilege to all current and future objects + within the catalog. Similarly, privileges granted on a schema are inherited by all current and future + objects within that schema. + + .. py:method:: get(securable_type: SecurableType, full_name: str [, principal: Optional[str]]) -> PermissionsList + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + table_name = f'sdk-{time.time_ns()}' + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + _ = w.statement_execution.execute(warehouse_id=os.environ["TEST_DEFAULT_WAREHOUSE_ID"], + catalog=created_catalog.name, + schema=created_schema.name, + statement="CREATE TABLE %s AS SELECT 2+2 as four" % (table_name)).result() + + table_full_name = "%s.%s.%s" % (created_catalog.name, created_schema.name, table_name) + + created_table = w.tables.get(full_name=table_full_name) + + grants = w.grants.get_effective(securable_type=catalog.SecurableType.TABLE, full_name=created_table.full_name) + + # cleanup + w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=created_catalog.name, force=True) + w.tables.delete(full_name=table_full_name) + + Get permissions. + + Gets the permissions for a securable. + + :param securable_type: :class:`SecurableType` + Type of securable. + :param full_name: str + Full name of securable. + :param principal: str (optional) + If provided, only the permissions for the specified principal (user or group) are returned. + + :returns: :class:`PermissionsList` + + + .. py:method:: get_effective(securable_type: SecurableType, full_name: str [, principal: Optional[str]]) -> EffectivePermissionsList + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + table_name = f'sdk-{time.time_ns()}' + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + _ = w.statement_execution.execute(warehouse_id=os.environ["TEST_DEFAULT_WAREHOUSE_ID"], + catalog=created_catalog.name, + schema=created_schema.name, + statement="CREATE TABLE %s AS SELECT 2+2 as four" % (table_name)).result() + + table_full_name = "%s.%s.%s" % (created_catalog.name, created_schema.name, table_name) + + created_table = w.tables.get(full_name=table_full_name) + + grants = w.grants.get_effective(securable_type=catalog.SecurableType.TABLE, full_name=created_table.full_name) + + # cleanup + w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=created_catalog.name, force=True) + w.tables.delete(full_name=table_full_name) + + Get effective permissions. + + Gets the effective permissions for a securable. + + :param securable_type: :class:`SecurableType` + Type of securable. + :param full_name: str + Full name of securable. + :param principal: str (optional) + If provided, only the effective permissions for the specified principal (user or group) are + returned. + + :returns: :class:`EffectivePermissionsList` + + + .. py:method:: update(securable_type: SecurableType, full_name: str [, changes: Optional[List[PermissionsChange]]]) -> PermissionsList + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + table_name = f'sdk-{time.time_ns()}' + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + _ = w.statement_execution.execute(warehouse_id=os.environ["TEST_DEFAULT_WAREHOUSE_ID"], + catalog=created_catalog.name, + schema=created_schema.name, + statement="CREATE TABLE %s AS SELECT 2+2 as four" % (table_name)).result() + + table_full_name = "%s.%s.%s" % (created_catalog.name, created_schema.name, table_name) + + account_level_group_name = os.environ["TEST_DATA_ENG_GROUP"] + + created_table = w.tables.get(full_name=table_full_name) + + x = w.grants.update(full_name=created_table.full_name, + securable_type=catalog.SecurableType.TABLE, + changes=[ + catalog.PermissionsChange(add=[catalog.Privilege.MODIFY, catalog.Privilege.SELECT], + principal=account_level_group_name) + ]) + + # cleanup + w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=created_catalog.name, force=True) + w.tables.delete(full_name=table_full_name) + + Update permissions. + + Updates the permissions for a securable. + + :param securable_type: :class:`SecurableType` + Type of securable. + :param full_name: str + Full name of securable. + :param changes: List[:class:`PermissionsChange`] (optional) + Array of permissions change objects. 
+ + :returns: :class:`PermissionsList` + \ No newline at end of file diff --git a/docs/workspace/catalog/index.rst b/docs/workspace/catalog/index.rst new file mode 100644 index 000000000..b3b18dc2c --- /dev/null +++ b/docs/workspace/catalog/index.rst @@ -0,0 +1,25 @@ + +Unity Catalog +============= + +Configure data governance with Unity Catalog for metastores, catalogs, schemas, tables, external locations, and storage credentials + +.. toctree:: + :maxdepth: 1 + + artifact_allowlists + catalogs + connections + external_locations + functions + grants + metastores + model_versions + registered_models + schemas + storage_credentials + system_schemas + table_constraints + tables + volumes + workspace_bindings \ No newline at end of file diff --git a/docs/workspace/catalog/metastores.rst b/docs/workspace/catalog/metastores.rst new file mode 100644 index 000000000..a5beb397a --- /dev/null +++ b/docs/workspace/catalog/metastores.rst @@ -0,0 +1,314 @@ +``w.metastores``: Metastores +============================ +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: MetastoresAPI + + A metastore is the top-level container of objects in Unity Catalog. It stores data assets (tables and + views) and the permissions that govern access to them. Databricks account admins can create metastores and + assign them to Databricks workspaces to control which workloads use each metastore. For a workspace to use + Unity Catalog, it must have a Unity Catalog metastore attached. + + Each metastore is configured with a root storage location in a cloud storage account. This storage + location is used for metadata and managed tables data. + + NOTE: This metastore is distinct from the metastore included in Databricks workspaces created before Unity + Catalog was released. If your workspace includes a legacy Hive metastore, the data in that metastore is + available in a catalog named hive_metastore. + + .. py:method:: assign(workspace_id: int, metastore_id: str, default_catalog_name: str) + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + workspace_id = os.environ["DUMMY_WORKSPACE_ID"] + + created = w.metastores.create(name=f'sdk-{time.time_ns()}', + storage_root="s3://%s/%s" % + (os.environ["TEST_BUCKET"], f'sdk-{time.time_ns()}')) + + w.metastores.assign(metastore_id=created.metastore_id, workspace_id=workspace_id) + + # cleanup + w.metastores.delete(id=created.metastore_id, force=True) + + Create an assignment. + + Creates a new metastore assignment. If an assignment for the same __workspace_id__ exists, it will be + overwritten by the new __metastore_id__ and __default_catalog_name__. The caller must be an account + admin. + + :param workspace_id: int + A workspace ID. + :param metastore_id: str + The unique ID of the metastore. + :param default_catalog_name: str + The name of the default catalog in the metastore. + + + + + .. py:method:: create(name: str [, region: Optional[str], storage_root: Optional[str]]) -> MetastoreInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.metastores.create(name=f'sdk-{time.time_ns()}', + storage_root="s3://%s/%s" % + (os.environ["TEST_BUCKET"], f'sdk-{time.time_ns()}')) + + # cleanup + w.metastores.delete(id=created.metastore_id, force=True) + + Create a metastore. + + Creates a new metastore based on a provided name and optional storage root path. 
By default (if the + __owner__ field is not set), the owner of the new metastore is the user calling the + __createMetastore__ API. If the __owner__ field is set to the empty string (**""**), the ownership is + assigned to the System User instead. + + :param name: str + The user-specified name of the metastore. + :param region: str (optional) + Cloud region which the metastore serves (e.g., `us-west-2`, `westus`). If this field is omitted, the + region of the workspace receiving the request will be used. + :param storage_root: str (optional) + The storage root URL for metastore + + :returns: :class:`MetastoreInfo` + + + .. py:method:: current() -> MetastoreAssignment + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + current_metastore = w.metastores.current() + + Get metastore assignment for workspace. + + Gets the metastore assignment for the workspace being accessed. + + :returns: :class:`MetastoreAssignment` + + + .. py:method:: delete(id: str [, force: Optional[bool]]) + + Delete a metastore. + + Deletes a metastore. The caller must be a metastore admin. + + :param id: str + Unique ID of the metastore. + :param force: bool (optional) + Force deletion even if the metastore is not empty. Default is false. + + + + + .. py:method:: get(id: str) -> MetastoreInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.metastores.create(name=f'sdk-{time.time_ns()}', + storage_root="s3://%s/%s" % + (os.environ["TEST_BUCKET"], f'sdk-{time.time_ns()}')) + + _ = w.metastores.get(id=created.metastore_id) + + # cleanup + w.metastores.delete(id=created.metastore_id, force=True) + + Get a metastore. + + Gets a metastore that matches the supplied ID. The caller must be a metastore admin to retrieve this + info. + + :param id: str + Unique ID of the metastore. + + :returns: :class:`MetastoreInfo` + + + .. py:method:: list() -> Iterator[MetastoreInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.metastores.list() + + List metastores. + + Gets an array of the available metastores (as __MetastoreInfo__ objects). The caller must be an admin + to retrieve this info. There is no guarantee of a specific ordering of the elements in the array. + + :returns: Iterator over :class:`MetastoreInfo` + + + .. py:method:: summary() -> GetMetastoreSummaryResponse + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + summary = w.metastores.summary() + + Get a metastore summary. + + Gets information about a metastore. This summary includes the storage credential, the cloud vendor, + the cloud region, and the global metastore ID. + + :returns: :class:`GetMetastoreSummaryResponse` + + + .. py:method:: unassign(workspace_id: int, metastore_id: str) + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + workspace_id = os.environ["DUMMY_WORKSPACE_ID"] + + created = w.metastores.create(name=f'sdk-{time.time_ns()}', + storage_root="s3://%s/%s" % + (os.environ["TEST_BUCKET"], f'sdk-{time.time_ns()}')) + + w.metastores.unassign(metastore_id=created.metastore_id, workspace_id=workspace_id) + + # cleanup + w.metastores.delete(id=created.metastore_id, force=True) + + Delete an assignment. + + Deletes a metastore assignment. The caller must be an account administrator. 
+ + :param workspace_id: int + A workspace ID. + :param metastore_id: str + Query for the ID of the metastore to delete. + + + + + .. py:method:: update(id: str [, delta_sharing_organization_name: Optional[str], delta_sharing_recipient_token_lifetime_in_seconds: Optional[int], delta_sharing_scope: Optional[UpdateMetastoreDeltaSharingScope], name: Optional[str], new_name: Optional[str], owner: Optional[str], privilege_model_version: Optional[str], storage_root_credential_id: Optional[str]]) -> MetastoreInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.metastores.create(name=f'sdk-{time.time_ns()}', + storage_root="s3://%s/%s" % + (os.environ["TEST_BUCKET"], f'sdk-{time.time_ns()}')) + + _ = w.metastores.update(id=created.metastore_id, name=f'sdk-{time.time_ns()}') + + # cleanup + w.metastores.delete(id=created.metastore_id, force=True) + + Update a metastore. + + Updates information for a specific metastore. The caller must be a metastore admin. If the __owner__ + field is set to the empty string (**""**), the ownership is updated to the System User. + + :param id: str + Unique ID of the metastore. + :param delta_sharing_organization_name: str (optional) + The organization name of a Delta Sharing entity, to be used in Databricks-to-Databricks Delta + Sharing as the official name. + :param delta_sharing_recipient_token_lifetime_in_seconds: int (optional) + The lifetime of delta sharing recipient token in seconds. + :param delta_sharing_scope: :class:`UpdateMetastoreDeltaSharingScope` (optional) + The scope of Delta Sharing enabled for the metastore. + :param name: str (optional) + The user-specified name of the metastore. + :param new_name: str (optional) + New name for the metastore. + :param owner: str (optional) + The owner of the metastore. + :param privilege_model_version: str (optional) + Privilege model version of the metastore, of the form `major.minor` (e.g., `1.0`). + :param storage_root_credential_id: str (optional) + UUID of storage credential to access the metastore storage_root. + + :returns: :class:`MetastoreInfo` + + + .. py:method:: update_assignment(workspace_id: int [, default_catalog_name: Optional[str], metastore_id: Optional[str]]) + + Update an assignment. + + Updates a metastore assignment. This operation can be used to update __metastore_id__ or + __default_catalog_name__ for a specified Workspace, if the Workspace is already assigned a metastore. + The caller must be an account admin to update __metastore_id__; otherwise, the caller can be a + Workspace admin. + + :param workspace_id: int + A workspace ID. + :param default_catalog_name: str (optional) + The name of the default catalog for the metastore. + :param metastore_id: str (optional) + The unique ID of the metastore. + + + \ No newline at end of file diff --git a/docs/workspace/catalog/model_versions.rst b/docs/workspace/catalog/model_versions.rst new file mode 100644 index 000000000..9b609f304 --- /dev/null +++ b/docs/workspace/catalog/model_versions.rst @@ -0,0 +1,118 @@ +``w.model_versions``: Model Versions +==================================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: ModelVersionsAPI + + Databricks provides a hosted version of MLflow Model Registry in Unity Catalog. Models in Unity Catalog + provide centralized access control, auditing, lineage, and discovery of ML models across Databricks + workspaces. 
+ + This API reference documents the REST endpoints for managing model versions in Unity Catalog. For more + details, see the [registered models API docs](/api/workspace/registeredmodels). + + .. py:method:: delete(full_name: str, version: int) + + Delete a Model Version. + + Deletes a model version from the specified registered model. Any aliases assigned to the model version + will also be deleted. + + The caller must be a metastore admin or an owner of the parent registered model. For the latter case, + the caller must also be the owner or have the **USE_CATALOG** privilege on the parent catalog and the + **USE_SCHEMA** privilege on the parent schema. + + :param full_name: str + The three-level (fully qualified) name of the model version + :param version: int + The integer version number of the model version + + + + + .. py:method:: get(full_name: str, version: int) -> RegisteredModelInfo + + Get a Model Version. + + Get a model version. + + The caller must be a metastore admin or an owner of (or have the **EXECUTE** privilege on) the parent + registered model. For the latter case, the caller must also be the owner or have the **USE_CATALOG** + privilege on the parent catalog and the **USE_SCHEMA** privilege on the parent schema. + + :param full_name: str + The three-level (fully qualified) name of the model version + :param version: int + The integer version number of the model version + + :returns: :class:`RegisteredModelInfo` + + + .. py:method:: get_by_alias(full_name: str, alias: str) -> ModelVersionInfo + + Get Model Version By Alias. + + Get a model version by alias. + + The caller must be a metastore admin or an owner of (or have the **EXECUTE** privilege on) the + registered model. For the latter case, the caller must also be the owner or have the **USE_CATALOG** + privilege on the parent catalog and the **USE_SCHEMA** privilege on the parent schema. + + :param full_name: str + The three-level (fully qualified) name of the registered model + :param alias: str + The name of the alias + + :returns: :class:`ModelVersionInfo` + + + .. py:method:: list(full_name: str [, max_results: Optional[int], page_token: Optional[str]]) -> Iterator[ModelVersionInfo] + + List Model Versions. + + List model versions. You can list model versions under a particular schema, or list all model versions + in the current metastore. + + The returned models are filtered based on the privileges of the calling user. For example, the + metastore admin is able to list all the model versions. A regular user needs to be the owner or have + the **EXECUTE** privilege on the parent registered model to recieve the model versions in the + response. For the latter case, the caller must also be the owner or have the **USE_CATALOG** privilege + on the parent catalog and the **USE_SCHEMA** privilege on the parent schema. + + There is no guarantee of a specific ordering of the elements in the response. + + :param full_name: str + The full three-level name of the registered model under which to list model versions + :param max_results: int (optional) + Maximum number of model versions to return. If not set, the page length is set to a server + configured value (100, as of 1/3/2024). 
- when set to a value greater than 0, the page length is the + minimum of this value and a server configured value(1000, as of 1/3/2024); - when set to 0, the page + length is set to a server configured value (100, as of 1/3/2024) (recommended); - when set to a + value less than 0, an invalid parameter error is returned; + :param page_token: str (optional) + Opaque pagination token to go to next page based on previous query. + + :returns: Iterator over :class:`ModelVersionInfo` + + + .. py:method:: update(full_name: str, version: int [, comment: Optional[str]]) -> ModelVersionInfo + + Update a Model Version. + + Updates the specified model version. + + The caller must be a metastore admin or an owner of the parent registered model. For the latter case, + the caller must also be the owner or have the **USE_CATALOG** privilege on the parent catalog and the + **USE_SCHEMA** privilege on the parent schema. + + Currently only the comment of the model version can be updated. + + :param full_name: str + The three-level (fully qualified) name of the model version + :param version: int + The integer version number of the model version + :param comment: str (optional) + The comment attached to the model version + + :returns: :class:`ModelVersionInfo` + \ No newline at end of file diff --git a/docs/workspace/catalog/registered_models.rst b/docs/workspace/catalog/registered_models.rst new file mode 100644 index 000000000..784e0d272 --- /dev/null +++ b/docs/workspace/catalog/registered_models.rst @@ -0,0 +1,185 @@ +``w.registered_models``: Registered Models +========================================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: RegisteredModelsAPI + + Databricks provides a hosted version of MLflow Model Registry in Unity Catalog. Models in Unity Catalog + provide centralized access control, auditing, lineage, and discovery of ML models across Databricks + workspaces. + + An MLflow registered model resides in the third layer of Unity Catalog’s three-level namespace. + Registered models contain model versions, which correspond to actual ML models (MLflow models). Creating + new model versions currently requires use of the MLflow Python client. Once model versions are created, + you can load them for batch inference using MLflow Python client APIs, or deploy them for real-time + serving using Databricks Model Serving. + + All operations on registered models and model versions require USE_CATALOG permissions on the enclosing + catalog and USE_SCHEMA permissions on the enclosing schema. In addition, the following additional + privileges are required for various operations: + + * To create a registered model, users must additionally have the CREATE_MODEL permission on the target + schema. * To view registered model or model version metadata, model version data files, or invoke a model + version, users must additionally have the EXECUTE permission on the registered model * To update + registered model or model version tags, users must additionally have APPLY TAG permissions on the + registered model * To update other registered model or model version metadata (comments, aliases) create a + new model version, or update permissions on the registered model, users must be owners of the registered + model. + + Note: The securable type for models is "FUNCTION". When using REST APIs (e.g. tagging, grants) that + specify a securable type, use "FUNCTION" as the securable type. + + .. 
py:method:: create(catalog_name: str, schema_name: str, name: str [, comment: Optional[str], storage_location: Optional[str]]) -> RegisteredModelInfo + + Create a Registered Model. + + Creates a new registered model in Unity Catalog. + + File storage for model versions in the registered model will be located in the default location which + is specified by the parent schema, or the parent catalog, or the Metastore. + + For registered model creation to succeed, the user must satisfy the following conditions: - The caller + must be a metastore admin, or be the owner of the parent catalog and schema, or have the + **USE_CATALOG** privilege on the parent catalog and the **USE_SCHEMA** privilege on the parent schema. + - The caller must have the **CREATE MODEL** or **CREATE FUNCTION** privilege on the parent schema. + + :param catalog_name: str + The name of the catalog where the schema and the registered model reside + :param schema_name: str + The name of the schema where the registered model resides + :param name: str + The name of the registered model + :param comment: str (optional) + The comment attached to the registered model + :param storage_location: str (optional) + The storage location on the cloud under which model version data files are stored + + :returns: :class:`RegisteredModelInfo` + + + .. py:method:: delete(full_name: str) + + Delete a Registered Model. + + Deletes a registered model and all its model versions from the specified parent catalog and schema. + + The caller must be a metastore admin or an owner of the registered model. For the latter case, the + caller must also be the owner or have the **USE_CATALOG** privilege on the parent catalog and the + **USE_SCHEMA** privilege on the parent schema. + + :param full_name: str + The three-level (fully qualified) name of the registered model + + + + + .. py:method:: delete_alias(full_name: str, alias: str) + + Delete a Registered Model Alias. + + Deletes a registered model alias. + + The caller must be a metastore admin or an owner of the registered model. For the latter case, the + caller must also be the owner or have the **USE_CATALOG** privilege on the parent catalog and the + **USE_SCHEMA** privilege on the parent schema. + + :param full_name: str + The three-level (fully qualified) name of the registered model + :param alias: str + The name of the alias + + + + + .. py:method:: get(full_name: str) -> RegisteredModelInfo + + Get a Registered Model. + + Get a registered model. + + The caller must be a metastore admin or an owner of (or have the **EXECUTE** privilege on) the + registered model. For the latter case, the caller must also be the owner or have the **USE_CATALOG** + privilege on the parent catalog and the **USE_SCHEMA** privilege on the parent schema. + + :param full_name: str + The three-level (fully qualified) name of the registered model + + :returns: :class:`RegisteredModelInfo` + + + .. py:method:: list( [, catalog_name: Optional[str], max_results: Optional[int], page_token: Optional[str], schema_name: Optional[str]]) -> Iterator[RegisteredModelInfo] + + List Registered Models. + + List registered models. You can list registered models under a particular schema, or list all + registered models in the current metastore. + + The returned models are filtered based on the privileges of the calling user. For example, the + metastore admin is able to list all the registered models. 
A regular user needs to be the owner or + have the **EXECUTE** privilege on the registered model to receive the registered models in the + response. For the latter case, the caller must also be the owner or have the **USE_CATALOG** privilege + on the parent catalog and the **USE_SCHEMA** privilege on the parent schema. + + There is no guarantee of a specific ordering of the elements in the response. + + :param catalog_name: str (optional) + The identifier of the catalog under which to list registered models. If specified, schema_name must + be specified. + :param max_results: int (optional) + Max number of registered models to return. If catalog and schema are unspecified, max_results must + be specified. If max_results is unspecified, we return all results, starting from the page specified + by page_token. + :param page_token: str (optional) + Opaque token to send for the next page of results (pagination). + :param schema_name: str (optional) + The identifier of the schema under which to list registered models. If specified, catalog_name must + be specified. + + :returns: Iterator over :class:`RegisteredModelInfo` + + + .. py:method:: set_alias(full_name: str, alias: str, version_num: int) -> RegisteredModelAlias + + Set a Registered Model Alias. + + Set an alias on the specified registered model. + + The caller must be a metastore admin or an owner of the registered model. For the latter case, the + caller must also be the owner or have the **USE_CATALOG** privilege on the parent catalog and the + **USE_SCHEMA** privilege on the parent schema. + + :param full_name: str + Full name of the registered model + :param alias: str + The name of the alias + :param version_num: int + The version number of the model version to which the alias points + + :returns: :class:`RegisteredModelAlias` + + + .. py:method:: update(full_name: str [, comment: Optional[str], name: Optional[str], new_name: Optional[str], owner: Optional[str]]) -> RegisteredModelInfo + + Update a Registered Model. + + Updates the specified registered model. + + The caller must be a metastore admin or an owner of the registered model. For the latter case, the + caller must also be the owner or have the **USE_CATALOG** privilege on the parent catalog and the + **USE_SCHEMA** privilege on the parent schema. + + Currently only the name, the owner or the comment of the registered model can be updated. + + :param full_name: str + The three-level (fully qualified) name of the registered model + :param comment: str (optional) + The comment attached to the registered model + :param name: str (optional) + The name of the registered model + :param new_name: str (optional) + New name for the registered model. + :param owner: str (optional) + The identifier of the user who owns the registered model + + :returns: :class:`RegisteredModelInfo` + \ No newline at end of file diff --git a/docs/workspace/catalog/schemas.rst b/docs/workspace/catalog/schemas.rst new file mode 100644 index 000000000..46ea49ff4 --- /dev/null +++ b/docs/workspace/catalog/schemas.rst @@ -0,0 +1,186 @@ +``w.schemas``: Schemas +====================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: SchemasAPI + + A schema (also called a database) is the second layer of Unity Catalog’s three-level namespace. A schema + organizes tables, views and functions. To access (or list) a table or view in a schema, users must have + the USE_SCHEMA data permission on the schema and its parent catalog, and they must have the SELECT + permission on the table or view.
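+
+    As a rough end-to-end sketch of how the methods documented below compose (the names are
+    illustrative, and the snippet assumes the caller is allowed to create catalogs and schemas in
+    the current metastore):
+
+    .. code-block::
+
+        import time
+
+        from databricks.sdk import WorkspaceClient
+
+        w = WorkspaceClient()
+
+        # create a throwaway catalog, then a schema inside it
+        demo_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}')
+        demo_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=demo_catalog.name)
+
+        # fetch the schema back by its full name (<catalog>.<schema>)
+        fetched = w.schemas.get(full_name=demo_schema.full_name)
+
+        # cleanup: drop the schema first, then the catalog
+        w.schemas.delete(full_name=demo_schema.full_name)
+        w.catalogs.delete(name=demo_catalog.name, force=True)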
+ + .. py:method:: create(name: str, catalog_name: str [, comment: Optional[str], properties: Optional[Dict[str, str]], storage_root: Optional[str]]) -> SchemaInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + # cleanup + w.catalogs.delete(name=created_catalog.name, force=True) + w.schemas.delete(full_name=created_schema.full_name) + + Create a schema. + + Creates a new schema for catalog in the Metastore. The caller must be a metastore admin, or have the + **CREATE_SCHEMA** privilege in the parent catalog. + + :param name: str + Name of schema, relative to parent catalog. + :param catalog_name: str + Name of parent catalog. + :param comment: str (optional) + User-provided free-form text description. + :param properties: Dict[str,str] (optional) + A map of key-value properties attached to the securable. + :param storage_root: str (optional) + Storage root URL for managed tables within schema. + + :returns: :class:`SchemaInfo` + + + .. py:method:: delete(full_name: str) + + Delete a schema. + + Deletes the specified schema from the parent catalog. The caller must be the owner of the schema or an + owner of the parent catalog. + + :param full_name: str + Full name of the schema. + + + + + .. py:method:: get(full_name: str) -> SchemaInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + new_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=new_catalog.name) + + _ = w.schemas.get(full_name=created.full_name) + + # cleanup + w.catalogs.delete(name=new_catalog.name, force=True) + w.schemas.delete(full_name=created.full_name) + + Get a schema. + + Gets the specified schema within the metastore. The caller must be a metastore admin, the owner of the + schema, or a user that has the **USE_SCHEMA** privilege on the schema. + + :param full_name: str + Full name of the schema. + + :returns: :class:`SchemaInfo` + + + .. py:method:: list(catalog_name: str [, max_results: Optional[int], page_token: Optional[str]]) -> Iterator[SchemaInfo] + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + new_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + all = w.schemas.list(catalog_name=new_catalog.name) + + # cleanup + w.catalogs.delete(name=new_catalog.name, force=True) + + List schemas. + + Gets an array of schemas for a catalog in the metastore. If the caller is the metastore admin or the + owner of the parent catalog, all schemas for the catalog will be retrieved. Otherwise, only schemas + owned by the caller (or for which the caller has the **USE_SCHEMA** privilege) will be retrieved. For + unpaginated request, there is no guarantee of a specific ordering of the elements in the array. For + paginated request, elements are ordered by their name. + + :param catalog_name: str + Parent catalog for schemas of interest. + :param max_results: int (optional) + Maximum number of schemas to return. If not set, all the schemas are returned (not recommended).
- + when set to a value greater than 0, the page length is the minimum of this value and a server + configured value; - when set to 0, the page length is set to a server configured value + (recommended); - when set to a value less than 0, an invalid parameter error is returned; + :param page_token: str (optional) + Opaque pagination token to go to next page based on previous query. + + :returns: Iterator over :class:`SchemaInfo` + + + .. py:method:: update(full_name: str [, comment: Optional[str], enable_predictive_optimization: Optional[EnablePredictiveOptimization], name: Optional[str], new_name: Optional[str], owner: Optional[str], properties: Optional[Dict[str, str]]]) -> SchemaInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + new_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=new_catalog.name) + + _ = w.schemas.update(full_name=created.full_name, comment=f'sdk-{time.time_ns()}') + + # cleanup + w.catalogs.delete(name=new_catalog.name, force=True) + w.schemas.delete(full_name=created.full_name) + + Update a schema. + + Updates a schema for a catalog. The caller must be the owner of the schema or a metastore admin. If + the caller is a metastore admin, only the __owner__ field can be changed in the update. If the + __name__ field must be updated, the caller must be a metastore admin or have the **CREATE_SCHEMA** + privilege on the parent catalog. + + :param full_name: str + Full name of the schema. + :param comment: str (optional) + User-provided free-form text description. + :param enable_predictive_optimization: :class:`EnablePredictiveOptimization` (optional) + Whether predictive optimization should be enabled for this object and objects under it. + :param name: str (optional) + Name of schema, relative to parent catalog. + :param new_name: str (optional) + New name for the schema. + :param owner: str (optional) + Username of current owner of schema. + :param properties: Dict[str,str] (optional) + A map of key-value properties attached to the securable. + + :returns: :class:`SchemaInfo` + \ No newline at end of file diff --git a/docs/workspace/catalog/storage_credentials.rst b/docs/workspace/catalog/storage_credentials.rst new file mode 100644 index 000000000..42a830f09 --- /dev/null +++ b/docs/workspace/catalog/storage_credentials.rst @@ -0,0 +1,242 @@ +``w.storage_credentials``: Storage Credentials +============================================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: StorageCredentialsAPI + + A storage credential represents an authentication and authorization mechanism for accessing data stored on + your cloud tenant. Each storage credential is subject to Unity Catalog access-control policies that + control which users and groups can access the credential. If a user does not have access to a storage + credential in Unity Catalog, the request fails and Unity Catalog does not attempt to authenticate to your + cloud tenant on the user’s behalf. + + Databricks recommends using external locations rather than using storage credentials directly. + + To create storage credentials, you must be a Databricks account admin. The account admin who creates the + storage credential can delegate ownership to another user or group to manage permissions on it. + + .. 
py:method:: create(name: str [, aws_iam_role: Optional[AwsIamRole], azure_managed_identity: Optional[AzureManagedIdentity], azure_service_principal: Optional[AzureServicePrincipal], cloudflare_api_token: Optional[CloudflareApiToken], comment: Optional[str], databricks_gcp_service_account: Optional[Any], read_only: Optional[bool], skip_validation: Optional[bool]]) -> StorageCredentialInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + created = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"])) + + # cleanup + w.storage_credentials.delete(name=created.name) + + Create a storage credential. + + Creates a new storage credential. + + :param name: str + The credential name. The name must be unique within the metastore. + :param aws_iam_role: :class:`AwsIamRole` (optional) + The AWS IAM role configuration. + :param azure_managed_identity: :class:`AzureManagedIdentity` (optional) + The Azure managed identity configuration. + :param azure_service_principal: :class:`AzureServicePrincipal` (optional) + The Azure service principal configuration. + :param cloudflare_api_token: :class:`CloudflareApiToken` (optional) + The Cloudflare API token configuration. + :param comment: str (optional) + Comment associated with the credential. + :param databricks_gcp_service_account: Any (optional) + The managed GCP service account configuration. + :param read_only: bool (optional) + Whether the storage credential is only usable for read operations. + :param skip_validation: bool (optional) + Supplying true to this argument skips validation of the created credential. + + :returns: :class:`StorageCredentialInfo` + + + .. py:method:: delete(name: str [, force: Optional[bool]]) + + Delete a credential. + + Deletes a storage credential from the metastore. The caller must be an owner of the storage + credential. + + :param name: str + Name of the storage credential. + :param force: bool (optional) + Force deletion even if there are dependent external locations or external tables. + + + + + .. py:method:: get(name: str) -> StorageCredentialInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + created = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"])) + + by_name = w.storage_credentials.get(name=created.name) + + # cleanup + w.storage_credentials.delete(name=created.name) + + Get a credential. + + Gets a storage credential from the metastore. The caller must be a metastore admin, the owner of the + storage credential, or have some permission on the storage credential. + + :param name: str + Name of the storage credential. + + :returns: :class:`StorageCredentialInfo` + + + .. py:method:: list( [, max_results: Optional[int], page_token: Optional[str]]) -> Iterator[StorageCredentialInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.storage_credentials.list() + + List credentials. + + Gets an array of storage credentials (as __StorageCredentialInfo__ objects). The array is limited to + only those storage credentials the caller has permission to access.
If the caller is a metastore + admin, retrieval of credentials is unrestricted. For unpaginated request, there is no guarantee of a + specific ordering of the elements in the array. For paginated request, elements are ordered by their + name. + + :param max_results: int (optional) + Maximum number of storage credentials to return. If not set, all the storage credentials are + returned (not recommended). - when set to a value greater than 0, the page length is the minimum of + this value and a server configured value; - when set to 0, the page length is set to a server + configured value (recommended); - when set to a value less than 0, an invalid parameter error is + returned; + :param page_token: str (optional) + Opaque pagination token to go to next page based on previous query. + + :returns: Iterator over :class:`StorageCredentialInfo` + + + .. py:method:: update(name: str [, aws_iam_role: Optional[AwsIamRole], azure_managed_identity: Optional[AzureManagedIdentity], azure_service_principal: Optional[AzureServicePrincipal], cloudflare_api_token: Optional[CloudflareApiToken], comment: Optional[str], databricks_gcp_service_account: Optional[Any], force: Optional[bool], new_name: Optional[str], owner: Optional[str], read_only: Optional[bool], skip_validation: Optional[bool]]) -> StorageCredentialInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + created = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"])) + + _ = w.storage_credentials.update( + name=created.name, + comment=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"])) + + # cleanup + w.storage_credentials.delete(name=created.name) + + Update a credential. + + Updates a storage credential on the metastore. + + :param name: str + Name of the storage credential. + :param aws_iam_role: :class:`AwsIamRole` (optional) + The AWS IAM role configuration. + :param azure_managed_identity: :class:`AzureManagedIdentity` (optional) + The Azure managed identity configuration. + :param azure_service_principal: :class:`AzureServicePrincipal` (optional) + The Azure service principal configuration. + :param cloudflare_api_token: :class:`CloudflareApiToken` (optional) + The Cloudflare API token configuration. + :param comment: str (optional) + Comment associated with the credential. + :param databricks_gcp_service_account: Any (optional) + The managed GCP service account configuration. + :param force: bool (optional) + Force update even if there are dependent external locations or external tables. + :param new_name: str (optional) + New name for the storage credential. + :param owner: str (optional) + Username of current owner of credential. + :param read_only: bool (optional) + Whether the storage credential is only usable for read operations. + :param skip_validation: bool (optional) + Supplying true to this argument skips validation of the updated credential. + + :returns: :class:`StorageCredentialInfo` + + + .. 
py:method:: validate( [, aws_iam_role: Optional[AwsIamRole], azure_managed_identity: Optional[AzureManagedIdentity], azure_service_principal: Optional[AzureServicePrincipal], cloudflare_api_token: Optional[CloudflareApiToken], databricks_gcp_service_account: Optional[Any], external_location_name: Optional[str], read_only: Optional[bool], storage_credential_name: Optional[Any], url: Optional[str]]) -> ValidateStorageCredentialResponse + + Validate a storage credential. + + Validates a storage credential. At least one of __external_location_name__ and __url__ need to be + provided. If only one of them is provided, it will be used for validation. And if both are provided, + the __url__ will be used for validation, and __external_location_name__ will be ignored when checking + overlapping urls. + + Either the __storage_credential_name__ or the cloud-specific credential must be provided. + + The caller must be a metastore admin or the storage credential owner or have the + **CREATE_EXTERNAL_LOCATION** privilege on the metastore and the storage credential. + + :param aws_iam_role: :class:`AwsIamRole` (optional) + The AWS IAM role configuration. + :param azure_managed_identity: :class:`AzureManagedIdentity` (optional) + The Azure managed identity configuration. + :param azure_service_principal: :class:`AzureServicePrincipal` (optional) + The Azure service principal configuration. + :param cloudflare_api_token: :class:`CloudflareApiToken` (optional) + The Cloudflare API token configuration. + :param databricks_gcp_service_account: Any (optional) + The Databricks created GCP service account configuration. + :param external_location_name: str (optional) + The name of an existing external location to validate. + :param read_only: bool (optional) + Whether the storage credential is only usable for read operations. + :param storage_credential_name: Any (optional) + The name of the storage credential to validate. + :param url: str (optional) + The external location url to validate. + + :returns: :class:`ValidateStorageCredentialResponse` + \ No newline at end of file diff --git a/docs/workspace/catalog/system_schemas.rst b/docs/workspace/catalog/system_schemas.rst new file mode 100644 index 000000000..13c4a56df --- /dev/null +++ b/docs/workspace/catalog/system_schemas.rst @@ -0,0 +1,51 @@ +``w.system_schemas``: SystemSchemas +=================================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: SystemSchemasAPI + + A system schema is a schema that lives within the system catalog. A system schema may contain information + about customer usage of Unity Catalog such as audit-logs, billing-logs, lineage information, etc. + + .. py:method:: disable(metastore_id: str, schema_name: DisableSchemaName) + + Disable a system schema. + + Disables the system schema and removes it from the system catalog. The caller must be an account admin + or a metastore admin. + + :param metastore_id: str + The metastore ID under which the system schema lives. + :param schema_name: :class:`DisableSchemaName` + Full name of the system schema. + + + + + .. py:method:: enable(metastore_id: str, schema_name: EnableSchemaName) + + Enable a system schema. + + Enables the system schema and adds it to the system catalog. The caller must be an account admin or a + metastore admin. + + :param metastore_id: str + The metastore ID under which the system schema lives. + :param schema_name: :class:`EnableSchemaName` + Full name of the system schema. + + + + + .. 
py:method:: list(metastore_id: str) -> Iterator[SystemSchemaInfo] + + List system schemas. + + Gets an array of system schemas for a metastore. The caller must be an account admin or a metastore + admin. + + :param metastore_id: str + The ID for the metastore in which the system schema resides. + + :returns: Iterator over :class:`SystemSchemaInfo` + \ No newline at end of file diff --git a/docs/workspace/catalog/table_constraints.rst b/docs/workspace/catalog/table_constraints.rst new file mode 100644 index 000000000..dd46c42f3 --- /dev/null +++ b/docs/workspace/catalog/table_constraints.rst @@ -0,0 +1,62 @@ +``w.table_constraints``: Table Constraints +========================================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: TableConstraintsAPI + + Primary key and foreign key constraints encode relationships between fields in tables. + + Primary and foreign keys are informational only and are not enforced. Foreign keys must reference a + primary key in another table. This primary key is the parent constraint of the foreign key and the table + this primary key is on is the parent table of the foreign key. Similarly, the foreign key is the child + constraint of its referenced primary key; the table of the foreign key is the child table of the primary + key. + + You can declare primary keys and foreign keys as part of the table specification during table creation. + You can also add or drop constraints on existing tables. + + .. py:method:: create(full_name_arg: str, constraint: TableConstraint) -> TableConstraint + + Create a table constraint. + + Creates a new table constraint. + + For the table constraint creation to succeed, the user must satisfy both of these conditions: - the + user must have the **USE_CATALOG** privilege on the table's parent catalog, the **USE_SCHEMA** + privilege on the table's parent schema, and be the owner of the table. - if the new constraint is a + __ForeignKeyConstraint__, the user must have the **USE_CATALOG** privilege on the referenced parent + table's catalog, the **USE_SCHEMA** privilege on the referenced parent table's schema, and be the + owner of the referenced parent table. + + :param full_name_arg: str + The full name of the table referenced by the constraint. + :param constraint: :class:`TableConstraint` + A table constraint, as defined by *one* of the following fields being set: + __primary_key_constraint__, __foreign_key_constraint__, __named_table_constraint__. + + :returns: :class:`TableConstraint` + + + .. py:method:: delete(full_name: str, constraint_name: str, cascade: bool) + + Delete a table constraint. + + Deletes a table constraint. + + For the table constraint deletion to succeed, the user must satisfy both of these conditions: - the + user must have the **USE_CATALOG** privilege on the table's parent catalog, the **USE_SCHEMA** + privilege on the table's parent schema, and be the owner of the table. - if __cascade__ argument is + **true**, the user must have the following permissions on all of the child tables: the **USE_CATALOG** + privilege on the table's catalog, the **USE_SCHEMA** privilege on the table's schema, and be the owner + of the table. + + :param full_name: str + Full name of the table referenced by the constraint. + :param constraint_name: str + The name of the constraint to delete. + :param cascade: bool + If true, try deleting all child constraints of the current constraint. If false, reject this + operation if the current constraint has any child constraints. 
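+
+        As a combined sketch of ``create()`` and ``delete()`` above (the table and column names are
+        hypothetical; it assumes a Delta table ``main.default.sdk_pk_demo`` already exists and its
+        ``id`` column is non-nullable):
+
+        .. code-block::
+
+            from databricks.sdk import WorkspaceClient
+            from databricks.sdk.service import catalog
+
+            w = WorkspaceClient()
+
+            full_name = "main.default.sdk_pk_demo"  # hypothetical existing table
+
+            # attach a primary key constraint to the table
+            created = w.table_constraints.create(
+                full_name_arg=full_name,
+                constraint=catalog.TableConstraint(
+                    primary_key_constraint=catalog.PrimaryKeyConstraint(name="pk_sdk_demo",
+                                                                        child_columns=["id"])))
+
+            # drop the constraint again; cascade=False rejects the delete if it has child constraints
+            w.table_constraints.delete(full_name=full_name,
+                                       constraint_name="pk_sdk_demo",
+                                       cascade=False)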
+ + + \ No newline at end of file diff --git a/docs/workspace/catalog/tables.rst b/docs/workspace/catalog/tables.rst new file mode 100644 index 000000000..90b51ca57 --- /dev/null +++ b/docs/workspace/catalog/tables.rst @@ -0,0 +1,201 @@ +``w.tables``: Tables +==================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: TablesAPI + + A table resides in the third layer of Unity Catalog’s three-level namespace. It contains rows of data. + To create a table, users must have CREATE_TABLE and USE_SCHEMA permissions on the schema, and they must + have the USE_CATALOG permission on its parent catalog. To query a table, users must have the SELECT + permission on the table, and they must have the USE_CATALOG permission on its parent catalog and the + USE_SCHEMA permission on its parent schema. + + A table can be managed or external. From an API perspective, a __VIEW__ is a particular kind of table + (rather than a managed or external table). + + .. py:method:: delete(full_name: str) + + Delete a table. + + Deletes a table from the specified parent catalog and schema. The caller must be the owner of the + parent catalog, have the **USE_CATALOG** privilege on the parent catalog and be the owner of the + parent schema, or be the owner of the table and have the **USE_CATALOG** privilege on the parent + catalog and the **USE_SCHEMA** privilege on the parent schema. + + :param full_name: str + Full name of the table. + + + + + .. py:method:: get(full_name: str [, include_delta_metadata: Optional[bool]]) -> TableInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + table_name = f'sdk-{time.time_ns()}' + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + _ = w.statement_execution.execute(warehouse_id=os.environ["TEST_DEFAULT_WAREHOUSE_ID"], + catalog=created_catalog.name, + schema=created_schema.name, + statement="CREATE TABLE %s AS SELECT 2+2 as four" % (table_name)).result() + + table_full_name = "%s.%s.%s" % (created_catalog.name, created_schema.name, table_name) + + created_table = w.tables.get(full_name=table_full_name) + + # cleanup + w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=created_catalog.name, force=True) + w.tables.delete(full_name=table_full_name) + + Get a table. + + Gets a table from the metastore for a specific catalog and schema. The caller must be a metastore + admin, be the owner of the table and have the **USE_CATALOG** privilege on the parent catalog and the + **USE_SCHEMA** privilege on the parent schema, or be the owner of the table and have the **SELECT** + privilege on it as well. + + :param full_name: str + Full name of the table. + :param include_delta_metadata: bool (optional) + Whether delta metadata should be included in the response. + + :returns: :class:`TableInfo` + + + .. py:method:: list(catalog_name: str, schema_name: str [, include_delta_metadata: Optional[bool], max_results: Optional[int], omit_columns: Optional[bool], omit_properties: Optional[bool], page_token: Optional[str]]) -> Iterator[TableInfo] + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + all_tables = w.tables.list(catalog_name=created_catalog.name, schema_name=created_schema.name) + + # cleanup + w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=created_catalog.name, force=True) + + List tables. + + Gets an array of all tables for the current metastore under the parent catalog and schema. The caller + must be a metastore admin or an owner of (or have the **SELECT** privilege on) the table. For the + latter case, the caller must also be the owner or have the **USE_CATALOG** privilege on the parent + catalog and the **USE_SCHEMA** privilege on the parent schema. There is no guarantee of a specific + ordering of the elements in the array. + + :param catalog_name: str + Name of parent catalog for tables of interest. + :param schema_name: str + Parent schema of tables. + :param include_delta_metadata: bool (optional) + Whether delta metadata should be included in the response. + :param max_results: int (optional) + Maximum number of tables to return. If not set, all the tables are returned (not recommended). - + when set to a value greater than 0, the page length is the minimum of this value and a server + configured value; - when set to 0, the page length is set to a server configured value + (recommended); - when set to a value less than 0, an invalid parameter error is returned; + :param omit_columns: bool (optional) + Whether to omit the columns of the table from the response or not. + :param omit_properties: bool (optional) + Whether to omit the properties of the table from the response or not. + :param page_token: str (optional) + Opaque token to send for the next page of results (pagination). + + :returns: Iterator over :class:`TableInfo` + + + .. py:method:: list_summaries(catalog_name: str [, max_results: Optional[int], page_token: Optional[str], schema_name_pattern: Optional[str], table_name_pattern: Optional[str]]) -> Iterator[TableSummary] + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + summaries = w.tables.list_summaries(catalog_name=created_catalog.name, + schema_name_pattern=created_schema.name) + + # cleanup + w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=created_catalog.name, force=True) + + List table summaries. + + Gets an array of summaries for tables for a schema and catalog within the metastore. The table + summaries returned are either: + + * summaries for tables (within the current metastore and parent catalog and schema), when the user is + a metastore admin, or: * summaries for tables and schemas (within the current metastore and parent + catalog) for which the user has ownership or the **SELECT** privilege on the table and ownership or + **USE_SCHEMA** privilege on the schema, provided that the user also has ownership or the + **USE_CATALOG** privilege on the parent catalog. + + There is no guarantee of a specific ordering of the elements in the array. + + :param catalog_name: str + Name of parent catalog for tables of interest. 
+ :param max_results: int (optional) + Maximum number of summaries for tables to return. If not set, the page length is set to a server + configured value (10000, as of 1/5/2024). - when set to a value greater than 0, the page length is + the minimum of this value and a server configured value (10000, as of 1/5/2024); - when set to 0, + the page length is set to a server configured value (10000, as of 1/5/2024) (recommended); - when + set to a value less than 0, an invalid parameter error is returned; + :param page_token: str (optional) + Opaque pagination token to go to next page based on previous query. + :param schema_name_pattern: str (optional) + A sql LIKE pattern (% and _) for schema names. All schemas will be returned if not set or empty. + :param table_name_pattern: str (optional) + A sql LIKE pattern (% and _) for table names. All tables will be returned if not set or empty. + + :returns: Iterator over :class:`TableSummary` + + + .. py:method:: update(full_name: str [, owner: Optional[str]]) + + Update a table owner. + + Change the owner of the table. The caller must be the owner of the parent catalog, have the + **USE_CATALOG** privilege on the parent catalog and be the owner of the parent schema, or be the owner + of the table and have the **USE_CATALOG** privilege on the parent catalog and the **USE_SCHEMA** + privilege on the parent schema. + + :param full_name: str + Full name of the table. + :param owner: str (optional) + + + \ No newline at end of file diff --git a/docs/workspace/catalog/volumes.rst b/docs/workspace/catalog/volumes.rst new file mode 100644 index 000000000..0e0426e5e --- /dev/null +++ b/docs/workspace/catalog/volumes.rst @@ -0,0 +1,277 @@ +``w.volumes``: Volumes +====================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: VolumesAPI + + Volumes are a Unity Catalog (UC) capability for accessing, storing, governing, organizing and processing + files. Use cases include running machine learning on unstructured data such as image, audio, video, or PDF + files, organizing data sets during the data exploration stages in data science, working with libraries + that require access to the local file system on cluster machines, storing library and config files of + arbitrary formats such as .whl or .txt centrally and providing secure access across workspaces to it, or + transforming and querying non-tabular data files in ETL. + + .. py:method:: create(catalog_name: str, schema_name: str, name: str, volume_type: VolumeType [, comment: Optional[str], storage_location: Optional[str]]) -> VolumeInfo + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + storage_credential = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"]), + comment="created via SDK") + + external_location = w.external_locations.create(name=f'sdk-{time.time_ns()}', + credential_name=storage_credential.name, + comment="created via SDK", + url="s3://" + os.environ["TEST_BUCKET"] + "/" + + f'sdk-{time.time_ns()}') + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + created_volume = w.volumes.create(catalog_name=created_catalog.name, + schema_name=created_schema.name, + name=f'sdk-{time.time_ns()}', + storage_location=external_location.url, + volume_type=catalog.VolumeType.EXTERNAL) + + # cleanup + w.storage_credentials.delete(name=storage_credential.name) + w.external_locations.delete(name=external_location.name) + w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=created_catalog.name, force=True) + w.volumes.delete(full_name_arg=created_volume.full_name) + + Create a Volume. + + Creates a new volume. + + The user can create either an external volume or a managed volume. An external volume will be + created in the specified external location, while a managed volume will be located in the default + location which is specified by the parent schema, or the parent catalog, or the Metastore. + + For the volume creation to succeed, the user must satisfy the following conditions: - The caller must be a + metastore admin, or be the owner of the parent catalog and schema, or have the **USE_CATALOG** + privilege on the parent catalog and the **USE_SCHEMA** privilege on the parent schema. - The caller + must have **CREATE VOLUME** privilege on the parent schema. + + For an external volume, the following conditions must also be satisfied: - The caller must have the **CREATE + EXTERNAL VOLUME** privilege on the external location. - There are no other tables or volumes + in the specified storage location. - The specified storage location is not under the location + of other tables, volumes, catalogs or schemas. + + :param catalog_name: str + The name of the catalog where the schema and the volume are + :param schema_name: str + The name of the schema where the volume is + :param name: str + The name of the volume + :param volume_type: :class:`VolumeType` + :param comment: str (optional) + The comment attached to the volume + :param storage_location: str (optional) + The storage location on the cloud + + :returns: :class:`VolumeInfo` + + + .. py:method:: delete(full_name_arg: str) + + Delete a Volume. + + Deletes a volume from the specified parent catalog and schema. + + The caller must be a metastore admin or an owner of the volume. For the latter case, the caller must + also be the owner or have the **USE_CATALOG** privilege on the parent catalog and the **USE_SCHEMA** + privilege on the parent schema. + + :param full_name_arg: str + The three-level (fully qualified) name of the volume + + + + + .. py:method:: list(catalog_name: str, schema_name: str) -> Iterator[VolumeInfo] + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + all_volumes = w.volumes.list(catalog_name=created_catalog.name, schema_name=created_schema.name) + + # cleanup + w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=created_catalog.name, force=True) + + List Volumes. + + Gets an array of all volumes for the current metastore under the parent catalog and schema. + + The returned volumes are filtered based on the privileges of the calling user. For example, the + metastore admin is able to list all the volumes. A regular user needs to be the owner or have the + **READ VOLUME** privilege on the volume to receive the volumes in the response. For the latter case, + the caller must also be the owner or have the **USE_CATALOG** privilege on the parent catalog and the + **USE_SCHEMA** privilege on the parent schema. + + There is no guarantee of a specific ordering of the elements in the response. + + :param catalog_name: str + The identifier of the catalog + :param schema_name: str + The identifier of the schema + + :returns: Iterator over :class:`VolumeInfo` + + + .. py:method:: read(full_name_arg: str) -> VolumeInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + storage_credential = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"]), + comment="created via SDK") + + external_location = w.external_locations.create(name=f'sdk-{time.time_ns()}', + credential_name=storage_credential.name, + comment="created via SDK", + url="s3://" + os.environ["TEST_BUCKET"] + "/" + + f'sdk-{time.time_ns()}') + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + created_volume = w.volumes.create(catalog_name=created_catalog.name, + schema_name=created_schema.name, + name=f'sdk-{time.time_ns()}', + storage_location=external_location.url, + volume_type=catalog.VolumeType.EXTERNAL) + + loaded_volume = w.volumes.read(full_name_arg=created_volume.full_name) + + # cleanup + w.storage_credentials.delete(name=storage_credential.name) + w.external_locations.delete(name=external_location.name) + w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=created_catalog.name, force=True) + w.volumes.delete(full_name_arg=created_volume.full_name) + + Get a Volume. + + Gets a volume from the metastore for a specific catalog and schema. + + The caller must be a metastore admin or an owner of (or have the **READ VOLUME** privilege on) the + volume. For the latter case, the caller must also be the owner or have the **USE_CATALOG** privilege + on the parent catalog and the **USE_SCHEMA** privilege on the parent schema. + + :param full_name_arg: str + The three-level (fully qualified) name of the volume + + :returns: :class:`VolumeInfo` + + + .. py:method:: update(full_name_arg: str [, comment: Optional[str], name: Optional[str], new_name: Optional[str], owner: Optional[str]]) -> VolumeInfo + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + storage_credential = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"]), + comment="created via SDK") + + external_location = w.external_locations.create(name=f'sdk-{time.time_ns()}', + credential_name=storage_credential.name, + comment="created via SDK", + url="s3://" + os.environ["TEST_BUCKET"] + "/" + + f'sdk-{time.time_ns()}') + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + created_volume = w.volumes.create(catalog_name=created_catalog.name, + schema_name=created_schema.name, + name=f'sdk-{time.time_ns()}', + storage_location=external_location.url, + volume_type=catalog.VolumeType.EXTERNAL) + + loaded_volume = w.volumes.read(full_name_arg=created_volume.full_name) + + _ = w.volumes.update(full_name_arg=loaded_volume.full_name, comment="Updated volume comment") + + # cleanup + w.storage_credentials.delete(name=storage_credential.name) + w.external_locations.delete(name=external_location.name) + w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=created_catalog.name, force=True) + w.volumes.delete(full_name_arg=created_volume.full_name) + + Update a Volume. + + Updates the specified volume under the specified parent catalog and schema. + + The caller must be a metastore admin or an owner of the volume. For the latter case, the caller must + also be the owner or have the **USE_CATALOG** privilege on the parent catalog and the **USE_SCHEMA** + privilege on the parent schema. + + Currently only the name, the owner or the comment of the volume could be updated. + + :param full_name_arg: str + The three-level (fully qualified) name of the volume + :param comment: str (optional) + The comment attached to the volume + :param name: str (optional) + The name of the volume + :param new_name: str (optional) + New name for the volume. + :param owner: str (optional) + The identifier of the user who owns the volume + + :returns: :class:`VolumeInfo` + \ No newline at end of file diff --git a/docs/workspace/catalog/workspace_bindings.rst b/docs/workspace/catalog/workspace_bindings.rst new file mode 100644 index 000000000..e1ec753d4 --- /dev/null +++ b/docs/workspace/catalog/workspace_bindings.rst @@ -0,0 +1,123 @@ +``w.workspace_bindings``: Workspace Bindings +============================================ +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: WorkspaceBindingsAPI + + A securable in Databricks can be configured as __OPEN__ or __ISOLATED__. An __OPEN__ securable can be + accessed from any workspace, while an __ISOLATED__ securable can only be accessed from a configured list + of workspaces. This API allows you to configure (bind) securables to workspaces. + + NOTE: The __isolation_mode__ is configured for the securable itself (using its Update method) and the + workspace bindings are only consulted when the securable's __isolation_mode__ is set to __ISOLATED__. + + A securable's workspace bindings can be configured by a metastore admin or the owner of the securable. + + The original path (/api/2.1/unity-catalog/workspace-bindings/catalogs/{name}) is deprecated. 
Please use + the new path (/api/2.1/unity-catalog/bindings/{securable_type}/{securable_name}) which introduces the + ability to bind a securable in READ_ONLY mode (catalogs only). + + Securables that support binding: - catalog + + .. py:method:: get(name: str) -> CurrentWorkspaceBindings + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + bindings = w.workspace_bindings.get(name=created.name) + + # cleanup + w.catalogs.delete(name=created.name, force=True) + + Get catalog workspace bindings. + + Gets workspace bindings of the catalog. The caller must be a metastore admin or an owner of the + catalog. + + :param name: str + The name of the catalog. + + :returns: :class:`CurrentWorkspaceBindings` + + + .. py:method:: get_bindings(securable_type: str, securable_name: str) -> WorkspaceBindingsResponse + + Get securable workspace bindings. + + Gets workspace bindings of the securable. The caller must be a metastore admin or an owner of the + securable. + + :param securable_type: str + The type of the securable. + :param securable_name: str + The name of the securable. + + :returns: :class:`WorkspaceBindingsResponse` + + + .. py:method:: update(name: str [, assign_workspaces: Optional[List[int]], unassign_workspaces: Optional[List[int]]]) -> CurrentWorkspaceBindings + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + this_workspace_id = os.environ["THIS_WORKSPACE_ID"] + + created = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + _ = w.workspace_bindings.update(name=created.name, assign_workspaces=[this_workspace_id]) + + # cleanup + w.catalogs.delete(name=created.name, force=True) + + Update catalog workspace bindings. + + Updates workspace bindings of the catalog. The caller must be a metastore admin or an owner of the + catalog. + + :param name: str + The name of the catalog. + :param assign_workspaces: List[int] (optional) + A list of workspace IDs. + :param unassign_workspaces: List[int] (optional) + A list of workspace IDs. + + :returns: :class:`CurrentWorkspaceBindings` + + + .. py:method:: update_bindings(securable_type: str, securable_name: str [, add: Optional[List[WorkspaceBinding]], remove: Optional[List[WorkspaceBinding]]]) -> WorkspaceBindingsResponse + + Update securable workspace bindings. + + Updates workspace bindings of the securable. The caller must be a metastore admin or an owner of the + securable. + + :param securable_type: str + The type of the securable. + :param securable_name: str + The name of the securable. + :param add: List[:class:`WorkspaceBinding`] (optional) + List of workspace bindings + :param remove: List[:class:`WorkspaceBinding`] (optional) + List of workspace bindings + + :returns: :class:`WorkspaceBindingsResponse` + \ No newline at end of file diff --git a/docs/workspace/compute/cluster_policies.rst b/docs/workspace/compute/cluster_policies.rst new file mode 100644 index 000000000..f5c27d5c2 --- /dev/null +++ b/docs/workspace/compute/cluster_policies.rst @@ -0,0 +1,289 @@ +``w.cluster_policies``: Cluster Policies +======================================== +.. currentmodule:: databricks.sdk.service.compute + +.. py:class:: ClusterPoliciesAPI + + You can use cluster policies to control users' ability to configure clusters based on a set of rules. 
+ These rules specify which attributes or attribute values can be used during cluster creation. Cluster + policies have ACLs that limit their use to specific users and groups. + + With cluster policies, you can: - Auto-install cluster libraries on the next restart by listing them in + the policy's "libraries" field. - Limit users to creating clusters with the prescribed settings. - + Simplify the user interface, enabling more users to create clusters, by fixing and hiding some fields. - + Manage costs by setting limits on attributes that impact the hourly rate. + + Cluster policy permissions limit which policies a user can select in the Policy drop-down when the user + creates a cluster: - A user who has unrestricted cluster create permission can select the Unrestricted + policy and create fully-configurable clusters. - A user who has both unrestricted cluster create + permission and access to cluster policies can select the Unrestricted policy and policies they have access + to. - A user that has access to only cluster policies, can select the policies they have access to. + + If no policies exist in the workspace, the Policy drop-down doesn't appear. Only admin users can create, + edit, and delete policies. Admin users also have access to all policies. + + .. py:method:: create(name: str [, definition: Optional[str], description: Optional[str], libraries: Optional[List[Library]], max_clusters_per_user: Optional[int], policy_family_definition_overrides: Optional[str], policy_family_id: Optional[str]]) -> CreatePolicyResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.cluster_policies.create(name=f'sdk-{time.time_ns()}', + definition="""{ + "spark_conf.spark.databricks.delta.preview.enabled": { + "type": "fixed", + "value": true + } + } + """) + + # cleanup + w.cluster_policies.delete(policy_id=created.policy_id) + + Create a new policy. + + Creates a new policy with prescribed settings. + + :param name: str + Cluster Policy name requested by the user. This has to be unique. Length must be between 1 and 100 + characters. + :param definition: str (optional) + Policy definition document expressed in [Databricks Cluster Policy Definition Language]. + + [Databricks Cluster Policy Definition Language]: https://docs.databricks.com/administration-guide/clusters/policy-definition.html + :param description: str (optional) + Additional human-readable description of the cluster policy. + :param libraries: List[:class:`Library`] (optional) + A list of libraries to be installed on the next cluster restart that uses this policy. + :param max_clusters_per_user: int (optional) + Max number of clusters per user that can be active using this policy. If not present, there is no + max limit. + :param policy_family_definition_overrides: str (optional) + Policy definition JSON document expressed in [Databricks Policy Definition Language]. The JSON + document must be passed as a string and cannot be embedded in the requests. + + You can use this to customize the policy definition inherited from the policy family. Policy rules + specified here are merged into the inherited policy definition. + + [Databricks Policy Definition Language]: https://docs.databricks.com/administration-guide/clusters/policy-definition.html + :param policy_family_id: str (optional) + ID of the policy family. The cluster policy's policy definition inherits the policy family's policy + definition. + + Cannot be used with `definition`. 
Use `policy_family_definition_overrides` instead to customize the + policy definition. + + :returns: :class:`CreatePolicyResponse` + + + .. py:method:: delete(policy_id: str) + + Delete a cluster policy. + + Delete a policy for a cluster. Clusters governed by this policy can still run, but cannot be edited. + + :param policy_id: str + The ID of the policy to delete. + + + + + .. py:method:: edit(policy_id: str, name: str [, definition: Optional[str], description: Optional[str], libraries: Optional[List[Library]], max_clusters_per_user: Optional[int], policy_family_definition_overrides: Optional[str], policy_family_id: Optional[str]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.cluster_policies.create(name=f'sdk-{time.time_ns()}', + definition="""{ + "spark_conf.spark.databricks.delta.preview.enabled": { + "type": "fixed", + "value": true + } + } + """) + + policy = w.cluster_policies.get(policy_id=created.policy_id) + + w.cluster_policies.edit(policy_id=policy.policy_id, + name=policy.name, + definition="""{ + "spark_conf.spark.databricks.delta.preview.enabled": { + "type": "fixed", + "value": false + } + } + """) + + # cleanup + w.cluster_policies.delete(policy_id=created.policy_id) + + Update a cluster policy. + + Update an existing policy for cluster. This operation may make some clusters governed by the previous + policy invalid. + + :param policy_id: str + The ID of the policy to update. + :param name: str + Cluster Policy name requested by the user. This has to be unique. Length must be between 1 and 100 + characters. + :param definition: str (optional) + Policy definition document expressed in [Databricks Cluster Policy Definition Language]. + + [Databricks Cluster Policy Definition Language]: https://docs.databricks.com/administration-guide/clusters/policy-definition.html + :param description: str (optional) + Additional human-readable description of the cluster policy. + :param libraries: List[:class:`Library`] (optional) + A list of libraries to be installed on the next cluster restart that uses this policy. + :param max_clusters_per_user: int (optional) + Max number of clusters per user that can be active using this policy. If not present, there is no + max limit. + :param policy_family_definition_overrides: str (optional) + Policy definition JSON document expressed in [Databricks Policy Definition Language]. The JSON + document must be passed as a string and cannot be embedded in the requests. + + You can use this to customize the policy definition inherited from the policy family. Policy rules + specified here are merged into the inherited policy definition. + + [Databricks Policy Definition Language]: https://docs.databricks.com/administration-guide/clusters/policy-definition.html + :param policy_family_id: str (optional) + ID of the policy family. The cluster policy's policy definition inherits the policy family's policy + definition. + + Cannot be used with `definition`. Use `policy_family_definition_overrides` instead to customize the + policy definition. + + + + + .. py:method:: get(policy_id: str) -> Policy + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.cluster_policies.create(name=f'sdk-{time.time_ns()}', + definition="""{ + "spark_conf.spark.databricks.delta.preview.enabled": { + "type": "fixed", + "value": true + } + } + """) + + policy = w.cluster_policies.get(policy_id=created.policy_id) + + # cleanup + w.cluster_policies.delete(policy_id=created.policy_id) + + Get a cluster policy. + + Get a cluster policy entity. Creation and editing is available to admins only. + + :param policy_id: str + Canonical unique identifier for the cluster policy. + + :returns: :class:`Policy` + + + .. py:method:: get_permission_levels(cluster_policy_id: str) -> GetClusterPolicyPermissionLevelsResponse + + Get cluster policy permission levels. + + Gets the permission levels that a user can have on an object. + + :param cluster_policy_id: str + The cluster policy for which to get or manage permissions. + + :returns: :class:`GetClusterPolicyPermissionLevelsResponse` + + + .. py:method:: get_permissions(cluster_policy_id: str) -> ClusterPolicyPermissions + + Get cluster policy permissions. + + Gets the permissions of a cluster policy. Cluster policies can inherit permissions from their root + object. + + :param cluster_policy_id: str + The cluster policy for which to get or manage permissions. + + :returns: :class:`ClusterPolicyPermissions` + + + .. py:method:: list( [, sort_column: Optional[ListSortColumn], sort_order: Optional[ListSortOrder]]) -> Iterator[Policy] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import compute + + w = WorkspaceClient() + + all = w.cluster_policies.list(compute.ListClusterPoliciesRequest()) + + List cluster policies. + + Returns a list of policies accessible by the requesting user. + + :param sort_column: :class:`ListSortColumn` (optional) + The cluster policy attribute to sort by. * `POLICY_CREATION_TIME` - Sort result list by policy + creation time. * `POLICY_NAME` - Sort result list by policy name. + :param sort_order: :class:`ListSortOrder` (optional) + The order in which the policies get listed. * `DESC` - Sort result list in descending order. * `ASC` + - Sort result list in ascending order. + + :returns: Iterator over :class:`Policy` + + + .. py:method:: set_permissions(cluster_policy_id: str [, access_control_list: Optional[List[ClusterPolicyAccessControlRequest]]]) -> ClusterPolicyPermissions + + Set cluster policy permissions. + + Sets permissions on a cluster policy. Cluster policies can inherit permissions from their root object. + + :param cluster_policy_id: str + The cluster policy for which to get or manage permissions. + :param access_control_list: List[:class:`ClusterPolicyAccessControlRequest`] (optional) + + :returns: :class:`ClusterPolicyPermissions` + + + .. py:method:: update_permissions(cluster_policy_id: str [, access_control_list: Optional[List[ClusterPolicyAccessControlRequest]]]) -> ClusterPolicyPermissions + + Update cluster policy permissions. + + Updates the permissions on a cluster policy. Cluster policies can inherit permissions from their root + object. + + :param cluster_policy_id: str + The cluster policy for which to get or manage permissions. 
+ :param access_control_list: List[:class:`ClusterPolicyAccessControlRequest`] (optional) + + :returns: :class:`ClusterPolicyPermissions` + \ No newline at end of file diff --git a/docs/workspace/compute/clusters.rst b/docs/workspace/compute/clusters.rst new file mode 100644 index 000000000..5fb2592e0 --- /dev/null +++ b/docs/workspace/compute/clusters.rst @@ -0,0 +1,1002 @@ +``w.clusters``: Clusters +======================== +.. currentmodule:: databricks.sdk.service.compute + +.. py:class:: ClustersExt + + The Clusters API allows you to create, start, edit, list, terminate, and delete clusters. + + Databricks maps cluster node instance types to compute units known as DBUs. See the instance type pricing + page for a list of the supported instance types and their corresponding DBUs. + + A Databricks cluster is a set of computation resources and configurations on which you run data + engineering, data science, and data analytics workloads, such as production ETL pipelines, streaming + analytics, ad-hoc analytics, and machine learning. + + You run these workloads as a set of commands in a notebook or as an automated job. Databricks makes a + distinction between all-purpose clusters and job clusters. You use all-purpose clusters to analyze data + collaboratively using interactive notebooks. You use job clusters to run fast and robust automated jobs. + + You can create an all-purpose cluster using the UI, CLI, or REST API. You can manually terminate and + restart an all-purpose cluster. Multiple users can share such clusters to do collaborative interactive + analysis. + + IMPORTANT: Databricks retains cluster configuration information for up to 200 all-purpose clusters + terminated in the last 30 days and up to 30 job clusters recently terminated by the job scheduler. To keep + an all-purpose cluster configuration even after it has been terminated for more than 30 days, an + administrator can pin a cluster to the cluster list. + + .. py:method:: change_owner(cluster_id: str, owner_username: str) + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + latest = w.clusters.select_spark_version(latest=True) + + cluster_name = f'sdk-{time.time_ns()}' + + other_owner = w.users.create(user_name=f'sdk-{time.time_ns()}@example.com') + + clstr = w.clusters.create(cluster_name=cluster_name, + spark_version=latest, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=15, + num_workers=1).result() + + w.clusters.change_owner(cluster_id=clstr.cluster_id, owner_username=other_owner.user_name) + + # cleanup + w.users.delete(id=other_owner.id) + w.clusters.permanent_delete(cluster_id=clstr.cluster_id) + + Change cluster owner. + + Change the owner of the cluster. You must be an admin to perform this operation. + + :param cluster_id: str + + :param owner_username: str + New owner of the cluster_id after this RPC. + + + + + .. 
py:method:: create(spark_version: str [, apply_policy_default_values: Optional[bool], autoscale: Optional[AutoScale], autotermination_minutes: Optional[int], aws_attributes: Optional[AwsAttributes], azure_attributes: Optional[AzureAttributes], cluster_log_conf: Optional[ClusterLogConf], cluster_name: Optional[str], cluster_source: Optional[ClusterSource], custom_tags: Optional[Dict[str, str]], data_security_mode: Optional[DataSecurityMode], docker_image: Optional[DockerImage], driver_instance_pool_id: Optional[str], driver_node_type_id: Optional[str], enable_elastic_disk: Optional[bool], enable_local_disk_encryption: Optional[bool], gcp_attributes: Optional[GcpAttributes], init_scripts: Optional[List[InitScriptInfo]], instance_pool_id: Optional[str], node_type_id: Optional[str], num_workers: Optional[int], policy_id: Optional[str], runtime_engine: Optional[RuntimeEngine], single_user_name: Optional[str], spark_conf: Optional[Dict[str, str]], spark_env_vars: Optional[Dict[str, str]], ssh_public_keys: Optional[List[str]], workload_type: Optional[WorkloadType]]) -> Wait[ClusterDetails] + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + latest = w.clusters.select_spark_version(latest=True) + + cluster_name = f'sdk-{time.time_ns()}' + + clstr = w.clusters.create(cluster_name=cluster_name, + spark_version=latest, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=15, + num_workers=1).result() + + # cleanup + w.clusters.permanent_delete(cluster_id=clstr.cluster_id) + + Create new cluster. + + Creates a new Spark cluster. This method will acquire new instances from the cloud provider if + necessary. Note: Databricks may not be able to acquire some of the requested nodes, due to cloud + provider limitations (account limits, spot price, etc.) or transient network issues. + + If Databricks acquires at least 85% of the requested on-demand nodes, cluster creation will succeed. + Otherwise the cluster will terminate with an informative error message. + + :param spark_version: str + The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be + retrieved by using the :method:clusters/sparkVersions API call. + :param apply_policy_default_values: bool (optional) + :param autoscale: :class:`AutoScale` (optional) + Parameters needed in order to automatically scale clusters up and down based on load. Note: + autoscaling works best with DB runtime versions 3.0 or later. + :param autotermination_minutes: int (optional) + Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this + cluster will not be automatically terminated. If specified, the threshold must be between 10 and + 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. + :param aws_attributes: :class:`AwsAttributes` (optional) + Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, + a set of default values will be used. + :param azure_attributes: :class:`AzureAttributes` (optional) + Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a + set of default values will be used. + :param cluster_log_conf: :class:`ClusterLogConf` (optional) + The configuration for delivering spark logs to a long-term storage destination. Two kinds of + destinations (dbfs and s3) are supported. 
Only one destination can be specified for one cluster. If + the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of + driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is + `$destination/$clusterId/executor`. + :param cluster_name: str (optional) + Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, + the cluster name will be an empty string. + :param cluster_source: :class:`ClusterSource` (optional) + Determines whether the cluster was created by a user through the UI, created by the Databricks Jobs + Scheduler, or through an API request. This is the same as cluster_creator, but read only. + :param custom_tags: Dict[str,str] (optional) + Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS + instances and EBS volumes) with these tags in addition to `default_tags`. Notes: + + - Currently, Databricks allows at most 45 custom tags + + - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags + :param data_security_mode: :class:`DataSecurityMode` (optional) + Data security mode decides what data governance model to use when accessing data from a cluster. + + * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are + not available in this mode. * `SINGLE_USER`: A secure cluster that can only be exclusively used by a + single user specified in `single_user_name`. Most programming languages, cluster features and data + governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be + shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data + and credentials. Most data governance features are supported in this mode. But programming languages + and cluster features might be limited. * `LEGACY_TABLE_ACL`: This mode is for users migrating from + legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy + Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating + from legacy Passthrough on standard clusters. + :param docker_image: :class:`DockerImage` (optional) + :param driver_instance_pool_id: str (optional) + The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses + the instance pool with id (instance_pool_id) if the driver pool is not assigned. + :param driver_node_type_id: str (optional) + The node type of the Spark driver. Note that this field is optional; if unset, the driver node type + will be set as the same value as `node_type_id` defined above. + :param enable_elastic_disk: bool (optional) + Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space + when its Spark workers are running low on disk space. This feature requires specific AWS permissions + to function correctly - refer to the User Guide for more details. + :param enable_local_disk_encryption: bool (optional) + Whether to enable LUKS on cluster VMs' local disks + :param gcp_attributes: :class:`GcpAttributes` (optional) + Attributes related to clusters running on Google Cloud Platform. If not specified at cluster + creation, a set of default values will be used. + :param init_scripts: List[:class:`InitScriptInfo`] (optional) + The configuration for storing init scripts. Any number of destinations can be specified. 
The scripts + are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script + logs are sent to `//init_scripts`. + :param instance_pool_id: str (optional) + The optional ID of the instance pool to which the cluster belongs. + :param node_type_id: str (optional) + This field encodes, through a single value, the resources available to each of the Spark nodes in + this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute + intensive workloads. A list of available node types can be retrieved by using the + :method:clusters/listNodeTypes API call. + :param num_workers: int (optional) + Number of worker nodes that this cluster should have. A cluster has one Spark Driver and + `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. + + Note: When reading the properties of a cluster, this field reflects the desired number of workers + rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 + workers, this field will immediately be updated to reflect the target size of 10 workers, whereas + the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are + provisioned. + :param policy_id: str (optional) + The ID of the cluster policy used to create the cluster if applicable. + :param runtime_engine: :class:`RuntimeEngine` (optional) + Decides which runtime engine to be use, e.g. Standard vs. Photon. If unspecified, the runtime engine + is inferred from spark_version. + :param single_user_name: str (optional) + Single user name if data_security_mode is `SINGLE_USER` + :param spark_conf: Dict[str,str] (optional) + An object containing a set of optional, user-specified Spark configuration key-value pairs. Users + can also pass in a string of extra JVM options to the driver and the executors via + `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. + :param spark_env_vars: Dict[str,str] (optional) + An object containing a set of optional, user-specified environment variable key-value pairs. Please + note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while + launching the driver and workers. + + In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to + `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks + managed environmental variables are included as well. + + Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": + "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS + -Dspark.shuffle.service.enabled=true"}` + :param ssh_public_keys: List[str] (optional) + SSH public key contents that will be added to each Spark node in this cluster. The corresponding + private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be + specified. + :param workload_type: :class:`WorkloadType` (optional) + + :returns: + Long-running operation waiter for :class:`ClusterDetails`. + See :method:wait_get_cluster_running for more details. + + + .. 
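code-block::
+
+            # A hedged, illustrative sketch (not part of the generated reference): create() returns a
+            # long-running operation waiter, so the call itself is non-blocking and .result() blocks
+            # until the cluster reaches a RUNNING state. Names and settings here are placeholder assumptions.
+            import time
+
+            from databricks.sdk import WorkspaceClient
+
+            w = WorkspaceClient()
+
+            waiter = w.clusters.create(cluster_name=f'sdk-{time.time_ns()}',
+                                       spark_version=w.clusters.select_spark_version(latest=True),
+                                       node_type_id=w.clusters.select_node_type(local_disk=True),
+                                       autotermination_minutes=15,
+                                       num_workers=1)
+
+            details = waiter.result()  # blocks; create_and_wait() below combines both steps in one call
+
+    ..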
py:method:: create_and_wait(spark_version: str [, apply_policy_default_values: Optional[bool], autoscale: Optional[AutoScale], autotermination_minutes: Optional[int], aws_attributes: Optional[AwsAttributes], azure_attributes: Optional[AzureAttributes], cluster_log_conf: Optional[ClusterLogConf], cluster_name: Optional[str], cluster_source: Optional[ClusterSource], custom_tags: Optional[Dict[str, str]], data_security_mode: Optional[DataSecurityMode], docker_image: Optional[DockerImage], driver_instance_pool_id: Optional[str], driver_node_type_id: Optional[str], enable_elastic_disk: Optional[bool], enable_local_disk_encryption: Optional[bool], gcp_attributes: Optional[GcpAttributes], init_scripts: Optional[List[InitScriptInfo]], instance_pool_id: Optional[str], node_type_id: Optional[str], num_workers: Optional[int], policy_id: Optional[str], runtime_engine: Optional[RuntimeEngine], single_user_name: Optional[str], spark_conf: Optional[Dict[str, str]], spark_env_vars: Optional[Dict[str, str]], ssh_public_keys: Optional[List[str]], workload_type: Optional[WorkloadType], timeout: datetime.timedelta = 0:20:00]) -> ClusterDetails + + + .. py:method:: delete(cluster_id: str) -> Wait[ClusterDetails] + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + latest = w.clusters.select_spark_version(latest=True) + + cluster_name = f'sdk-{time.time_ns()}' + + clstr = w.clusters.create(cluster_name=cluster_name, + spark_version=latest, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=15, + num_workers=1).result() + + _ = w.clusters.delete(cluster_id=clstr.cluster_id).result() + + # cleanup + w.clusters.permanent_delete(cluster_id=clstr.cluster_id) + + Terminate cluster. + + Terminates the Spark cluster with the specified ID. The cluster is removed asynchronously. Once the + termination has completed, the cluster will be in a `TERMINATED` state. If the cluster is already in a + `TERMINATING` or `TERMINATED` state, nothing will happen. + + :param cluster_id: str + The cluster to be terminated. + + :returns: + Long-running operation waiter for :class:`ClusterDetails`. + See :method:wait_get_cluster_terminated for more details. + + + .. py:method:: delete_and_wait(cluster_id: str, timeout: datetime.timedelta = 0:20:00) -> ClusterDetails + + + .. py:method:: edit(cluster_id: str, spark_version: str [, apply_policy_default_values: Optional[bool], autoscale: Optional[AutoScale], autotermination_minutes: Optional[int], aws_attributes: Optional[AwsAttributes], azure_attributes: Optional[AzureAttributes], cluster_log_conf: Optional[ClusterLogConf], cluster_name: Optional[str], cluster_source: Optional[ClusterSource], custom_tags: Optional[Dict[str, str]], data_security_mode: Optional[DataSecurityMode], docker_image: Optional[DockerImage], driver_instance_pool_id: Optional[str], driver_node_type_id: Optional[str], enable_elastic_disk: Optional[bool], enable_local_disk_encryption: Optional[bool], gcp_attributes: Optional[GcpAttributes], init_scripts: Optional[List[InitScriptInfo]], instance_pool_id: Optional[str], node_type_id: Optional[str], num_workers: Optional[int], policy_id: Optional[str], runtime_engine: Optional[RuntimeEngine], single_user_name: Optional[str], spark_conf: Optional[Dict[str, str]], spark_env_vars: Optional[Dict[str, str]], ssh_public_keys: Optional[List[str]], workload_type: Optional[WorkloadType]]) -> Wait[ClusterDetails] + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + cluster_name = f'sdk-{time.time_ns()}' + + latest = w.clusters.select_spark_version(latest=True) + + clstr = w.clusters.create(cluster_name=cluster_name, + spark_version=latest, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=15, + num_workers=1).result() + + _ = w.clusters.edit(cluster_id=clstr.cluster_id, + spark_version=latest, + cluster_name=cluster_name, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=10, + num_workers=2).result() + + # cleanup + w.clusters.permanent_delete(cluster_id=clstr.cluster_id) + + Update cluster configuration. + + Updates the configuration of a cluster to match the provided attributes and size. A cluster can be + updated if it is in a `RUNNING` or `TERMINATED` state. + + If a cluster is updated while in a `RUNNING` state, it will be restarted so that the new attributes + can take effect. + + If a cluster is updated while in a `TERMINATED` state, it will remain `TERMINATED`. The next time it + is started using the `clusters/start` API, the new attributes will take effect. Any attempt to update + a cluster in any other state will be rejected with an `INVALID_STATE` error code. + + Clusters created by the Databricks Jobs service cannot be edited. + + :param cluster_id: str + ID of the cluser + :param spark_version: str + The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be + retrieved by using the :method:clusters/sparkVersions API call. + :param apply_policy_default_values: bool (optional) + :param autoscale: :class:`AutoScale` (optional) + Parameters needed in order to automatically scale clusters up and down based on load. Note: + autoscaling works best with DB runtime versions 3.0 or later. + :param autotermination_minutes: int (optional) + Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this + cluster will not be automatically terminated. If specified, the threshold must be between 10 and + 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. + :param aws_attributes: :class:`AwsAttributes` (optional) + Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, + a set of default values will be used. + :param azure_attributes: :class:`AzureAttributes` (optional) + Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a + set of default values will be used. + :param cluster_log_conf: :class:`ClusterLogConf` (optional) + The configuration for delivering spark logs to a long-term storage destination. Two kinds of + destinations (dbfs and s3) are supported. Only one destination can be specified for one cluster. If + the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of + driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is + `$destination/$clusterId/executor`. + :param cluster_name: str (optional) + Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, + the cluster name will be an empty string. + :param cluster_source: :class:`ClusterSource` (optional) + Determines whether the cluster was created by a user through the UI, created by the Databricks Jobs + Scheduler, or through an API request. 
This is the same as cluster_creator, but read only. + :param custom_tags: Dict[str,str] (optional) + Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS + instances and EBS volumes) with these tags in addition to `default_tags`. Notes: + + - Currently, Databricks allows at most 45 custom tags + + - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags + :param data_security_mode: :class:`DataSecurityMode` (optional) + Data security mode decides what data governance model to use when accessing data from a cluster. + + * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are + not available in this mode. * `SINGLE_USER`: A secure cluster that can only be exclusively used by a + single user specified in `single_user_name`. Most programming languages, cluster features and data + governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be + shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data + and credentials. Most data governance features are supported in this mode. But programming languages + and cluster features might be limited. * `LEGACY_TABLE_ACL`: This mode is for users migrating from + legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy + Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating + from legacy Passthrough on standard clusters. + :param docker_image: :class:`DockerImage` (optional) + :param driver_instance_pool_id: str (optional) + The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses + the instance pool with id (instance_pool_id) if the driver pool is not assigned. + :param driver_node_type_id: str (optional) + The node type of the Spark driver. Note that this field is optional; if unset, the driver node type + will be set as the same value as `node_type_id` defined above. + :param enable_elastic_disk: bool (optional) + Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space + when its Spark workers are running low on disk space. This feature requires specific AWS permissions + to function correctly - refer to the User Guide for more details. + :param enable_local_disk_encryption: bool (optional) + Whether to enable LUKS on cluster VMs' local disks + :param gcp_attributes: :class:`GcpAttributes` (optional) + Attributes related to clusters running on Google Cloud Platform. If not specified at cluster + creation, a set of default values will be used. + :param init_scripts: List[:class:`InitScriptInfo`] (optional) + The configuration for storing init scripts. Any number of destinations can be specified. The scripts + are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script + logs are sent to `//init_scripts`. + :param instance_pool_id: str (optional) + The optional ID of the instance pool to which the cluster belongs. + :param node_type_id: str (optional) + This field encodes, through a single value, the resources available to each of the Spark nodes in + this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute + intensive workloads. A list of available node types can be retrieved by using the + :method:clusters/listNodeTypes API call. 
+ :param num_workers: int (optional) + Number of worker nodes that this cluster should have. A cluster has one Spark Driver and + `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. + + Note: When reading the properties of a cluster, this field reflects the desired number of workers + rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 + workers, this field will immediately be updated to reflect the target size of 10 workers, whereas + the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are + provisioned. + :param policy_id: str (optional) + The ID of the cluster policy used to create the cluster if applicable. + :param runtime_engine: :class:`RuntimeEngine` (optional) + Decides which runtime engine to be use, e.g. Standard vs. Photon. If unspecified, the runtime engine + is inferred from spark_version. + :param single_user_name: str (optional) + Single user name if data_security_mode is `SINGLE_USER` + :param spark_conf: Dict[str,str] (optional) + An object containing a set of optional, user-specified Spark configuration key-value pairs. Users + can also pass in a string of extra JVM options to the driver and the executors via + `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. + :param spark_env_vars: Dict[str,str] (optional) + An object containing a set of optional, user-specified environment variable key-value pairs. Please + note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while + launching the driver and workers. + + In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to + `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks + managed environmental variables are included as well. + + Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": + "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS + -Dspark.shuffle.service.enabled=true"}` + :param ssh_public_keys: List[str] (optional) + SSH public key contents that will be added to each Spark node in this cluster. The corresponding + private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be + specified. + :param workload_type: :class:`WorkloadType` (optional) + + :returns: + Long-running operation waiter for :class:`ClusterDetails`. + See :method:wait_get_cluster_running for more details. + + + .. 
py:method:: edit_and_wait(cluster_id: str, spark_version: str [, apply_policy_default_values: Optional[bool], autoscale: Optional[AutoScale], autotermination_minutes: Optional[int], aws_attributes: Optional[AwsAttributes], azure_attributes: Optional[AzureAttributes], cluster_log_conf: Optional[ClusterLogConf], cluster_name: Optional[str], cluster_source: Optional[ClusterSource], custom_tags: Optional[Dict[str, str]], data_security_mode: Optional[DataSecurityMode], docker_image: Optional[DockerImage], driver_instance_pool_id: Optional[str], driver_node_type_id: Optional[str], enable_elastic_disk: Optional[bool], enable_local_disk_encryption: Optional[bool], gcp_attributes: Optional[GcpAttributes], init_scripts: Optional[List[InitScriptInfo]], instance_pool_id: Optional[str], node_type_id: Optional[str], num_workers: Optional[int], policy_id: Optional[str], runtime_engine: Optional[RuntimeEngine], single_user_name: Optional[str], spark_conf: Optional[Dict[str, str]], spark_env_vars: Optional[Dict[str, str]], ssh_public_keys: Optional[List[str]], workload_type: Optional[WorkloadType], timeout: datetime.timedelta = 0:20:00]) -> ClusterDetails + + + .. py:method:: ensure_cluster_is_running(cluster_id: str) + + + Usage: + + .. code-block:: + + import os + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import compute + + w = WorkspaceClient() + + cluster_id = os.environ["TEST_DEFAULT_CLUSTER_ID"] + + context = w.command_execution.create(cluster_id=cluster_id, language=compute.Language.PYTHON).result() + + w.clusters.ensure_cluster_is_running(cluster_id) + + # cleanup + w.command_execution.destroy(cluster_id=cluster_id, context_id=context.id) + + Ensures that the given cluster is running, regardless of its current state. + + .. py:method:: events(cluster_id: str [, end_time: Optional[int], event_types: Optional[List[EventType]], limit: Optional[int], offset: Optional[int], order: Optional[GetEventsOrder], start_time: Optional[int]]) -> Iterator[ClusterEvent] + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + latest = w.clusters.select_spark_version(latest=True) + + cluster_name = f'sdk-{time.time_ns()}' + + clstr = w.clusters.create(cluster_name=cluster_name, + spark_version=latest, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=15, + num_workers=1).result() + + events = w.clusters.events(cluster_id=clstr.cluster_id) + + # cleanup + w.clusters.permanent_delete(cluster_id=clstr.cluster_id) + + List cluster activity events. + + Retrieves a list of events about the activity of a cluster. This API is paginated. If there are more + events to read, the response includes all the parameters necessary to request the next page of + events. + + :param cluster_id: str + The ID of the cluster to retrieve events about. + :param end_time: int (optional) + The end time in epoch milliseconds. If empty, returns events up to the current time. + :param event_types: List[:class:`EventType`] (optional) + An optional set of event types to filter on. If empty, all event types are returned. + :param limit: int (optional) + The maximum number of events to include in a page of events. Defaults to 50, and maximum allowed + value is 500. + :param offset: int (optional) + The offset in the result set. Defaults to 0 (no offset). When an offset is specified and the results + are requested in descending order, the end_time field is required. 
+ :param order: :class:`GetEventsOrder` (optional) + The order to list events in; either "ASC" or "DESC". Defaults to "DESC". + :param start_time: int (optional) + The start time in epoch milliseconds. If empty, returns events starting from the beginning of time. + + :returns: Iterator over :class:`ClusterEvent` + + + .. py:method:: get(cluster_id: str) -> ClusterDetails + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + latest = w.clusters.select_spark_version(latest=True) + + cluster_name = f'sdk-{time.time_ns()}' + + clstr = w.clusters.create(cluster_name=cluster_name, + spark_version=latest, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=15, + num_workers=1).result() + + by_id = w.clusters.get(cluster_id=clstr.cluster_id) + + # cleanup + w.clusters.permanent_delete(cluster_id=clstr.cluster_id) + + Get cluster info. + + Retrieves the information for a cluster given its identifier. Clusters can be described while they are + running, or up to 60 days after they are terminated. + + :param cluster_id: str + The cluster about which to retrieve information. + + :returns: :class:`ClusterDetails` + + + .. py:method:: get_permission_levels(cluster_id: str) -> GetClusterPermissionLevelsResponse + + Get cluster permission levels. + + Gets the permission levels that a user can have on an object. + + :param cluster_id: str + The cluster for which to get or manage permissions. + + :returns: :class:`GetClusterPermissionLevelsResponse` + + + .. py:method:: get_permissions(cluster_id: str) -> ClusterPermissions + + Get cluster permissions. + + Gets the permissions of a cluster. Clusters can inherit permissions from their root object. + + :param cluster_id: str + The cluster for which to get or manage permissions. + + :returns: :class:`ClusterPermissions` + + + .. py:method:: list( [, can_use_client: Optional[str]]) -> Iterator[ClusterDetails] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import compute + + w = WorkspaceClient() + + all = w.clusters.list(compute.ListClustersRequest()) + + List all clusters. + + Return information about all pinned clusters, active clusters, up to 200 of the most recently + terminated all-purpose clusters in the past 30 days, and up to 30 of the most recently terminated job + clusters in the past 30 days. + + For example, if there is 1 pinned cluster, 4 active clusters, 45 terminated all-purpose clusters in + the past 30 days, and 50 terminated job clusters in the past 30 days, then this API returns the 1 + pinned cluster, 4 active clusters, all 45 terminated all-purpose clusters, and the 30 most recently + terminated job clusters. + + :param can_use_client: str (optional) + Filter clusters based on what type of client it can be used for. Could be either NOTEBOOKS or JOBS. + No input for this field will get all clusters in the workspace without filtering on its supported + client + + :returns: Iterator over :class:`ClusterDetails` + + + .. py:method:: list_node_types() -> ListNodeTypesResponse + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + nodes = w.clusters.list_node_types() + + List node types. + + Returns a list of supported Spark node types. These node types can be used to launch a cluster. + + :returns: :class:`ListNodeTypesResponse` + + + .. 
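code-block::
+
+            # A hedged, illustrative sketch (not part of the generated reference): filter the node-type
+            # listing client-side. The field names used here (`node_types`, `node_type_id`, `num_gpus`)
+            # are assumptions about the ListNodeTypesResponse/NodeType dataclasses.
+            from databricks.sdk import WorkspaceClient
+
+            w = WorkspaceClient()
+
+            gpu_node_ids = [nt.node_type_id
+                            for nt in w.clusters.list_node_types().node_types
+                            if (nt.num_gpus or 0) > 0]
+
+    ..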
py:method:: list_zones() -> ListAvailableZonesResponse + + List availability zones. + + Returns a list of availability zones where clusters can be created in (For example, us-west-2a). These + zones can be used to launch a cluster. + + :returns: :class:`ListAvailableZonesResponse` + + + .. py:method:: permanent_delete(cluster_id: str) + + Permanently delete cluster. + + Permanently deletes a Spark cluster. This cluster is terminated and resources are asynchronously + removed. + + In addition, users will no longer see permanently deleted clusters in the cluster list, and API users + can no longer perform any action on permanently deleted clusters. + + :param cluster_id: str + The cluster to be deleted. + + + + + .. py:method:: pin(cluster_id: str) + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + latest = w.clusters.select_spark_version(latest=True) + + cluster_name = f'sdk-{time.time_ns()}' + + clstr = w.clusters.create(cluster_name=cluster_name, + spark_version=latest, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=15, + num_workers=1).result() + + w.clusters.pin(cluster_id=clstr.cluster_id) + + # cleanup + w.clusters.permanent_delete(cluster_id=clstr.cluster_id) + + Pin cluster. + + Pinning a cluster ensures that the cluster will always be returned by the ListClusters API. Pinning a + cluster that is already pinned will have no effect. This API can only be called by workspace admins. + + :param cluster_id: str + + + + + + .. py:method:: resize(cluster_id: str [, autoscale: Optional[AutoScale], num_workers: Optional[int]]) -> Wait[ClusterDetails] + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + latest = w.clusters.select_spark_version(latest=True) + + cluster_name = f'sdk-{time.time_ns()}' + + clstr = w.clusters.create(cluster_name=cluster_name, + spark_version=latest, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=15, + num_workers=1).result() + + by_id = w.clusters.resize(cluster_id=clstr.cluster_id, num_workers=1).result() + + # cleanup + w.clusters.permanent_delete(cluster_id=clstr.cluster_id) + + Resize cluster. + + Resizes a cluster to have a desired number of workers. This will fail unless the cluster is in a + `RUNNING` state. + + :param cluster_id: str + The cluster to be resized. + :param autoscale: :class:`AutoScale` (optional) + Parameters needed in order to automatically scale clusters up and down based on load. Note: + autoscaling works best with DB runtime versions 3.0 or later. + :param num_workers: int (optional) + Number of worker nodes that this cluster should have. A cluster has one Spark Driver and + `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. + + Note: When reading the properties of a cluster, this field reflects the desired number of workers + rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 + workers, this field will immediately be updated to reflect the target size of 10 workers, whereas + the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are + provisioned. + + :returns: + Long-running operation waiter for :class:`ClusterDetails`. + See :method:wait_get_cluster_running for more details. + + + .. 
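code-block::
+
+            # A hedged, illustrative sketch (not part of the generated reference): resize a running
+            # cluster to an autoscaling range instead of a fixed worker count. The compute.AutoScale
+            # fields (min_workers/max_workers) and the environment variable are assumptions.
+            import os
+
+            from databricks.sdk import WorkspaceClient
+            from databricks.sdk.service import compute
+
+            w = WorkspaceClient()
+
+            resized = w.clusters.resize(cluster_id=os.environ["TEST_DEFAULT_CLUSTER_ID"],
+                                        autoscale=compute.AutoScale(min_workers=1, max_workers=4)).result()
+
+    ..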
py:method:: resize_and_wait(cluster_id: str [, autoscale: Optional[AutoScale], num_workers: Optional[int], timeout: datetime.timedelta = 0:20:00]) -> ClusterDetails + + + .. py:method:: restart(cluster_id: str [, restart_user: Optional[str]]) -> Wait[ClusterDetails] + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + latest = w.clusters.select_spark_version(latest=True) + + cluster_name = f'sdk-{time.time_ns()}' + + clstr = w.clusters.create(cluster_name=cluster_name, + spark_version=latest, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=15, + num_workers=1).result() + + _ = w.clusters.restart(cluster_id=clstr.cluster_id).result() + + # cleanup + w.clusters.permanent_delete(cluster_id=clstr.cluster_id) + + Restart cluster. + + Restarts a Spark cluster with the supplied ID. If the cluster is not currently in a `RUNNING` state, + nothing will happen. + + :param cluster_id: str + The cluster to be started. + :param restart_user: str (optional) + + + :returns: + Long-running operation waiter for :class:`ClusterDetails`. + See :method:wait_get_cluster_running for more details. + + + .. py:method:: restart_and_wait(cluster_id: str [, restart_user: Optional[str], timeout: datetime.timedelta = 0:20:00]) -> ClusterDetails + + + .. py:method:: select_node_type(min_memory_gb: int, gb_per_core: int, min_cores: int, min_gpus: int, local_disk: bool, local_disk_min_size: int, category: str, photon_worker_capable: bool, photon_driver_capable: bool, graviton: bool, is_io_cache_enabled: bool, support_port_forwarding: bool, fleet: str) -> str + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + smallest = w.clusters.select_node_type(local_disk=True) + + Selects the smallest available node type given the conditions. + + :param min_memory_gb: int + :param gb_per_core: int + :param min_cores: int + :param min_gpus: int + :param local_disk: bool + :param local_disk_min_size: int + :param category: str + :param photon_worker_capable: bool + :param photon_driver_capable: bool + :param graviton: bool + :param is_io_cache_enabled: bool + :param support_port_forwarding: bool + :param fleet: str + + :returns: `node_type` compatible string + + + .. py:method:: select_spark_version(long_term_support: bool = False, beta: bool = False, latest: bool = True, ml: bool = False, genomics: bool = False, gpu: bool = False, scala: str = 2.12, spark_version: str, photon: bool = False, graviton: bool = False) -> str + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + latest = w.clusters.select_spark_version(latest=True) + + Selects the latest Databricks Runtime Version. + + :param long_term_support: bool + :param beta: bool + :param latest: bool + :param ml: bool + :param genomics: bool + :param gpu: bool + :param scala: str + :param spark_version: str + :param photon: bool + :param graviton: bool + + :returns: `spark_version` compatible string + + + .. py:method:: set_permissions(cluster_id: str [, access_control_list: Optional[List[ClusterAccessControlRequest]]]) -> ClusterPermissions + + Set cluster permissions. + + Sets permissions on a cluster. Clusters can inherit permissions from their root object. + + :param cluster_id: str + The cluster for which to get or manage permissions. + :param access_control_list: List[:class:`ClusterAccessControlRequest`] (optional) + + :returns: :class:`ClusterPermissions` + + + .. 
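code-block::
+
+            # A hedged, illustrative sketch (not part of the generated reference): grant a group the
+            # ability to restart a cluster. The group name is a placeholder, and the
+            # compute.ClusterAccessControlRequest / compute.ClusterPermissionLevel names are assumptions.
+            import os
+
+            from databricks.sdk import WorkspaceClient
+            from databricks.sdk.service import compute
+
+            w = WorkspaceClient()
+
+            acl = [compute.ClusterAccessControlRequest(group_name='data-engineers',
+                                                       permission_level=compute.ClusterPermissionLevel.CAN_RESTART)]
+
+            w.clusters.set_permissions(cluster_id=os.environ["TEST_DEFAULT_CLUSTER_ID"],
+                                       access_control_list=acl)
+
+    ..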
py:method:: spark_versions() -> GetSparkVersionsResponse + + List available Spark versions. + + Returns the list of available Spark versions. These versions can be used to launch a cluster. + + :returns: :class:`GetSparkVersionsResponse` + + + .. py:method:: start(cluster_id: str) -> Wait[ClusterDetails] + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + latest = w.clusters.select_spark_version(latest=True) + + cluster_name = f'sdk-{time.time_ns()}' + + clstr = w.clusters.create(cluster_name=cluster_name, + spark_version=latest, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=15, + num_workers=1).result() + + _ = w.clusters.start(cluster_id=clstr.cluster_id).result() + + # cleanup + w.clusters.permanent_delete(cluster_id=clstr.cluster_id) + + Start terminated cluster. + + Starts a terminated Spark cluster with the supplied ID. This works similar to `createCluster` except: + + * The previous cluster id and attributes are preserved. * The cluster starts with the last specified + cluster size. * If the previous cluster was an autoscaling cluster, the current cluster starts with + the minimum number of nodes. * If the cluster is not currently in a `TERMINATED` state, nothing will + happen. * Clusters launched to run a job cannot be started. + + :param cluster_id: str + The cluster to be started. + + :returns: + Long-running operation waiter for :class:`ClusterDetails`. + See :method:wait_get_cluster_running for more details. + + + .. py:method:: start_and_wait(cluster_id: str, timeout: datetime.timedelta = 0:20:00) -> ClusterDetails + + + .. py:method:: unpin(cluster_id: str) + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + latest = w.clusters.select_spark_version(latest=True) + + cluster_name = f'sdk-{time.time_ns()}' + + clstr = w.clusters.create(cluster_name=cluster_name, + spark_version=latest, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=15, + num_workers=1).result() + + w.clusters.unpin(cluster_id=clstr.cluster_id) + + # cleanup + w.clusters.permanent_delete(cluster_id=clstr.cluster_id) + + Unpin cluster. + + Unpinning a cluster will allow the cluster to eventually be removed from the ListClusters API. + Unpinning a cluster that is not pinned will have no effect. This API can only be called by workspace + admins. + + :param cluster_id: str + + + + + + .. py:method:: update_permissions(cluster_id: str [, access_control_list: Optional[List[ClusterAccessControlRequest]]]) -> ClusterPermissions + + Update cluster permissions. + + Updates the permissions on a cluster. Clusters can inherit permissions from their root object. + + :param cluster_id: str + The cluster for which to get or manage permissions. + :param access_control_list: List[:class:`ClusterAccessControlRequest`] (optional) + + :returns: :class:`ClusterPermissions` + + + .. py:method:: wait_get_cluster_running(cluster_id: str, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[ClusterDetails], None]]) -> ClusterDetails + + + .. 
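code-block::
+
+            # A hedged, illustrative sketch (not part of the generated reference): block until a cluster
+            # started elsewhere reaches RUNNING, reporting progress through the optional callback. The
+            # environment variable and the ClusterDetails `state` field are assumptions.
+            import os
+
+            from databricks.sdk import WorkspaceClient
+
+            w = WorkspaceClient()
+
+            details = w.clusters.wait_get_cluster_running(cluster_id=os.environ["TEST_DEFAULT_CLUSTER_ID"],
+                                                          callback=lambda c: print(f'cluster is {c.state}'))
+
+    ..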
py:method:: wait_get_cluster_terminated(cluster_id: str, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[ClusterDetails], None]]) -> ClusterDetails diff --git a/docs/workspace/compute/command_execution.rst b/docs/workspace/compute/command_execution.rst new file mode 100644 index 000000000..a5b94b5a5 --- /dev/null +++ b/docs/workspace/compute/command_execution.rst @@ -0,0 +1,161 @@ +``w.command_execution``: Command Execution +========================================== +.. currentmodule:: databricks.sdk.service.compute + +.. py:class:: CommandExecutionAPI + + This API allows execution of Python, Scala, SQL, or R commands on running Databricks Clusters. + + .. py:method:: cancel( [, cluster_id: Optional[str], command_id: Optional[str], context_id: Optional[str]]) -> Wait[CommandStatusResponse] + + Cancel a command. + + Cancels a currently running command within an execution context. + + The command ID is obtained from a prior successful call to __execute__. + + :param cluster_id: str (optional) + :param command_id: str (optional) + :param context_id: str (optional) + + :returns: + Long-running operation waiter for :class:`CommandStatusResponse`. + See :method:wait_command_status_command_execution_cancelled for more details. + + + .. py:method:: cancel_and_wait( [, cluster_id: Optional[str], command_id: Optional[str], context_id: Optional[str], timeout: datetime.timedelta = 0:20:00]) -> CommandStatusResponse + + + .. py:method:: command_status(cluster_id: str, context_id: str, command_id: str) -> CommandStatusResponse + + Get command info. + + Gets the status of and, if available, the results from a currently executing command. + + The command ID is obtained from a prior successful call to __execute__. + + :param cluster_id: str + :param context_id: str + :param command_id: str + + :returns: :class:`CommandStatusResponse` + + + .. py:method:: context_status(cluster_id: str, context_id: str) -> ContextStatusResponse + + Get status. + + Gets the status for an execution context. + + :param cluster_id: str + :param context_id: str + + :returns: :class:`ContextStatusResponse` + + + .. py:method:: create( [, cluster_id: Optional[str], language: Optional[Language]]) -> Wait[ContextStatusResponse] + + + Usage: + + .. code-block:: + + import os + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import compute + + w = WorkspaceClient() + + cluster_id = os.environ["TEST_DEFAULT_CLUSTER_ID"] + + context = w.command_execution.create(cluster_id=cluster_id, language=compute.Language.PYTHON).result() + + # cleanup + w.command_execution.destroy(cluster_id=cluster_id, context_id=context.id) + + Create an execution context. + + Creates an execution context for running cluster commands. + + If successful, this method returns the ID of the new execution context. + + :param cluster_id: str (optional) + Running cluster id + :param language: :class:`Language` (optional) + + :returns: + Long-running operation waiter for :class:`ContextStatusResponse`. + See :method:wait_context_status_command_execution_running for more details. + + + .. py:method:: create_and_wait( [, cluster_id: Optional[str], language: Optional[Language], timeout: datetime.timedelta = 0:20:00]) -> ContextStatusResponse + + + .. py:method:: destroy(cluster_id: str, context_id: str) + + Delete an execution context. + + Deletes an execution context. + + :param cluster_id: str + :param context_id: str + + + + + .. 
py:method:: execute( [, cluster_id: Optional[str], command: Optional[str], context_id: Optional[str], language: Optional[Language]]) -> Wait[CommandStatusResponse] + + + Usage: + + .. code-block:: + + import os + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import compute + + w = WorkspaceClient() + + cluster_id = os.environ["TEST_DEFAULT_CLUSTER_ID"] + + context = w.command_execution.create(cluster_id=cluster_id, language=compute.Language.PYTHON).result() + + text_results = w.command_execution.execute(cluster_id=cluster_id, + context_id=context.id, + language=compute.Language.PYTHON, + command="print(1)").result() + + # cleanup + w.command_execution.destroy(cluster_id=cluster_id, context_id=context.id) + + Run a command. + + Runs a cluster command in the given execution context, using the provided language. + + If successful, it returns an ID for tracking the status of the command's execution. + + :param cluster_id: str (optional) + Running cluster id + :param command: str (optional) + Executable code + :param context_id: str (optional) + Running context id + :param language: :class:`Language` (optional) + + :returns: + Long-running operation waiter for :class:`CommandStatusResponse`. + See :method:wait_command_status_command_execution_finished_or_error for more details. + + + .. py:method:: execute_and_wait( [, cluster_id: Optional[str], command: Optional[str], context_id: Optional[str], language: Optional[Language], timeout: datetime.timedelta = 0:20:00]) -> CommandStatusResponse + + + .. py:method:: wait_command_status_command_execution_cancelled(cluster_id: str, command_id: str, context_id: str, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[CommandStatusResponse], None]]) -> CommandStatusResponse + + + .. py:method:: wait_command_status_command_execution_finished_or_error(cluster_id: str, command_id: str, context_id: str, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[CommandStatusResponse], None]]) -> CommandStatusResponse + + + .. py:method:: wait_context_status_command_execution_running(cluster_id: str, context_id: str, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[ContextStatusResponse], None]]) -> ContextStatusResponse diff --git a/docs/workspace/compute/global_init_scripts.rst b/docs/workspace/compute/global_init_scripts.rst new file mode 100644 index 000000000..3688ed25c --- /dev/null +++ b/docs/workspace/compute/global_init_scripts.rst @@ -0,0 +1,180 @@ +``w.global_init_scripts``: Global Init Scripts +============================================== +.. currentmodule:: databricks.sdk.service.compute + +.. py:class:: GlobalInitScriptsAPI + + The Global Init Scripts API enables Workspace administrators to configure global initialization scripts + for their workspace. These scripts run on every node in every cluster in the workspace. + + **Important:** Existing clusters must be restarted to pick up any changes made to global init scripts. + Global init scripts are run in order. If the init script returns with a bad exit code, the Apache Spark + container fails to launch and init scripts with later position are skipped. If enough containers fail, the + entire cluster fails with a `GLOBAL_INIT_SCRIPT_FAILURE` error code. + + .. py:method:: create(name: str, script: str [, enabled: Optional[bool], position: Optional[int]]) -> CreateResponse + + + Usage: + + .. 
code-block:: + + import base64 + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.global_init_scripts.create(name=f'sdk-{time.time_ns()}', + script=base64.b64encode(("echo 1").encode()).decode(), + enabled=True, + position=10) + + # cleanup + w.global_init_scripts.delete(script_id=created.script_id) + + Create init script. + + Creates a new global init script in this workspace. + + :param name: str + The name of the script + :param script: str + The Base64-encoded content of the script. + :param enabled: bool (optional) + Specifies whether the script is enabled. The script runs only if enabled. + :param position: int (optional) + The position of a global init script, where 0 represents the first script to run, 1 is the second + script to run, in ascending order. + + If you omit the numeric position for a new global init script, it defaults to last position. It will + run after all current scripts. Setting any value greater than the position of the last script is + equivalent to the last position. Example: Take three existing scripts with positions 0, 1, and 2. + Any position of (3) or greater puts the script in the last position. If an explicit position value + conflicts with an existing script value, your request succeeds, but the original script at that + position and all later scripts have their positions incremented by 1. + + :returns: :class:`CreateResponse` + + + .. py:method:: delete(script_id: str) + + Delete init script. + + Deletes a global init script. + + :param script_id: str + The ID of the global init script. + + + + + .. py:method:: get(script_id: str) -> GlobalInitScriptDetailsWithContent + + + Usage: + + .. code-block:: + + import base64 + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.global_init_scripts.create(name=f'sdk-{time.time_ns()}', + script=base64.b64encode(("echo 1").encode()).decode(), + enabled=True, + position=10) + + by_id = w.global_init_scripts.get(script_id=created.script_id) + + # cleanup + w.global_init_scripts.delete(script_id=created.script_id) + + Get an init script. + + Gets all the details of a script, including its Base64-encoded contents. + + :param script_id: str + The ID of the global init script. + + :returns: :class:`GlobalInitScriptDetailsWithContent` + + + .. py:method:: list() -> Iterator[GlobalInitScriptDetails] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.global_init_scripts.list() + + Get init scripts. + + Get a list of all global init scripts for this workspace. This returns all properties for each script + but **not** the script contents. To retrieve the contents of a script, use the [get a global init + script](#operation/get-script) operation. + + :returns: Iterator over :class:`GlobalInitScriptDetails` + + + .. py:method:: update(script_id: str, name: str, script: str [, enabled: Optional[bool], position: Optional[int]]) + + + Usage: + + .. 
code-block:: + + import base64 + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.global_init_scripts.create(name=f'sdk-{time.time_ns()}', + script=base64.b64encode(("echo 1").encode()).decode(), + enabled=True, + position=10) + + w.global_init_scripts.update(script_id=created.script_id, + name=f'sdk-{time.time_ns()}', + script=base64.b64encode(("echo 2").encode()).decode()) + + # cleanup + w.global_init_scripts.delete(script_id=created.script_id) + + Update init script. + + Updates a global init script, specifying only the fields to change. All fields are optional. + Unspecified fields retain their current value. + + :param script_id: str + The ID of the global init script. + :param name: str + The name of the script + :param script: str + The Base64-encoded content of the script. + :param enabled: bool (optional) + Specifies whether the script is enabled. The script runs only if enabled. + :param position: int (optional) + The position of a script, where 0 represents the first script to run, 1 is the second script to run, + in ascending order. To move the script to run first, set its position to 0. + + To move the script to the end, set its position to any value greater than or equal to the position of + the last script. Example: three existing scripts with positions 0, 1, and 2. Any position value of 2 + or greater puts the script in the last position (2). + + If an explicit position value conflicts with an existing script, your request succeeds, but the + original script at that position and all later scripts have their positions incremented by 1. + + + \ No newline at end of file diff --git a/docs/workspace/compute/index.rst b/docs/workspace/compute/index.rst new file mode 100644 index 000000000..b13a21610 --- /dev/null +++ b/docs/workspace/compute/index.rst @@ -0,0 +1,17 @@ + +Compute +======= + +Use and configure compute for Databricks + +.. toctree:: + :maxdepth: 1 + + cluster_policies + clusters + command_execution + global_init_scripts + instance_pools + instance_profiles + libraries + policy_families \ No newline at end of file diff --git a/docs/workspace/compute/instance_pools.rst b/docs/workspace/compute/instance_pools.rst new file mode 100644 index 000000000..277844170 --- /dev/null +++ b/docs/workspace/compute/instance_pools.rst @@ -0,0 +1,269 @@ +``w.instance_pools``: Instance Pools +==================================== +.. currentmodule:: databricks.sdk.service.compute + +.. py:class:: InstancePoolsAPI + + The Instance Pools API is used to create, edit, delete, and list instance pools backed by ready-to-use + cloud instances, which reduces cluster start and auto-scaling times. + + Databricks pools reduce cluster start and auto-scaling times by maintaining a set of idle, ready-to-use + instances. When a cluster is attached to a pool, cluster nodes are created using the pool’s idle + instances. If the pool has no idle instances, the pool expands by allocating a new instance from the + instance provider in order to accommodate the cluster’s request. When a cluster releases an instance, it + returns to the pool and is free for another cluster to use. Only clusters attached to a pool can use that + pool’s idle instances. + + You can specify a different pool for the driver node and worker nodes, or use the same pool for both. + + Databricks does not charge DBUs while instances are idle in the pool. Instance provider billing does + apply. See pricing. + + .. 
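code-block::
+
+            # A hedged, illustrative sketch (not part of the generated reference): create a pool and
+            # attach a cluster to it through instance_pool_id, so cluster nodes are drawn from the
+            # pool's idle instances. Names and sizes are placeholder assumptions.
+            import time
+
+            from databricks.sdk import WorkspaceClient
+
+            w = WorkspaceClient()
+
+            pool = w.instance_pools.create(instance_pool_name=f'sdk-{time.time_ns()}',
+                                           node_type_id=w.clusters.select_node_type(local_disk=True))
+
+            clstr = w.clusters.create(cluster_name=f'sdk-{time.time_ns()}',
+                                      spark_version=w.clusters.select_spark_version(latest=True),
+                                      instance_pool_id=pool.instance_pool_id,
+                                      autotermination_minutes=15,
+                                      num_workers=1).result()
+
+    ..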
py:method:: create(instance_pool_name: str, node_type_id: str [, aws_attributes: Optional[InstancePoolAwsAttributes], azure_attributes: Optional[InstancePoolAzureAttributes], custom_tags: Optional[Dict[str, str]], disk_spec: Optional[DiskSpec], enable_elastic_disk: Optional[bool], gcp_attributes: Optional[InstancePoolGcpAttributes], idle_instance_autotermination_minutes: Optional[int], max_capacity: Optional[int], min_idle_instances: Optional[int], preloaded_docker_images: Optional[List[DockerImage]], preloaded_spark_versions: Optional[List[str]]]) -> CreateInstancePoolResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + smallest = w.clusters.select_node_type(local_disk=True) + + created = w.instance_pools.create(instance_pool_name=f'sdk-{time.time_ns()}', node_type_id=smallest) + + # cleanup + w.instance_pools.delete(instance_pool_id=created.instance_pool_id) + + Create a new instance pool. + + Creates a new instance pool using idle and ready-to-use cloud instances. + + :param instance_pool_name: str + Pool name requested by the user. Pool name must be unique. Length must be between 1 and 100 + characters. + :param node_type_id: str + This field encodes, through a single value, the resources available to each of the Spark nodes in + this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute + intensive workloads. A list of available node types can be retrieved by using the + :method:clusters/listNodeTypes API call. + :param aws_attributes: :class:`InstancePoolAwsAttributes` (optional) + Attributes related to instance pools running on Amazon Web Services. If not specified at pool + creation, a set of default values will be used. + :param azure_attributes: :class:`InstancePoolAzureAttributes` (optional) + Attributes related to instance pools running on Azure. If not specified at pool creation, a set of + default values will be used. + :param custom_tags: Dict[str,str] (optional) + Additional tags for pool resources. Databricks will tag all pool resources (e.g., AWS instances and + EBS volumes) with these tags in addition to `default_tags`. Notes: + + - Currently, Databricks allows at most 45 custom tags + :param disk_spec: :class:`DiskSpec` (optional) + Defines the specification of the disks that will be attached to all spark containers. + :param enable_elastic_disk: bool (optional) + Autoscaling Local Storage: when enabled, this instances in this pool will dynamically acquire + additional disk space when its Spark workers are running low on disk space. In AWS, this feature + requires specific AWS permissions to function correctly - refer to the User Guide for more details. + :param gcp_attributes: :class:`InstancePoolGcpAttributes` (optional) + Attributes related to instance pools running on Google Cloud Platform. If not specified at pool + creation, a set of default values will be used. + :param idle_instance_autotermination_minutes: int (optional) + Automatically terminates the extra instances in the pool cache after they are inactive for this time + in minutes if min_idle_instances requirement is already met. If not set, the extra pool instances + will be automatically terminated after a default timeout. If specified, the threshold must be + between 0 and 10000 minutes. Users can also set this value to 0 to instantly remove idle instances + from the cache if min cache size could still hold. 
+ :param max_capacity: int (optional) + Maximum number of outstanding instances to keep in the pool, including both instances used by + clusters and idle instances. Clusters that require further instance provisioning will fail during + upsize requests. + :param min_idle_instances: int (optional) + Minimum number of idle instances to keep in the instance pool + :param preloaded_docker_images: List[:class:`DockerImage`] (optional) + Custom Docker Image BYOC + :param preloaded_spark_versions: List[str] (optional) + A list containing at most one preloaded Spark image version for the pool. Pool-backed clusters + started with the preloaded Spark version will start faster. A list of available Spark versions can + be retrieved by using the :method:clusters/sparkVersions API call. + + :returns: :class:`CreateInstancePoolResponse` + + + .. py:method:: delete(instance_pool_id: str) + + Delete an instance pool. + + Deletes the instance pool permanently. The idle instances in the pool are terminated asynchronously. + + :param instance_pool_id: str + The instance pool to be terminated. + + + + + .. py:method:: edit(instance_pool_id: str, instance_pool_name: str, node_type_id: str [, custom_tags: Optional[Dict[str, str]], idle_instance_autotermination_minutes: Optional[int], max_capacity: Optional[int], min_idle_instances: Optional[int]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + smallest = w.clusters.select_node_type(local_disk=True) + + created = w.instance_pools.create(instance_pool_name=f'sdk-{time.time_ns()}', node_type_id=smallest) + + w.instance_pools.edit(instance_pool_id=created.instance_pool_id, + instance_pool_name=f'sdk-{time.time_ns()}', + node_type_id=smallest) + + # cleanup + w.instance_pools.delete(instance_pool_id=created.instance_pool_id) + + Edit an existing instance pool. + + Modifies the configuration of an existing instance pool. + + :param instance_pool_id: str + Instance pool ID + :param instance_pool_name: str + Pool name requested by the user. Pool name must be unique. Length must be between 1 and 100 + characters. + :param node_type_id: str + This field encodes, through a single value, the resources available to each of the Spark nodes in + this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute + intensive workloads. A list of available node types can be retrieved by using the + :method:clusters/listNodeTypes API call. + :param custom_tags: Dict[str,str] (optional) + Additional tags for pool resources. Databricks will tag all pool resources (e.g., AWS instances and + EBS volumes) with these tags in addition to `default_tags`. Notes: + + - Currently, Databricks allows at most 45 custom tags + :param idle_instance_autotermination_minutes: int (optional) + Automatically terminates the extra instances in the pool cache after they are inactive for this time + in minutes if min_idle_instances requirement is already met. If not set, the extra pool instances + will be automatically terminated after a default timeout. If specified, the threshold must be + between 0 and 10000 minutes. Users can also set this value to 0 to instantly remove idle instances + from the cache if min cache size could still hold. + :param max_capacity: int (optional) + Maximum number of outstanding instances to keep in the pool, including both instances used by + clusters and idle instances. Clusters that require further instance provisioning will fail during + upsize requests. 
+ :param min_idle_instances: int (optional) + Minimum number of idle instances to keep in the instance pool + + + + + .. py:method:: get(instance_pool_id: str) -> GetInstancePool + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + smallest = w.clusters.select_node_type(local_disk=True) + + created = w.instance_pools.create(instance_pool_name=f'sdk-{time.time_ns()}', node_type_id=smallest) + + by_id = w.instance_pools.get(instance_pool_id=created.instance_pool_id) + + # cleanup + w.instance_pools.delete(instance_pool_id=created.instance_pool_id) + + Get instance pool information. + + Retrieve the information for an instance pool based on its identifier. + + :param instance_pool_id: str + The canonical unique identifier for the instance pool. + + :returns: :class:`GetInstancePool` + + + .. py:method:: get_permission_levels(instance_pool_id: str) -> GetInstancePoolPermissionLevelsResponse + + Get instance pool permission levels. + + Gets the permission levels that a user can have on an object. + + :param instance_pool_id: str + The instance pool for which to get or manage permissions. + + :returns: :class:`GetInstancePoolPermissionLevelsResponse` + + + .. py:method:: get_permissions(instance_pool_id: str) -> InstancePoolPermissions + + Get instance pool permissions. + + Gets the permissions of an instance pool. Instance pools can inherit permissions from their root + object. + + :param instance_pool_id: str + The instance pool for which to get or manage permissions. + + :returns: :class:`InstancePoolPermissions` + + + .. py:method:: list() -> Iterator[InstancePoolAndStats] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.instance_pools.list() + + List instance pool info. + + Gets a list of instance pools with their statistics. + + :returns: Iterator over :class:`InstancePoolAndStats` + + + .. py:method:: set_permissions(instance_pool_id: str [, access_control_list: Optional[List[InstancePoolAccessControlRequest]]]) -> InstancePoolPermissions + + Set instance pool permissions. + + Sets permissions on an instance pool. Instance pools can inherit permissions from their root object. + + :param instance_pool_id: str + The instance pool for which to get or manage permissions. + :param access_control_list: List[:class:`InstancePoolAccessControlRequest`] (optional) + + :returns: :class:`InstancePoolPermissions` + + + .. py:method:: update_permissions(instance_pool_id: str [, access_control_list: Optional[List[InstancePoolAccessControlRequest]]]) -> InstancePoolPermissions + + Update instance pool permissions. + + Updates the permissions on an instance pool. Instance pools can inherit permissions from their root + object. + + :param instance_pool_id: str + The instance pool for which to get or manage permissions. + :param access_control_list: List[:class:`InstancePoolAccessControlRequest`] (optional) + + :returns: :class:`InstancePoolPermissions` + \ No newline at end of file diff --git a/docs/workspace/compute/instance_profiles.rst b/docs/workspace/compute/instance_profiles.rst new file mode 100644 index 000000000..a7a25f869 --- /dev/null +++ b/docs/workspace/compute/instance_profiles.rst @@ -0,0 +1,144 @@ +``w.instance_profiles``: Instance Profiles +========================================== +.. currentmodule:: databricks.sdk.service.compute + +.. 
py:class:: InstanceProfilesAPI + + The Instance Profiles API allows admins to add, list, and remove instance profiles that users can launch + clusters with. Regular users can list the instance profiles available to them. See [Secure access to S3 + buckets] using instance profiles for more information. + + [Secure access to S3 buckets]: https://docs.databricks.com/administration-guide/cloud-configurations/aws/instance-profiles.html + + .. py:method:: add(instance_profile_arn: str [, iam_role_arn: Optional[str], is_meta_instance_profile: Optional[bool], skip_validation: Optional[bool]]) + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + arn = "arn:aws:iam::000000000000:instance-profile/abc" + + w.instance_profiles.add(instance_profile_arn=arn, + skip_validation=True, + iam_role_arn="arn:aws:iam::000000000000:role/bcd") + + Register an instance profile. + + In the UI, you can select the instance profile when launching clusters. This API is only available to + admin users. + + :param instance_profile_arn: str + The AWS ARN of the instance profile to register with Databricks. This field is required. + :param iam_role_arn: str (optional) + The AWS IAM role ARN of the role associated with the instance profile. This field is required if + your role name and instance profile name do not match and you want to use the instance profile with + [Databricks SQL Serverless]. + + Otherwise, this field is optional. + + [Databricks SQL Serverless]: https://docs.databricks.com/sql/admin/serverless.html + :param is_meta_instance_profile: bool (optional) + Boolean flag indicating whether the instance profile should only be used in credential passthrough + scenarios. If true, it means the instance profile contains an meta IAM role which could assume a + wide range of roles. Therefore it should always be used with authorization. This field is optional, + the default value is `false`. + :param skip_validation: bool (optional) + By default, Databricks validates that it has sufficient permissions to launch instances with the + instance profile. This validation uses AWS dry-run mode for the RunInstances API. If validation + fails with an error message that does not indicate an IAM related permission issue, (e.g. “Your + requested instance type is not supported in your requested availability zone”), you can pass this + flag to skip the validation and forcibly add the instance profile. + + + + + .. py:method:: edit(instance_profile_arn: str [, iam_role_arn: Optional[str], is_meta_instance_profile: Optional[bool]]) + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + arn = "arn:aws:iam::000000000000:instance-profile/abc" + + w.instance_profiles.edit(instance_profile_arn=arn, iam_role_arn="arn:aws:iam::000000000000:role/bcdf") + + Edit an instance profile. + + The only supported field to change is the optional IAM role ARN associated with the instance profile. + It is required to specify the IAM role ARN if both of the following are true: + + * Your role name and instance profile name do not match. The name is the part after the last slash in + each ARN. * You want to use the instance profile with [Databricks SQL Serverless]. + + To understand where these fields are in the AWS console, see [Enable serverless SQL warehouses]. + + This API is only available to admin users. 
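+      A minimal sketch of that name check, assuming plain ARN strings (the small helper below is
+      illustrative and not part of the SDK):
+
+      .. code-block::
+
+          def arn_name(arn: str) -> str:
+              # the name is the part after the last slash in the ARN
+              return arn.rsplit("/", 1)[-1]
+
+          instance_profile_arn = "arn:aws:iam::000000000000:instance-profile/abc"
+          iam_role_arn = "arn:aws:iam::000000000000:role/bcd"
+
+          # iam_role_arn only needs to be passed when the two names differ and the
+          # profile will be used with Databricks SQL Serverless
+          if arn_name(instance_profile_arn) != arn_name(iam_role_arn):
+              print("pass iam_role_arn explicitly when editing this instance profile")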
+ + [Databricks SQL Serverless]: https://docs.databricks.com/sql/admin/serverless.html + [Enable serverless SQL warehouses]: https://docs.databricks.com/sql/admin/serverless.html + + :param instance_profile_arn: str + The AWS ARN of the instance profile to register with Databricks. This field is required. + :param iam_role_arn: str (optional) + The AWS IAM role ARN of the role associated with the instance profile. This field is required if + your role name and instance profile name do not match and you want to use the instance profile with + [Databricks SQL Serverless]. + + Otherwise, this field is optional. + + [Databricks SQL Serverless]: https://docs.databricks.com/sql/admin/serverless.html + :param is_meta_instance_profile: bool (optional) + Boolean flag indicating whether the instance profile should only be used in credential passthrough + scenarios. If true, it means the instance profile contains an meta IAM role which could assume a + wide range of roles. Therefore it should always be used with authorization. This field is optional, + the default value is `false`. + + + + + .. py:method:: list() -> Iterator[InstanceProfile] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.instance_profiles.list() + + List available instance profiles. + + List the instance profiles that the calling user can use to launch a cluster. + + This API is available to all users. + + :returns: Iterator over :class:`InstanceProfile` + + + .. py:method:: remove(instance_profile_arn: str) + + Remove the instance profile. + + Remove the instance profile with the provided ARN. Existing clusters with this instance profile will + continue to function. + + This API is only accessible to admin users. + + :param instance_profile_arn: str + The ARN of the instance profile to remove. This field is required. + + + \ No newline at end of file diff --git a/docs/workspace/compute/libraries.rst b/docs/workspace/compute/libraries.rst new file mode 100644 index 000000000..41ed1c72e --- /dev/null +++ b/docs/workspace/compute/libraries.rst @@ -0,0 +1,90 @@ +``w.libraries``: Managed Libraries +================================== +.. currentmodule:: databricks.sdk.service.compute + +.. py:class:: LibrariesAPI + + The Libraries API allows you to install and uninstall libraries and get the status of libraries on a + cluster. + + To make third-party or custom code available to notebooks and jobs running on your clusters, you can + install a library. Libraries can be written in Python, Java, Scala, and R. You can upload Java, Scala, and + Python libraries and point to external packages in PyPI, Maven, and CRAN repositories. + + Cluster libraries can be used by all notebooks running on a cluster. You can install a cluster library + directly from a public repository such as PyPI or Maven, using a previously installed workspace library, + or using an init script. + + When you install a library on a cluster, a notebook already attached to that cluster will not immediately + see the new library. You must first detach and then reattach the notebook to the cluster. + + When you uninstall a library from a cluster, the library is removed only when you restart the cluster. + Until you restart the cluster, the status of the uninstalled library appears as Uninstall pending restart. + + .. py:method:: all_cluster_statuses() -> ListAllClusterLibraryStatusesResponse + + Get all statuses. + + Get the status of all libraries on all clusters. 
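+      For example, a minimal sketch of calling this method (assuming a configured workspace client;
+      the field names follow :class:`ListAllClusterLibraryStatusesResponse` and
+      :class:`ClusterLibraryStatuses`):
+
+      .. code-block::
+
+          from databricks.sdk import WorkspaceClient
+
+          w = WorkspaceClient()
+
+          resp = w.libraries.all_cluster_statuses()
+          for cluster in resp.statuses or []:
+              print(cluster.cluster_id, len(cluster.library_statuses or []))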
A status will be available for all libraries + installed on this cluster via the API or the libraries UI as well as libraries set to be installed on + all clusters via the libraries UI. + + :returns: :class:`ListAllClusterLibraryStatusesResponse` + + + .. py:method:: cluster_status(cluster_id: str) -> Iterator[LibraryFullStatus] + + Get status. + + Get the status of libraries on a cluster. A status will be available for all libraries installed on + this cluster via the API or the libraries UI as well as libraries set to be installed on all clusters + via the libraries UI. The order of returned libraries will be as follows. + + 1. Libraries set to be installed on this cluster will be returned first. Within this group, the final + order will be order in which the libraries were added to the cluster. + + 2. Libraries set to be installed on all clusters are returned next. Within this group there is no + order guarantee. + + 3. Libraries that were previously requested on this cluster or on all clusters, but now marked for + removal. Within this group there is no order guarantee. + + :param cluster_id: str + Unique identifier of the cluster whose status should be retrieved. + + :returns: Iterator over :class:`LibraryFullStatus` + + + .. py:method:: install(cluster_id: str, libraries: List[Library]) + + Add a library. + + Add libraries to be installed on a cluster. The installation is asynchronous; it happens in the + background after the completion of this request. + + **Note**: The actual set of libraries to be installed on a cluster is the union of the libraries + specified via this method and the libraries set to be installed on all clusters via the libraries UI. + + :param cluster_id: str + Unique identifier for the cluster on which to install these libraries. + :param libraries: List[:class:`Library`] + The libraries to install. + + + + + .. py:method:: uninstall(cluster_id: str, libraries: List[Library]) + + Uninstall libraries. + + Set libraries to be uninstalled on a cluster. The libraries won't be uninstalled until the cluster is + restarted. Uninstalling libraries that are not installed on the cluster will have no impact but is not + an error. + + :param cluster_id: str + Unique identifier for the cluster on which to uninstall these libraries. + :param libraries: List[:class:`Library`] + The libraries to uninstall. + + + \ No newline at end of file diff --git a/docs/workspace/compute/policy_families.rst b/docs/workspace/compute/policy_families.rst new file mode 100644 index 000000000..43194ef01 --- /dev/null +++ b/docs/workspace/compute/policy_families.rst @@ -0,0 +1,66 @@ +``w.policy_families``: Policy Families +====================================== +.. currentmodule:: databricks.sdk.service.compute + +.. py:class:: PolicyFamiliesAPI + + View available policy families. A policy family contains a policy definition providing best practices for + configuring clusters for a particular use case. + + Databricks manages and provides policy families for several common cluster use cases. You cannot create, + edit, or delete policy families. + + Policy families cannot be used directly to create clusters. Instead, you create cluster policies using a + policy family. Cluster policies created using a policy family inherit the policy family's policy + definition. + + .. py:method:: get(policy_family_id: str) -> PolicyFamily + + + Usage: + + .. 
code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import compute + + w = WorkspaceClient() + + all = w.policy_families.list(compute.ListPolicyFamiliesRequest()) + + first_family = w.policy_families.get(policy_family_id=next(all).policy_family_id) + + Get policy family information. + + Retrieve the information for a policy family based on its identifier. + + :param policy_family_id: str + + :returns: :class:`PolicyFamily` + + + .. py:method:: list( [, max_results: Optional[int], page_token: Optional[str]]) -> Iterator[PolicyFamily] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import compute + + w = WorkspaceClient() + + all = w.policy_families.list(compute.ListPolicyFamiliesRequest()) + + List policy families. + + Retrieve a list of policy families. This API is paginated. + + :param max_results: int (optional) + The max number of policy families to return. + :param page_token: str (optional) + A token that can be used to get the next page of results. + + :returns: Iterator over :class:`PolicyFamily` + \ No newline at end of file diff --git a/docs/workspace/dashboards/index.rst b/docs/workspace/dashboards/index.rst new file mode 100644 index 000000000..756c9b549 --- /dev/null +++ b/docs/workspace/dashboards/index.rst @@ -0,0 +1,10 @@ + +Dashboards +========== + +Manage Lakeview dashboards + +.. toctree:: + :maxdepth: 1 + + lakeview \ No newline at end of file diff --git a/docs/workspace/dashboards/lakeview.rst b/docs/workspace/dashboards/lakeview.rst new file mode 100644 index 000000000..56e9bfb4b --- /dev/null +++ b/docs/workspace/dashboards/lakeview.rst @@ -0,0 +1,25 @@ +``w.lakeview``: Lakeview +======================== +.. currentmodule:: databricks.sdk.service.dashboards + +.. py:class:: LakeviewAPI + + These APIs provide specific management operations for Lakeview dashboards. Generic resource management can + be done with Workspace API (import, export, get-status, list, delete). + + .. py:method:: publish(dashboard_id: str [, embed_credentials: Optional[bool], warehouse_id: Optional[str]]) + + Publish dashboard. + + Publish the current draft dashboard. + + :param dashboard_id: str + UUID identifying the dashboard to be published. + :param embed_credentials: bool (optional) + Flag to indicate if the publisher's credentials should be embedded in the published dashboard. These + embedded credentials will be used to execute the published dashboard's queries. + :param warehouse_id: str (optional) + The ID of the warehouse that can be used to override the warehouse which was set in the draft. + + + \ No newline at end of file diff --git a/docs/workspace/files/dbfs.rst b/docs/workspace/files/dbfs.rst new file mode 100644 index 000000000..9bde1e8b6 --- /dev/null +++ b/docs/workspace/files/dbfs.rst @@ -0,0 +1,238 @@ +``w.dbfs``: DBFS +================ +.. currentmodule:: databricks.sdk.service.files + +.. py:class:: DbfsExt + + The DBFS API makes it simple to interact with various data sources without having to include a user's + credentials every time to read a file. + + .. py:method:: add_block(handle: int, data: str) + + Append data block. + + Appends a block of data to the stream specified by the input handle. If the handle does not exist, + this call will throw an exception with `RESOURCE_DOES_NOT_EXIST`. + + If the block of data exceeds 1 MB, this call will throw an exception with `MAX_BLOCK_SIZE_EXCEEDED`. + + :param handle: int + The handle on an open stream. 
+ :param data: str + The base64-encoded data to append to the stream. This has a limit of 1 MB. + + + + + .. py:method:: close(handle: int) + + Close the stream. + + Closes the stream specified by the input handle. If the handle does not exist, this call throws an + exception with `RESOURCE_DOES_NOT_EXIST`. + + :param handle: int + The handle on an open stream. + + + + + .. py:method:: copy(src: str, dst: str [, recursive: bool = False, overwrite: bool = False]) + + Copy files between DBFS and local filesystems + + .. py:method:: create(path: str [, overwrite: Optional[bool]]) -> CreateResponse + + Open a stream. + + Opens a stream to write to a file and returns a handle to this stream. There is a 10 minute idle + timeout on this handle. If a file or directory already exists on the given path and __overwrite__ is + set to `false`, this call throws an exception with `RESOURCE_ALREADY_EXISTS`. + + A typical workflow for file upload would be: + + 1. Issue a `create` call and get a handle. 2. Issue one or more `add-block` calls with the handle you + have. 3. Issue a `close` call with the handle you have. + + :param path: str + The path of the new file. The path should be the absolute DBFS path. + :param overwrite: bool (optional) + The flag that specifies whether to overwrite existing file/files. + + :returns: :class:`CreateResponse` + + + .. py:method:: delete(path: str [, recursive: Optional[bool]]) + + Delete a file/directory. + + Delete the file or directory (optionally recursively delete all files in the directory). This call + throws an exception with `IO_ERROR` if the path is a non-empty directory and `recursive` is set to + `false` or on other similar errors. + + When you delete a large number of files, the delete operation is done in increments. The call returns + a response after approximately 45 seconds with an error message (503 Service Unavailable) asking you + to re-invoke the delete operation until the directory structure is fully deleted. + + For operations that delete more than 10K files, we discourage using the DBFS REST API, but advise you + to perform such operations in the context of a cluster, using the [File system utility + (dbutils.fs)](/dev-tools/databricks-utils.html#dbutils-fs). `dbutils.fs` covers the functional scope + of the DBFS REST API, but from notebooks. Running such operations using notebooks provides better + control and manageability, such as selective deletes, and the possibility to automate periodic delete + jobs. + + :param path: str + The path of the file or directory to delete. The path should be the absolute DBFS path. + :param recursive: bool (optional) + Whether or not to recursively delete the directory's contents. Deleting empty directories can be + done without providing the recursive flag. + + + + + .. py:method:: download(path: str) -> BinaryIO + + + Usage: + + .. code-block:: + + import io + import pathlib + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + root = pathlib.Path(f'/tmp/{time.time_ns()}') + + f = io.BytesIO(b"some text data") + w.dbfs.upload(f'{root}/01', f) + + with w.dbfs.download(f'{root}/01') as f: + assert f.read() == b"some text data" + + Download file from DBFS + + .. py:method:: exists(path: str) -> bool + + If file exists on DBFS + + .. py:method:: get_status(path: str) -> FileInfo + + Get the information of a file or directory. + + Gets the file information for a file or directory. 
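+      For example, a minimal sketch (assuming a configured workspace client and that the path below
+      exists; the :class:`FileInfo` fields are shown for illustration):
+
+      .. code-block::
+
+          from databricks.sdk import WorkspaceClient
+
+          w = WorkspaceClient()
+
+          if w.dbfs.exists('/tmp/some-file'):
+              info = w.dbfs.get_status('/tmp/some-file')
+              print(info.path, info.is_dir, info.file_size)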
If the file or directory does not exist, this call + throws an exception with `RESOURCE_DOES_NOT_EXIST`. + + :param path: str + The path of the file or directory. The path should be the absolute DBFS path. + + :returns: :class:`FileInfo` + + + .. py:method:: list(path: str [, recursive: bool = False]) -> Iterator[files.FileInfo] + + List directory contents or file details. + + List the contents of a directory, or details of the file. If the file or directory does not exist, + this call throws an exception with `RESOURCE_DOES_NOT_EXIST`. + + When calling list on a large directory, the list operation will time out after approximately 60 + seconds. + + :param recursive: traverse deep into directory tree + :returns iterator of metadata for every file + + + .. py:method:: mkdirs(path: str) + + Create a directory. + + Creates the given directory and necessary parent directories if they do not exist. If a file (not a + directory) exists at any prefix of the input path, this call throws an exception with + `RESOURCE_ALREADY_EXISTS`. **Note**: If this operation fails, it might have succeeded in creating some + of the necessary parent directories. + + :param path: str + The path of the new directory. The path should be the absolute DBFS path. + + + + + .. py:method:: move(source_path: str, destination_path: str) + + Move a file. + + Moves a file from one location to another location within DBFS. If the source file does not exist, + this call throws an exception with `RESOURCE_DOES_NOT_EXIST`. If a file already exists in the + destination path, this call throws an exception with `RESOURCE_ALREADY_EXISTS`. If the given source + path is a directory, this call always recursively moves all files.", + + :param source_path: str + The source path of the file or directory. The path should be the absolute DBFS path. + :param destination_path: str + The destination path of the file or directory. The path should be the absolute DBFS path. + + + + + .. py:method:: move_(src: str, dst: str [, recursive: bool = False, overwrite: bool = False]) + + Move files between local and DBFS systems + + .. py:method:: open(path: str [, read: bool = False, write: bool = False, overwrite: bool = False]) -> _DbfsIO + + + .. py:method:: put(path: str [, contents: Optional[str], overwrite: Optional[bool]]) + + Upload a file. + + Uploads a file through the use of multipart form post. It is mainly used for streaming uploads, but + can also be used as a convenient single call for data upload. + + Alternatively you can pass contents as base64 string. + + The amount of data that can be passed (when not streaming) using the __contents__ parameter is limited + to 1 MB. `MAX_BLOCK_SIZE_EXCEEDED` will be thrown if this limit is exceeded. + + If you want to upload large files, use the streaming upload. For details, see :method:dbfs/create, + :method:dbfs/addBlock, :method:dbfs/close. + + :param path: str + The path of the new file. The path should be the absolute DBFS path. + :param contents: str (optional) + This parameter might be absent, and instead a posted file will be used. + :param overwrite: bool (optional) + The flag that specifies whether to overwrite existing file/files. + + + + + .. py:method:: read(path: str [, length: Optional[int], offset: Optional[int]]) -> ReadResponse + + Get the contents of a file. + + Returns the contents of a file. If the file does not exist, this call throws an exception with + `RESOURCE_DOES_NOT_EXIST`. 
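+      Because a single call returns at most 1 MB (see the length limit below), larger files can be
+      read in a loop by advancing the offset. A minimal sketch, assuming the path below exists and
+      that :class:`ReadResponse` exposes `bytes_read` and base64-encoded `data` as in the REST API:
+
+      .. code-block::
+
+          import base64
+
+          from databricks.sdk import WorkspaceClient
+
+          w = WorkspaceClient()
+
+          chunks, offset = [], 0
+          while True:
+              resp = w.dbfs.read('/tmp/large-file', offset=offset, length=1024 * 1024)
+              if not resp.bytes_read:
+                  break
+              chunks.append(base64.b64decode(resp.data))
+              offset += resp.bytes_read
+          contents = b''.join(chunks)
+
+      In practice the higher-level `download` method shown above streams file contents for you; the
+      loop is only meant to illustrate how `offset` and `length` interact.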
If the path is a directory, the read length is negative, or if the offset + is negative, this call throws an exception with `INVALID_PARAMETER_VALUE`. If the read length exceeds + 1 MB, this call throws an exception with `MAX_READ_SIZE_EXCEEDED`. + + If `offset + length` exceeds the number of bytes in a file, it reads the contents until the end of + file.", + + :param path: str + The path of the file to read. The path should be the absolute DBFS path. + :param length: int (optional) + The number of bytes to read starting from the offset. This has a limit of 1 MB, and a default value + of 0.5 MB. + :param offset: int (optional) + The offset to read from in bytes. + + :returns: :class:`ReadResponse` + + + .. py:method:: upload(path: str, src: BinaryIO [, overwrite: bool = False]) + + Upload file to DBFS \ No newline at end of file diff --git a/docs/workspace/files/index.rst b/docs/workspace/files/index.rst new file mode 100644 index 000000000..b2276ad6b --- /dev/null +++ b/docs/workspace/files/index.rst @@ -0,0 +1,10 @@ + +File Management +=============== + +Manage files on Databricks in a filesystem-like interface + +.. toctree:: + :maxdepth: 1 + + dbfs \ No newline at end of file diff --git a/docs/workspace/iam/account_access_control_proxy.rst b/docs/workspace/iam/account_access_control_proxy.rst new file mode 100644 index 000000000..3265b29cc --- /dev/null +++ b/docs/workspace/iam/account_access_control_proxy.rst @@ -0,0 +1,56 @@ +``w.account_access_control_proxy``: Account Access Control Proxy +================================================================ +.. currentmodule:: databricks.sdk.service.iam + +.. py:class:: AccountAccessControlProxyAPI + + These APIs manage access rules on resources in an account. Currently, only grant rules are supported. A + grant rule specifies a role assigned to a set of principals. A list of rules attached to a resource is + called a rule set. A workspace must belong to an account for these APIs to work. + + .. py:method:: get_assignable_roles_for_resource(resource: str) -> GetAssignableRolesForResourceResponse + + Get assignable roles for a resource. + + Gets all the roles that can be granted on an account-level resource. A role is grantable if the rule + set on the resource can contain an access rule of the role. + + :param resource: str + The resource name for which assignable roles will be listed. + + :returns: :class:`GetAssignableRolesForResourceResponse` + + + .. py:method:: get_rule_set(name: str, etag: str) -> RuleSetResponse + + Get a rule set. + + Get a rule set by its name. A rule set is always attached to a resource and contains a list of access + rules on the said resource. Currently only a default rule set for each resource is supported. + + :param name: str + The ruleset name associated with the request. + :param etag: str + Etag used for versioning. The response is at least as fresh as the eTag provided. Etag is used for + optimistic concurrency control as a way to help prevent simultaneous updates of a rule set from + overwriting each other. It is strongly suggested that systems make use of the etag in the read -> + modify -> write pattern to perform rule set updates in order to avoid race conditions that is get an + etag from a GET rule set request, and pass it with the PUT update request to identify the rule set + version you are updating. + + :returns: :class:`RuleSetResponse` + + + .. py:method:: update_rule_set(name: str, rule_set: RuleSetUpdateRequest) -> RuleSetResponse + + Update a rule set. 
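+      A minimal sketch of the read -> modify -> write pattern described below (the resource name is a
+      placeholder, and the request shape assumes the generated :class:`RuleSetUpdateRequest`
+      dataclass):
+
+      .. code-block::
+
+          from databricks.sdk import WorkspaceClient
+          from databricks.sdk.service import iam
+
+          w = WorkspaceClient()
+
+          name = "accounts/<account-id>/servicePrincipals/<application-id>/ruleSets/default"
+
+          # read the current rule set to obtain a fresh etag
+          current = w.account_access_control_proxy.get_rule_set(name=name, etag="")
+
+          # write it back with the etag so concurrent updates are detected
+          updated = w.account_access_control_proxy.update_rule_set(
+              name=name,
+              rule_set=iam.RuleSetUpdateRequest(name=name,
+                                                etag=current.etag,
+                                                grant_rules=current.grant_rules or []))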
+ + Replace the rules of a rule set. First, use a GET rule set request to read the current version of the + rule set before modifying it. This pattern helps prevent conflicts between concurrent updates. + + :param name: str + Name of the rule set. + :param rule_set: :class:`RuleSetUpdateRequest` + + :returns: :class:`RuleSetResponse` + \ No newline at end of file diff --git a/docs/workspace/iam/current_user.rst b/docs/workspace/iam/current_user.rst new file mode 100644 index 000000000..b2ba795db --- /dev/null +++ b/docs/workspace/iam/current_user.rst @@ -0,0 +1,27 @@ +``w.current_user``: CurrentUser +=============================== +.. currentmodule:: databricks.sdk.service.iam + +.. py:class:: CurrentUserAPI + + This API allows retrieving information about currently authenticated user or service principal. + + .. py:method:: me() -> User + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + me2 = w.current_user.me() + + Get current user info. + + Get details about the current method caller's identity. + + :returns: :class:`User` + \ No newline at end of file diff --git a/docs/workspace/iam/groups.rst b/docs/workspace/iam/groups.rst new file mode 100644 index 000000000..0dd76485b --- /dev/null +++ b/docs/workspace/iam/groups.rst @@ -0,0 +1,185 @@ +``w.groups``: Groups +==================== +.. currentmodule:: databricks.sdk.service.iam + +.. py:class:: GroupsAPI + + Groups simplify identity management, making it easier to assign access to Databricks workspace, data, and + other securable objects. + + It is best practice to assign access to workspaces and access-control policies in Unity Catalog to groups, + instead of to users individually. All Databricks workspace identities can be assigned as members of + groups, and members inherit permissions that are assigned to their group. + + .. py:method:: create( [, display_name: Optional[str], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], id: Optional[str], members: Optional[List[ComplexValue]], meta: Optional[ResourceMeta], roles: Optional[List[ComplexValue]], schemas: Optional[List[GroupSchema]]]) -> Group + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + group = w.groups.create(display_name=f'sdk-{time.time_ns()}') + + # cleanup + w.groups.delete(id=group.id) + + Create a new group. + + Creates a group in the Databricks workspace with a unique name, using the supplied group details. + + :param display_name: str (optional) + String that represents a human-readable group name + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the group. See [assigning entitlements] for a full list of supported + values. + + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + :param groups: List[:class:`ComplexValue`] (optional) + :param id: str (optional) + Databricks group ID + :param members: List[:class:`ComplexValue`] (optional) + :param meta: :class:`ResourceMeta` (optional) + Container for the group identifier. Workspace local versus account. + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`GroupSchema`] (optional) + The schema of the group. + + :returns: :class:`Group` + + + .. py:method:: delete(id: str) + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + group = w.groups.create(display_name=f'sdk-{time.time_ns()}') + + w.groups.delete(id=group.id) + + Delete a group. + + Deletes a group from the Databricks workspace. + + :param id: str + Unique ID for a group in the Databricks workspace. + + + + + .. py:method:: get(id: str) -> Group + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + group = w.groups.create(display_name=f'sdk-{time.time_ns()}') + + fetch = w.groups.get(id=group.id) + + # cleanup + w.groups.delete(id=group.id) + + Get group details. + + Gets the information for a specific group in the Databricks workspace. + + :param id: str + Unique ID for a group in the Databricks workspace. + + :returns: :class:`Group` + + + .. py:method:: list( [, attributes: Optional[str], count: Optional[int], excluded_attributes: Optional[str], filter: Optional[str], sort_by: Optional[str], sort_order: Optional[ListSortOrder], start_index: Optional[int]]) -> Iterator[Group] + + List group details. + + Gets all details of the groups associated with the Databricks workspace. + + :param attributes: str (optional) + Comma-separated list of attributes to return in response. + :param count: int (optional) + Desired number of results per page. + :param excluded_attributes: str (optional) + Comma-separated list of attributes to exclude in response. + :param filter: str (optional) + Query by which the results have to be filtered. Supported operators are equals(`eq`), + contains(`co`), starts with(`sw`) and not equals(`ne`). Additionally, simple expressions can be + formed using logical operators - `and` and `or`. The [SCIM RFC] has more details but we currently + only support simple expressions. + + [SCIM RFC]: https://tools.ietf.org/html/rfc7644#section-3.4.2.2 + :param sort_by: str (optional) + Attribute to sort the results. + :param sort_order: :class:`ListSortOrder` (optional) + The order to sort the results. + :param start_index: int (optional) + Specifies the index of the first result. First item is number 1. + + :returns: Iterator over :class:`Group` + + + .. py:method:: patch(id: str [, operations: Optional[List[Patch]], schemas: Optional[List[PatchSchema]]]) + + Update group details. + + Partially updates the details of a group. + + :param id: str + Unique ID for a group in the Databricks workspace. + :param operations: List[:class:`Patch`] (optional) + :param schemas: List[:class:`PatchSchema`] (optional) + The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. + + + + + .. py:method:: update(id: str [, display_name: Optional[str], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], members: Optional[List[ComplexValue]], meta: Optional[ResourceMeta], roles: Optional[List[ComplexValue]], schemas: Optional[List[GroupSchema]]]) + + Replace a group. + + Updates the details of a group by replacing the entire group entity. + + :param id: str + Databricks group ID + :param display_name: str (optional) + String that represents a human-readable group name + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the group. See [assigning entitlements] for a full list of supported + values. 
+ + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + :param groups: List[:class:`ComplexValue`] (optional) + :param members: List[:class:`ComplexValue`] (optional) + :param meta: :class:`ResourceMeta` (optional) + Container for the group identifier. Workspace local versus account. + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`GroupSchema`] (optional) + The schema of the group. + + + \ No newline at end of file diff --git a/docs/workspace/iam/index.rst b/docs/workspace/iam/index.rst new file mode 100644 index 000000000..11e9318f9 --- /dev/null +++ b/docs/workspace/iam/index.rst @@ -0,0 +1,15 @@ + +Identity and Access Management +============================== + +Manage users, service principals, groups and their permissions in Accounts and Workspaces + +.. toctree:: + :maxdepth: 1 + + account_access_control_proxy + current_user + groups + permissions + service_principals + users \ No newline at end of file diff --git a/docs/workspace/iam/permissions.rst b/docs/workspace/iam/permissions.rst new file mode 100644 index 000000000..243b633c6 --- /dev/null +++ b/docs/workspace/iam/permissions.rst @@ -0,0 +1,181 @@ +``w.permissions``: Permissions +============================== +.. currentmodule:: databricks.sdk.service.iam + +.. py:class:: PermissionsAPI + + Permissions API are used to create read, write, edit, update and manage access for various users on + different objects and endpoints. + + * **[Cluster permissions](:service:clusters)** — Manage which users can manage, restart, or attach to + clusters. + + * **[Cluster policy permissions](:service:clusterpolicies)** — Manage which users can use cluster + policies. + + * **[Delta Live Tables pipeline permissions](:service:pipelines)** — Manage which users can view, + manage, run, cancel, or own a Delta Live Tables pipeline. + + * **[Job permissions](:service:jobs)** — Manage which users can view, manage, trigger, cancel, or own a + job. + + * **[MLflow experiment permissions](:service:experiments)** — Manage which users can read, edit, or + manage MLflow experiments. + + * **[MLflow registered model permissions](:service:modelregistry)** — Manage which users can read, edit, + or manage MLflow registered models. + + * **[Password permissions](:service:users)** — Manage which users can use password login when SSO is + enabled. + + * **[Instance Pool permissions](:service:instancepools)** — Manage which users can manage or attach to + pools. + + * **[Repo permissions](repos)** — Manage which users can read, run, edit, or manage a repo. + + * **[Serving endpoint permissions](:service:servingendpoints)** — Manage which users can view, query, or + manage a serving endpoint. + + * **[SQL warehouse permissions](:service:warehouses)** — Manage which users can use or manage SQL + warehouses. + + * **[Token permissions](:service:tokenmanagement)** — Manage which users can create or use tokens. + + * **[Workspace object permissions](:service:workspace)** — Manage which users can read, run, edit, or + manage directories, files, and notebooks. + + For the mapping of the required permissions for specific actions or abilities and other important + information, see [Access Control]. + + [Access Control]: https://docs.databricks.com/security/auth-authz/access-control/index.html + + .. 
py:method:: get(request_object_type: str, request_object_id: str) -> ObjectPermissions + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + obj = w.workspace.get_status(path=notebook_path) + + levels = w.permissions.get_permission_levels(request_object_type="notebooks", + request_object_id="%d" % (obj.object_id)) + + Get object permissions. + + Gets the permissions of an object. Objects can inherit permissions from their parent objects or root + object. + + :param request_object_type: str + The type of the request object. Can be one of the following: authorization, clusters, + cluster-policies, directories, experiments, files, instance-pools, jobs, notebooks, pipelines, + registered-models, repos, serving-endpoints, or sql-warehouses. + :param request_object_id: str + The id of the request object. + + :returns: :class:`ObjectPermissions` + + + .. py:method:: get_permission_levels(request_object_type: str, request_object_id: str) -> GetPermissionLevelsResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + obj = w.workspace.get_status(path=notebook_path) + + levels = w.permissions.get_permission_levels(request_object_type="notebooks", + request_object_id="%d" % (obj.object_id)) + + Get object permission levels. + + Gets the permission levels that a user can have on an object. + + :param request_object_type: str + + :param request_object_id: str + + + :returns: :class:`GetPermissionLevelsResponse` + + + .. py:method:: set(request_object_type: str, request_object_id: str [, access_control_list: Optional[List[AccessControlRequest]]]) -> ObjectPermissions + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import iam + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + group = w.groups.create(display_name=f'sdk-{time.time_ns()}') + + obj = w.workspace.get_status(path=notebook_path) + + _ = w.permissions.set(request_object_type="notebooks", + request_object_id="%d" % (obj.object_id), + access_control_list=[ + iam.AccessControlRequest(group_name=group.display_name, + permission_level=iam.PermissionLevel.CAN_RUN) + ]) + + # cleanup + w.groups.delete(id=group.id) + + Set object permissions. + + Sets permissions on an object. Objects can inherit permissions from their parent objects or root + object. + + :param request_object_type: str + The type of the request object. Can be one of the following: authorization, clusters, + cluster-policies, directories, experiments, files, instance-pools, jobs, notebooks, pipelines, + registered-models, repos, serving-endpoints, or sql-warehouses. + :param request_object_id: str + The id of the request object. + :param access_control_list: List[:class:`AccessControlRequest`] (optional) + + :returns: :class:`ObjectPermissions` + + + .. py:method:: update(request_object_type: str, request_object_id: str [, access_control_list: Optional[List[AccessControlRequest]]]) -> ObjectPermissions + + Update object permissions. + + Updates the permissions on an object. Objects can inherit permissions from their parent objects or + root object. + + :param request_object_type: str + The type of the request object. 
Can be one of the following: authorization, clusters, + cluster-policies, directories, experiments, files, instance-pools, jobs, notebooks, pipelines, + registered-models, repos, serving-endpoints, or sql-warehouses. + :param request_object_id: str + The id of the request object. + :param access_control_list: List[:class:`AccessControlRequest`] (optional) + + :returns: :class:`ObjectPermissions` + \ No newline at end of file diff --git a/docs/workspace/iam/service_principals.rst b/docs/workspace/iam/service_principals.rst new file mode 100644 index 000000000..41829fc87 --- /dev/null +++ b/docs/workspace/iam/service_principals.rst @@ -0,0 +1,241 @@ +``w.service_principals``: Service Principals +============================================ +.. currentmodule:: databricks.sdk.service.iam + +.. py:class:: ServicePrincipalsAPI + + Identities for use with jobs, automated tools, and systems such as scripts, apps, and CI/CD platforms. + Databricks recommends creating service principals to run production jobs or modify production data. If all + processes that act on production data run with service principals, interactive users do not need any + write, delete, or modify privileges in production. This eliminates the risk of a user overwriting + production data by accident. + + .. py:method:: create( [, active: Optional[bool], application_id: Optional[str], display_name: Optional[str], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], id: Optional[str], roles: Optional[List[ComplexValue]], schemas: Optional[List[ServicePrincipalSchema]]]) -> ServicePrincipal + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import iam + + w = WorkspaceClient() + + groups = w.groups.group_display_name_to_id_map(iam.ListGroupsRequest()) + + spn = w.service_principals.create(display_name=f'sdk-{time.time_ns()}', + groups=[iam.ComplexValue(value=groups["admins"])]) + + # cleanup + w.service_principals.delete(id=spn.id) + + Create a service principal. + + Creates a new service principal in the Databricks workspace. + + :param active: bool (optional) + If this user is active + :param application_id: str (optional) + UUID relating to the service principal + :param display_name: str (optional) + String that represents a concatenation of given and family names. + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the service principal. See [assigning entitlements] for a full list of + supported values. + + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + :param groups: List[:class:`ComplexValue`] (optional) + :param id: str (optional) + Databricks service principal ID. + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`ServicePrincipalSchema`] (optional) + The schema of the List response. + + :returns: :class:`ServicePrincipal` + + + .. py:method:: delete(id: str) + + Delete a service principal. + + Delete a single service principal in the Databricks workspace. + + :param id: str + Unique ID for a service principal in the Databricks workspace. + + + + + .. py:method:: get(id: str) -> ServicePrincipal + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.service_principals.create(display_name=f'sdk-{time.time_ns()}') + + by_id = w.service_principals.get(id=created.id) + + # cleanup + w.service_principals.delete(id=created.id) + + Get service principal details. + + Gets the details for a single service principal define in the Databricks workspace. + + :param id: str + Unique ID for a service principal in the Databricks workspace. + + :returns: :class:`ServicePrincipal` + + + .. py:method:: list( [, attributes: Optional[str], count: Optional[int], excluded_attributes: Optional[str], filter: Optional[str], sort_by: Optional[str], sort_order: Optional[ListSortOrder], start_index: Optional[int]]) -> Iterator[ServicePrincipal] + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + sp_create = a.service_principals.create(active=True, display_name=f'sdk-{time.time_ns()}') + + sp = a.service_principals.get(id=sp_create.id) + + sp_list = a.service_principals.list(filter="displayName eq %v" % (sp.display_name)) + + # cleanup + a.service_principals.delete(id=sp_create.id) + + List service principals. + + Gets the set of service principals associated with a Databricks workspace. + + :param attributes: str (optional) + Comma-separated list of attributes to return in response. + :param count: int (optional) + Desired number of results per page. + :param excluded_attributes: str (optional) + Comma-separated list of attributes to exclude in response. + :param filter: str (optional) + Query by which the results have to be filtered. Supported operators are equals(`eq`), + contains(`co`), starts with(`sw`) and not equals(`ne`). Additionally, simple expressions can be + formed using logical operators - `and` and `or`. The [SCIM RFC] has more details but we currently + only support simple expressions. + + [SCIM RFC]: https://tools.ietf.org/html/rfc7644#section-3.4.2.2 + :param sort_by: str (optional) + Attribute to sort the results. + :param sort_order: :class:`ListSortOrder` (optional) + The order to sort the results. + :param start_index: int (optional) + Specifies the index of the first result. First item is number 1. + + :returns: Iterator over :class:`ServicePrincipal` + + + .. py:method:: patch(id: str [, operations: Optional[List[Patch]], schemas: Optional[List[PatchSchema]]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import iam + + a = AccountClient() + + sp_create = a.service_principals.create(active=True, display_name=f'sdk-{time.time_ns()}') + + sp = a.service_principals.get(id=sp_create.id) + + a.service_principals.patch(id=sp.id, + operations=[iam.Patch(op=iam.PatchOp.REPLACE, path="active", value="false")], + schemas=[iam.PatchSchema.URN_IETF_PARAMS_SCIM_API_MESSAGES_2_0_PATCH_OP]) + + # cleanup + a.service_principals.delete(id=sp_create.id) + + Update service principal details. + + Partially updates the details of a single service principal in the Databricks workspace. + + :param id: str + Unique ID for a service principal in the Databricks workspace. + :param operations: List[:class:`Patch`] (optional) + :param schemas: List[:class:`PatchSchema`] (optional) + The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. + + + + + .. 
py:method:: update(id: str [, active: Optional[bool], application_id: Optional[str], display_name: Optional[str], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], roles: Optional[List[ComplexValue]], schemas: Optional[List[ServicePrincipalSchema]]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import iam + + w = WorkspaceClient() + + created = w.service_principals.create(display_name=f'sdk-{time.time_ns()}') + + w.service_principals.update(id=created.id, + display_name=f'sdk-{time.time_ns()}', + roles=[iam.ComplexValue(value="xyz")]) + + # cleanup + w.service_principals.delete(id=created.id) + + Replace service principal. + + Updates the details of a single service principal. + + This action replaces the existing service principal with the same name. + + :param id: str + Databricks service principal ID. + :param active: bool (optional) + If this user is active + :param application_id: str (optional) + UUID relating to the service principal + :param display_name: str (optional) + String that represents a concatenation of given and family names. + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the service principal. See [assigning entitlements] for a full list of + supported values. + + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + :param groups: List[:class:`ComplexValue`] (optional) + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`ServicePrincipalSchema`] (optional) + The schema of the List response. + + + \ No newline at end of file diff --git a/docs/workspace/iam/users.rst b/docs/workspace/iam/users.rst new file mode 100644 index 000000000..2adaee935 --- /dev/null +++ b/docs/workspace/iam/users.rst @@ -0,0 +1,316 @@ +``w.users``: Users +================== +.. currentmodule:: databricks.sdk.service.iam + +.. py:class:: UsersAPI + + User identities recognized by Databricks and represented by email addresses. + + Databricks recommends using SCIM provisioning to sync users and groups automatically from your identity + provider to your Databricks workspace. SCIM streamlines onboarding a new employee or team by using your + identity provider to create users and groups in Databricks workspace and give them the proper level of + access. When a user leaves your organization or no longer needs access to Databricks workspace, admins can + terminate the user in your identity provider and that user’s account will also be removed from + Databricks workspace. This ensures a consistent offboarding process and prevents unauthorized users from + accessing sensitive data. + + .. py:method:: create( [, active: Optional[bool], display_name: Optional[str], emails: Optional[List[ComplexValue]], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], id: Optional[str], name: Optional[Name], roles: Optional[List[ComplexValue]], schemas: Optional[List[UserSchema]], user_name: Optional[str]]) -> User + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + user = a.users.create(display_name=f'sdk-{time.time_ns()}', user_name=f'sdk-{time.time_ns()}@example.com') + + # cleanup + a.users.delete(id=user.id) + + Create a new user. + + Creates a new user in the Databricks workspace. This new user will also be added to the Databricks + account. + + :param active: bool (optional) + If this user is active + :param display_name: str (optional) + String that represents a concatenation of given and family names. For example `John Smith`. This + field cannot be updated through the Workspace SCIM APIs when [identity federation is enabled]. Use + Account SCIM APIs to update `displayName`. + + [identity federation is enabled]: https://docs.databricks.com/administration-guide/users-groups/best-practices.html#enable-identity-federation + :param emails: List[:class:`ComplexValue`] (optional) + All the emails associated with the Databricks user. + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the user. See [assigning entitlements] for a full list of supported values. + + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + External ID is not currently supported. It is reserved for future use. + :param groups: List[:class:`ComplexValue`] (optional) + :param id: str (optional) + Databricks user ID. This is automatically set by Databricks. Any value provided by the client will + be ignored. + :param name: :class:`Name` (optional) + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`UserSchema`] (optional) + The schema of the user. + :param user_name: str (optional) + Email address of the Databricks user. + + :returns: :class:`User` + + + .. py:method:: delete(id: str) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + other_owner = w.users.create(user_name=f'sdk-{time.time_ns()}@example.com') + + w.users.delete(id=other_owner.id) + + Delete a user. + + Deletes a user. Deleting a user from a Databricks workspace also removes objects associated with the + user. + + :param id: str + Unique ID for a user in the Databricks workspace. + + + + + .. py:method:: get(id: str [, attributes: Optional[str], count: Optional[int], excluded_attributes: Optional[str], filter: Optional[str], sort_by: Optional[str], sort_order: Optional[GetSortOrder], start_index: Optional[int]]) -> User + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + user = a.users.create(display_name=f'sdk-{time.time_ns()}', user_name=f'sdk-{time.time_ns()}@example.com') + + by_id = a.users.get(id=user.id) + + # cleanup + a.users.delete(id=user.id) + + Get user details. + + Gets information for a specific user in Databricks workspace. + + :param id: str + Unique ID for a user in the Databricks workspace. + :param attributes: str (optional) + Comma-separated list of attributes to return in response. + :param count: int (optional) + Desired number of results per page. + :param excluded_attributes: str (optional) + Comma-separated list of attributes to exclude in response. + :param filter: str (optional) + Query by which the results have to be filtered. 
Supported operators are equals(`eq`), + contains(`co`), starts with(`sw`) and not equals(`ne`). Additionally, simple expressions can be + formed using logical operators - `and` and `or`. The [SCIM RFC] has more details but we currently + only support simple expressions. + + [SCIM RFC]: https://tools.ietf.org/html/rfc7644#section-3.4.2.2 + :param sort_by: str (optional) + Attribute to sort the results. Multi-part paths are supported. For example, `userName`, + `name.givenName`, and `emails`. + :param sort_order: :class:`GetSortOrder` (optional) + The order to sort the results. + :param start_index: int (optional) + Specifies the index of the first result. First item is number 1. + + :returns: :class:`User` + + + .. py:method:: get_permission_levels() -> GetPasswordPermissionLevelsResponse + + Get password permission levels. + + Gets the permission levels that a user can have on an object. + + :returns: :class:`GetPasswordPermissionLevelsResponse` + + + .. py:method:: get_permissions() -> PasswordPermissions + + Get password permissions. + + Gets the permissions of all passwords. Passwords can inherit permissions from their root object. + + :returns: :class:`PasswordPermissions` + + + .. py:method:: list( [, attributes: Optional[str], count: Optional[int], excluded_attributes: Optional[str], filter: Optional[str], sort_by: Optional[str], sort_order: Optional[ListSortOrder], start_index: Optional[int]]) -> Iterator[User] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import iam + + w = WorkspaceClient() + + all_users = w.users.list(attributes="id,userName", + sort_by="userName", + sort_order=iam.ListSortOrder.DESCENDING) + + List users. + + Gets details for all the users associated with a Databricks workspace. + + :param attributes: str (optional) + Comma-separated list of attributes to return in response. + :param count: int (optional) + Desired number of results per page. + :param excluded_attributes: str (optional) + Comma-separated list of attributes to exclude in response. + :param filter: str (optional) + Query by which the results have to be filtered. Supported operators are equals(`eq`), + contains(`co`), starts with(`sw`) and not equals(`ne`). Additionally, simple expressions can be + formed using logical operators - `and` and `or`. The [SCIM RFC] has more details but we currently + only support simple expressions. + + [SCIM RFC]: https://tools.ietf.org/html/rfc7644#section-3.4.2.2 + :param sort_by: str (optional) + Attribute to sort the results. Multi-part paths are supported. For example, `userName`, + `name.givenName`, and `emails`. + :param sort_order: :class:`ListSortOrder` (optional) + The order to sort the results. + :param start_index: int (optional) + Specifies the index of the first result. First item is number 1. + + :returns: Iterator over :class:`User` + + + .. py:method:: patch(id: str [, operations: Optional[List[Patch]], schemas: Optional[List[PatchSchema]]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import iam + + w = WorkspaceClient() + + user = w.users.create(display_name=f'sdk-{time.time_ns()}', user_name=f'sdk-{time.time_ns()}@example.com') + + w.users.patch(id=user.id, + operations=[iam.Patch(op=iam.PatchOp.REPLACE, path="active", value="false")], + schemas=[iam.PatchSchema.URN_IETF_PARAMS_SCIM_API_MESSAGES_2_0_PATCH_OP]) + + Update user details. 
+ + Partially updates a user resource by applying the supplied operations on specific user attributes. + + :param id: str + Unique ID for a user in the Databricks workspace. + :param operations: List[:class:`Patch`] (optional) + :param schemas: List[:class:`PatchSchema`] (optional) + The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. + + + + + .. py:method:: set_permissions( [, access_control_list: Optional[List[PasswordAccessControlRequest]]]) -> PasswordPermissions + + Set password permissions. + + Sets permissions on all passwords. Passwords can inherit permissions from their root object. + + :param access_control_list: List[:class:`PasswordAccessControlRequest`] (optional) + + :returns: :class:`PasswordPermissions` + + + .. py:method:: update(id: str [, active: Optional[bool], display_name: Optional[str], emails: Optional[List[ComplexValue]], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], name: Optional[Name], roles: Optional[List[ComplexValue]], schemas: Optional[List[UserSchema]], user_name: Optional[str]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + user = w.users.create(display_name=f'sdk-{time.time_ns()}', user_name=f'sdk-{time.time_ns()}@example.com') + + w.users.update(id=user.id, user_name=user.user_name, active=True) + + Replace a user. + + Replaces a user's information with the data supplied in request. + + :param id: str + Databricks user ID. This is automatically set by Databricks. Any value provided by the client will + be ignored. + :param active: bool (optional) + If this user is active + :param display_name: str (optional) + String that represents a concatenation of given and family names. For example `John Smith`. This + field cannot be updated through the Workspace SCIM APIs when [identity federation is enabled]. Use + Account SCIM APIs to update `displayName`. + + [identity federation is enabled]: https://docs.databricks.com/administration-guide/users-groups/best-practices.html#enable-identity-federation + :param emails: List[:class:`ComplexValue`] (optional) + All the emails associated with the Databricks user. + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the user. See [assigning entitlements] for a full list of supported values. + + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + External ID is not currently supported. It is reserved for future use. + :param groups: List[:class:`ComplexValue`] (optional) + :param name: :class:`Name` (optional) + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`UserSchema`] (optional) + The schema of the user. + :param user_name: str (optional) + Email address of the Databricks user. + + + + + .. py:method:: update_permissions( [, access_control_list: Optional[List[PasswordAccessControlRequest]]]) -> PasswordPermissions + + Update password permissions. + + Updates the permissions on all passwords. Passwords can inherit permissions from their root object. 
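+
+        A minimal sketch of updating password permissions through this service (the
+        `iam.PasswordAccessControlRequest` field names and the `CAN_USE` permission level
+        are assumptions based on the generated `iam` dataclasses, not confirmed by this page):
+
+        .. code-block::
+
+            from databricks.sdk import WorkspaceClient
+            from databricks.sdk.service import iam
+
+            w = WorkspaceClient()
+
+            # assumed field names: grant the `admins` group the ability to use passwords
+            permissions = w.users.update_permissions(access_control_list=[
+                iam.PasswordAccessControlRequest(group_name="admins",
+                                                 permission_level=iam.PasswordPermissionLevel.CAN_USE)
+            ])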
+ + :param access_control_list: List[:class:`PasswordAccessControlRequest`] (optional) + + :returns: :class:`PasswordPermissions` + \ No newline at end of file diff --git a/docs/workspace/index.rst b/docs/workspace/index.rst new file mode 100644 index 000000000..de76f7c5f --- /dev/null +++ b/docs/workspace/index.rst @@ -0,0 +1,23 @@ + +Workspace APIs +============== + +These APIs are available from WorkspaceClient + +.. toctree:: + :maxdepth: 1 + + workspace/index + compute/index + jobs/index + pipelines/index + files/index + ml/index + serving/index + iam/index + sql/index + catalog/index + sharing/index + settings/index + vectorsearch/index + dashboards/index \ No newline at end of file diff --git a/docs/workspace/jobs/index.rst b/docs/workspace/jobs/index.rst new file mode 100644 index 000000000..a8f242ea2 --- /dev/null +++ b/docs/workspace/jobs/index.rst @@ -0,0 +1,10 @@ + +Jobs +==== + +Schedule automated jobs on Databricks Workspaces + +.. toctree:: + :maxdepth: 1 + + jobs \ No newline at end of file diff --git a/docs/workspace/jobs/jobs.rst b/docs/workspace/jobs/jobs.rst new file mode 100644 index 000000000..54a42c979 --- /dev/null +++ b/docs/workspace/jobs/jobs.rst @@ -0,0 +1,1096 @@ +``w.jobs``: Jobs +================ +.. currentmodule:: databricks.sdk.service.jobs + +.. py:class:: JobsAPI + + The Jobs API allows you to create, edit, and delete jobs. + + You can use a Databricks job to run a data processing or data analysis task in a Databricks cluster with + scalable resources. Your job can consist of a single task or can be a large, multi-task workflow with + complex dependencies. Databricks manages the task orchestration, cluster management, monitoring, and error + reporting for all of your jobs. You can run your jobs immediately or periodically through an easy-to-use + scheduling system. You can implement job tasks using notebooks, JARS, Delta Live Tables pipelines, or + Python, Scala, Spark submit, and Java applications. + + You should never hard code secrets or store them in plain text. Use the [Secrets CLI] to manage secrets in + the [Databricks CLI]. Use the [Secrets utility] to reference secrets in notebooks and jobs. + + [Databricks CLI]: https://docs.databricks.com/dev-tools/cli/index.html + [Secrets CLI]: https://docs.databricks.com/dev-tools/cli/secrets-cli.html + [Secrets utility]: https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-secrets + + .. py:method:: cancel_all_runs( [, all_queued_runs: Optional[bool], job_id: Optional[int]]) + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + created_job = w.jobs.create(name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.Task(description="test", + existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key="test", + timeout_seconds=0) + ]) + + w.jobs.cancel_all_runs(job_id=created_job.job_id) + + # cleanup + w.jobs.delete(job_id=created_job.job_id) + + Cancel all runs of a job. + + Cancels all active runs of a job. The runs are canceled asynchronously, so it doesn't prevent new runs + from being started. + + :param all_queued_runs: bool (optional) + Optional boolean parameter to cancel all queued runs. 
If no job_id is provided, all queued runs in + the workspace are canceled. + :param job_id: int (optional) + The canonical identifier of the job to cancel all runs of. + + + + + .. py:method:: cancel_run(run_id: int) -> Wait[Run] + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + created_job = w.jobs.create(name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.Task(description="test", + existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key="test", + timeout_seconds=0) + ]) + + run_now_response = w.jobs.run_now(job_id=created_job.job_id) + + cancelled_run = w.jobs.cancel_run(run_id=run_now_response.response.run_id).result() + + # cleanup + w.jobs.delete(job_id=created_job.job_id) + + Cancel a run. + + Cancels a job run or a task run. The run is canceled asynchronously, so it may still be running when + this request completes. + + :param run_id: int + This field is required. + + :returns: + Long-running operation waiter for :class:`Run`. + See :method:wait_get_run_job_terminated_or_skipped for more details. + + + .. py:method:: cancel_run_and_wait(run_id: int, timeout: datetime.timedelta = 0:20:00) -> Run + + + .. py:method:: create( [, access_control_list: Optional[List[iam.AccessControlRequest]], compute: Optional[List[JobCompute]], continuous: Optional[Continuous], deployment: Optional[JobDeployment], description: Optional[str], edit_mode: Optional[CreateJobEditMode], email_notifications: Optional[JobEmailNotifications], format: Optional[Format], git_source: Optional[GitSource], health: Optional[JobsHealthRules], job_clusters: Optional[List[JobCluster]], max_concurrent_runs: Optional[int], name: Optional[str], notification_settings: Optional[JobNotificationSettings], parameters: Optional[List[JobParameterDefinition]], queue: Optional[QueueSettings], run_as: Optional[JobRunAs], schedule: Optional[CronSchedule], tags: Optional[Dict[str, str]], tasks: Optional[List[Task]], timeout_seconds: Optional[int], trigger: Optional[TriggerSettings], webhook_notifications: Optional[WebhookNotifications]]) -> CreateResponse + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + created_job = w.jobs.create(name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.Task(description="test", + existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key="test", + timeout_seconds=0) + ]) + + # cleanup + w.jobs.delete(job_id=created_job.job_id) + + Create a new job. + + Create a new job. + + :param access_control_list: List[:class:`AccessControlRequest`] (optional) + List of permissions to set on the job. + :param compute: List[:class:`JobCompute`] (optional) + A list of compute requirements that can be referenced by tasks of this job. + :param continuous: :class:`Continuous` (optional) + An optional continuous property for this job. 
The continuous property will ensure that there is + always one run executing. Only one of `schedule` and `continuous` can be used. + :param deployment: :class:`JobDeployment` (optional) + Deployment information for jobs managed by external sources. + :param description: str (optional) + An optional description for the job. The maximum length is 1024 characters in UTF-8 encoding. + :param edit_mode: :class:`CreateJobEditMode` (optional) + Edit mode of the job. + + * `UI_LOCKED`: The job is in a locked UI state and cannot be modified. * `EDITABLE`: The job is in + an editable state and can be modified. + :param email_notifications: :class:`JobEmailNotifications` (optional) + An optional set of email addresses that is notified when runs of this job begin or complete as well + as when this job is deleted. + :param format: :class:`Format` (optional) + Used to tell what is the format of the job. This field is ignored in Create/Update/Reset calls. When + using the Jobs API 2.1 this value is always set to `"MULTI_TASK"`. + :param git_source: :class:`GitSource` (optional) + An optional specification for a remote Git repository containing the source code used by tasks. + Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. + + If `git_source` is set, these tasks retrieve the file from the remote repository by default. + However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. + + Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are + used, `git_source` must be defined on the job. + :param health: :class:`JobsHealthRules` (optional) + An optional set of health rules that can be defined for this job. + :param job_clusters: List[:class:`JobCluster`] (optional) + A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries + cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. + :param max_concurrent_runs: int (optional) + An optional maximum allowed number of concurrent runs of the job. + + Set this value if you want to be able to execute multiple runs of the same job concurrently. This is + useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs + to overlap with each other, or if you want to trigger multiple runs which differ by their input + parameters. + + This setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are + 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs. + However, from then on, new runs are skipped unless there are fewer than 3 active runs. + + This value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped. + :param name: str (optional) + An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding. + :param notification_settings: :class:`JobNotificationSettings` (optional) + Optional notification settings that are used when sending notifications to each of the + `email_notifications` and `webhook_notifications` for this job. + :param parameters: List[:class:`JobParameterDefinition`] (optional) + Job-level parameter definitions + :param queue: :class:`QueueSettings` (optional) + The queue settings of the job. + :param run_as: :class:`JobRunAs` (optional) + Write-only setting, available only in Create/Update/Reset and Submit calls. Specifies the user or + service principal that the job runs as. 
If not specified, the job runs as the user who created the + job. + + Only `user_name` or `service_principal_name` can be specified. If both are specified, an error is + thrown. + :param schedule: :class:`CronSchedule` (optional) + An optional periodic schedule for this job. The default behavior is that the job only runs when + triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. + :param tags: Dict[str,str] (optional) + A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs + clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added + to the job. + :param tasks: List[:class:`Task`] (optional) + A list of task specifications to be executed by this job. + :param timeout_seconds: int (optional) + An optional timeout applied to each run of this job. A value of `0` means no timeout. + :param trigger: :class:`TriggerSettings` (optional) + Trigger settings for the job. Can be used to trigger a run when new files arrive in an external + location. The default behavior is that the job runs only when triggered by clicking “Run Now” in + the Jobs UI or sending an API request to `runNow`. + :param webhook_notifications: :class:`WebhookNotifications` (optional) + A collection of system notification IDs to notify when runs of this job begin or complete. + + :returns: :class:`CreateResponse` + + + .. py:method:: delete(job_id: int) + + Delete a job. + + Deletes a job. + + :param job_id: int + The canonical identifier of the job to delete. This field is required. + + + + + .. py:method:: delete_run(run_id: int) + + Delete a job run. + + Deletes a non-active run. Returns an error if the run is active. + + :param run_id: int + The canonical identifier of the run for which to retrieve the metadata. + + + + + .. py:method:: export_run(run_id: int [, views_to_export: Optional[ViewsToExport]]) -> ExportRunOutput + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + created_job = w.jobs.create(name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.Task(description="test", + existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key="test", + timeout_seconds=0) + ]) + + run_by_id = w.jobs.run_now(job_id=created_job.job_id).result() + + exported_view = w.jobs.export_run(run_id=run_by_id.tasks[0].run_id, views_to_export="CODE") + + # cleanup + w.jobs.delete(job_id=created_job.job_id) + + Export and retrieve a job run. + + Export and retrieve the job run task. + + :param run_id: int + The canonical identifier for the run. This field is required. + :param views_to_export: :class:`ViewsToExport` (optional) + Which views to export (CODE, DASHBOARDS, or ALL). Defaults to CODE. + + :returns: :class:`ExportRunOutput` + + + .. py:method:: get(job_id: int) -> Job + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + run = w.jobs.submit(run_name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.SubmitTask(existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key=f'sdk-{time.time_ns()}') + ]).result() + + output = w.jobs.get_run_output(run_id=run.tasks[0].run_id) + + # cleanup + w.jobs.delete_run(run_id=run.run_id) + + Get a single job. + + Retrieves the details for a single job. + + :param job_id: int + The canonical identifier of the job to retrieve information about. This field is required. + + :returns: :class:`Job` + + + .. py:method:: get_permission_levels(job_id: str) -> GetJobPermissionLevelsResponse + + Get job permission levels. + + Gets the permission levels that a user can have on an object. + + :param job_id: str + The job for which to get or manage permissions. + + :returns: :class:`GetJobPermissionLevelsResponse` + + + .. py:method:: get_permissions(job_id: str) -> JobPermissions + + Get job permissions. + + Gets the permissions of a job. Jobs can inherit permissions from their root object. + + :param job_id: str + The job for which to get or manage permissions. + + :returns: :class:`JobPermissions` + + + .. py:method:: get_run(run_id: int [, include_history: Optional[bool], include_resolved_values: Optional[bool]]) -> Run + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + run = w.jobs.submit(run_name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.SubmitTask(existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key=f'sdk-{time.time_ns()}') + ]).result() + + output = w.jobs.get_run_output(run_id=run.tasks[0].run_id) + + # cleanup + w.jobs.delete_run(run_id=run.run_id) + + Get a single job run. + + Retrieve the metadata of a run. + + :param run_id: int + The canonical identifier of the run for which to retrieve the metadata. This field is required. + :param include_history: bool (optional) + Whether to include the repair history in the response. + :param include_resolved_values: bool (optional) + Whether to include resolved parameter values in the response. + + :returns: :class:`Run` + + + .. py:method:: get_run_output(run_id: int) -> RunOutput + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + run = w.jobs.submit(run_name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.SubmitTask(existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key=f'sdk-{time.time_ns()}') + ]).result() + + output = w.jobs.get_run_output(run_id=run.tasks[0].run_id) + + # cleanup + w.jobs.delete_run(run_id=run.run_id) + + Get the output for a single run. + + Retrieve the output and metadata of a single task run. When a notebook task returns a value through + the `dbutils.notebook.exit()` call, you can use this endpoint to retrieve that value. Databricks + restricts this API to returning the first 5 MB of the output. To return a larger result, you can store + job results in a cloud storage service. + + This endpoint validates that the __run_id__ parameter is valid and returns an HTTP status code 400 if + the __run_id__ parameter is invalid. Runs are automatically removed after 60 days. If you to want to + reference them beyond 60 days, you must save old run results before they expire. + + :param run_id: int + The canonical identifier for the run. This field is required. + + :returns: :class:`RunOutput` + + + .. py:method:: list( [, expand_tasks: Optional[bool], limit: Optional[int], name: Optional[str], offset: Optional[int], page_token: Optional[str]]) -> Iterator[BaseJob] + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + created_job = w.jobs.create(name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.Task(description="test", + existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key="test", + timeout_seconds=0) + ]) + + run_list = w.jobs.list_runs(job_id=created_job.job_id) + + # cleanup + w.jobs.delete(job_id=created_job.job_id) + + List jobs. + + Retrieves a list of jobs. + + :param expand_tasks: bool (optional) + Whether to include task and cluster details in the response. + :param limit: int (optional) + The number of jobs to return. This value must be greater than 0 and less or equal to 100. The + default value is 20. + :param name: str (optional) + A filter on the list based on the exact (case insensitive) job name. + :param offset: int (optional) + The offset of the first job to return, relative to the most recently created job. + + Deprecated since June 2023. Use `page_token` to iterate through the pages instead. + :param page_token: str (optional) + Use `next_page_token` or `prev_page_token` returned from the previous request to list the next or + previous page of jobs respectively. + + :returns: Iterator over :class:`BaseJob` + + + .. 
py:method:: list_runs( [, active_only: Optional[bool], completed_only: Optional[bool], expand_tasks: Optional[bool], job_id: Optional[int], limit: Optional[int], offset: Optional[int], page_token: Optional[str], run_type: Optional[ListRunsRunType], start_time_from: Optional[int], start_time_to: Optional[int]]) -> Iterator[BaseRun] + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + created_job = w.jobs.create(name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.Task(description="test", + existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key="test", + timeout_seconds=0) + ]) + + run_list = w.jobs.list_runs(job_id=created_job.job_id) + + # cleanup + w.jobs.delete(job_id=created_job.job_id) + + List job runs. + + List runs in descending order by start time. + + :param active_only: bool (optional) + If active_only is `true`, only active runs are included in the results; otherwise, lists both active + and completed runs. An active run is a run in the `QUEUED`, `PENDING`, `RUNNING`, or `TERMINATING`. + This field cannot be `true` when completed_only is `true`. + :param completed_only: bool (optional) + If completed_only is `true`, only completed runs are included in the results; otherwise, lists both + active and completed runs. This field cannot be `true` when active_only is `true`. + :param expand_tasks: bool (optional) + Whether to include task and cluster details in the response. + :param job_id: int (optional) + The job for which to list runs. If omitted, the Jobs service lists runs from all jobs. + :param limit: int (optional) + The number of runs to return. This value must be greater than 0 and less than 25. The default value + is 20. If a request specifies a limit of 0, the service instead uses the maximum limit. + :param offset: int (optional) + The offset of the first run to return, relative to the most recent run. + + Deprecated since June 2023. Use `page_token` to iterate through the pages instead. + :param page_token: str (optional) + Use `next_page_token` or `prev_page_token` returned from the previous request to list the next or + previous page of runs respectively. + :param run_type: :class:`ListRunsRunType` (optional) + The type of runs to return. For a description of run types, see :method:jobs/getRun. + :param start_time_from: int (optional) + Show runs that started _at or after_ this value. The value must be a UTC timestamp in milliseconds. + Can be combined with _start_time_to_ to filter by a time range. + :param start_time_to: int (optional) + Show runs that started _at or before_ this value. The value must be a UTC timestamp in milliseconds. + Can be combined with _start_time_from_ to filter by a time range. + + :returns: Iterator over :class:`BaseRun` + + + .. 
py:method:: repair_run(run_id: int [, dbt_commands: Optional[List[str]], jar_params: Optional[List[str]], job_parameters: Optional[Dict[str, str]], latest_repair_id: Optional[int], notebook_params: Optional[Dict[str, str]], pipeline_params: Optional[PipelineParams], python_named_params: Optional[Dict[str, str]], python_params: Optional[List[str]], rerun_all_failed_tasks: Optional[bool], rerun_dependent_tasks: Optional[bool], rerun_tasks: Optional[List[str]], spark_submit_params: Optional[List[str]], sql_params: Optional[Dict[str, str]]]) -> Wait[Run] + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + created_job = w.jobs.create(name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.Task(description="test", + existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key="test", + timeout_seconds=0) + ]) + + run_now_response = w.jobs.run_now(job_id=created_job.job_id) + + cancelled_run = w.jobs.cancel_run(run_id=run_now_response.response.run_id).result() + + repaired_run = w.jobs.repair_run(rerun_tasks=[cancelled_run.tasks[0].task_key], + run_id=run_now_response.response.run_id).result() + + # cleanup + w.jobs.delete(job_id=created_job.job_id) + + Repair a job run. + + Re-run one or more tasks. Tasks are re-run as part of the original job run. They use the current job + and task settings, and can be viewed in the history for the original job run. + + :param run_id: int + The job run ID of the run to repair. The run must not be in progress. + :param dbt_commands: List[str] (optional) + An array of commands to execute for jobs with the dbt task, for example `"dbt_commands": ["dbt + deps", "dbt seed", "dbt run"]` + :param jar_params: List[str] (optional) + A list of parameters for jobs with Spark JAR tasks, for example `"jar_params": ["john doe", "35"]`. + The parameters are used to invoke the main function of the main class specified in the Spark JAR + task. If not specified upon `run-now`, it defaults to an empty list. jar_params cannot be specified + in conjunction with notebook_params. The JSON representation of this field (for example + `{"jar_params":["john doe","35"]}`) cannot exceed 10,000 bytes. + + Use [Task parameter variables](/jobs.html"#parameter-variables") to set parameters containing + information about job runs. + :param job_parameters: Dict[str,str] (optional) + Job-level parameters used in the run. for example `"param": "overriding_val"` + :param latest_repair_id: int (optional) + The ID of the latest repair. This parameter is not required when repairing a run for the first time, + but must be provided on subsequent requests to repair the same run. + :param notebook_params: Dict[str,str] (optional) + A map from keys to values for jobs with notebook task, for example `"notebook_params": {"name": + "john doe", "age": "35"}`. The map is passed to the notebook and is accessible through the + [dbutils.widgets.get] function. + + If not specified upon `run-now`, the triggered run uses the job’s base parameters. + + notebook_params cannot be specified in conjunction with jar_params. + + Use [Task parameter variables] to set parameters containing information about job runs. 
+ + The JSON representation of this field (for example `{"notebook_params":{"name":"john + doe","age":"35"}}`) cannot exceed 10,000 bytes. + + [Task parameter variables]: https://docs.databricks.com/jobs.html#parameter-variables + [dbutils.widgets.get]: https://docs.databricks.com/dev-tools/databricks-utils.html + :param pipeline_params: :class:`PipelineParams` (optional) + :param python_named_params: Dict[str,str] (optional) + A map from keys to values for jobs with Python wheel task, for example `"python_named_params": + {"name": "task", "data": "dbfs:/path/to/data.json"}`. + :param python_params: List[str] (optional) + A list of parameters for jobs with Python tasks, for example `"python_params": ["john doe", "35"]`. + The parameters are passed to Python file as command-line parameters. If specified upon `run-now`, it + would overwrite the parameters specified in job setting. The JSON representation of this field (for + example `{"python_params":["john doe","35"]}`) cannot exceed 10,000 bytes. + + Use [Task parameter variables] to set parameters containing information about job runs. + + Important + + These parameters accept only Latin characters (ASCII character set). Using non-ASCII characters + returns an error. Examples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and + emojis. + + [Task parameter variables]: https://docs.databricks.com/jobs.html#parameter-variables + :param rerun_all_failed_tasks: bool (optional) + If true, repair all failed tasks. Only one of `rerun_tasks` or `rerun_all_failed_tasks` can be used. + :param rerun_dependent_tasks: bool (optional) + If true, repair all tasks that depend on the tasks in `rerun_tasks`, even if they were previously + successful. Can be also used in combination with `rerun_all_failed_tasks`. + :param rerun_tasks: List[str] (optional) + The task keys of the task runs to repair. + :param spark_submit_params: List[str] (optional) + A list of parameters for jobs with spark submit task, for example `"spark_submit_params": + ["--class", "org.apache.spark.examples.SparkPi"]`. The parameters are passed to spark-submit script + as command-line parameters. If specified upon `run-now`, it would overwrite the parameters specified + in job setting. The JSON representation of this field (for example `{"python_params":["john + doe","35"]}`) cannot exceed 10,000 bytes. + + Use [Task parameter variables] to set parameters containing information about job runs + + Important + + These parameters accept only Latin characters (ASCII character set). Using non-ASCII characters + returns an error. Examples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and + emojis. + + [Task parameter variables]: https://docs.databricks.com/jobs.html#parameter-variables + :param sql_params: Dict[str,str] (optional) + A map from keys to values for jobs with SQL task, for example `"sql_params": {"name": "john doe", + "age": "35"}`. The SQL alert task does not support custom parameters. + + :returns: + Long-running operation waiter for :class:`Run`. + See :method:wait_get_run_job_terminated_or_skipped for more details. + + + .. 
py:method:: repair_run_and_wait(run_id: int [, dbt_commands: Optional[List[str]], jar_params: Optional[List[str]], job_parameters: Optional[Dict[str, str]], latest_repair_id: Optional[int], notebook_params: Optional[Dict[str, str]], pipeline_params: Optional[PipelineParams], python_named_params: Optional[Dict[str, str]], python_params: Optional[List[str]], rerun_all_failed_tasks: Optional[bool], rerun_dependent_tasks: Optional[bool], rerun_tasks: Optional[List[str]], spark_submit_params: Optional[List[str]], sql_params: Optional[Dict[str, str]], timeout: datetime.timedelta = 0:20:00]) -> Run + + + .. py:method:: reset(job_id: int, new_settings: JobSettings) + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + created_job = w.jobs.create(name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.Task(description="test", + existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key="test", + timeout_seconds=0) + ]) + + new_name = f'sdk-{time.time_ns()}' + + by_id = w.jobs.get(job_id=created_job.job_id) + + w.jobs.reset(job_id=by_id.job_id, new_settings=jobs.JobSettings(name=new_name, tasks=by_id.settings.tasks)) + + # cleanup + w.jobs.delete(job_id=created_job.job_id) + + Update all job settings (reset). + + Overwrite all settings for the given job. Use the [_Update_ endpoint](:method:jobs/update) to update + job settings partially. + + :param job_id: int + The canonical identifier of the job to reset. This field is required. + :param new_settings: :class:`JobSettings` + The new settings of the job. These settings completely replace the old settings. + + Changes to the field `JobBaseSettings.timeout_seconds` are applied to active runs. Changes to other + fields are applied to future runs only. + + + + + .. py:method:: run_now(job_id: int [, dbt_commands: Optional[List[str]], idempotency_token: Optional[str], jar_params: Optional[List[str]], job_parameters: Optional[Dict[str, str]], notebook_params: Optional[Dict[str, str]], pipeline_params: Optional[PipelineParams], python_named_params: Optional[Dict[str, str]], python_params: Optional[List[str]], queue: Optional[QueueSettings], spark_submit_params: Optional[List[str]], sql_params: Optional[Dict[str, str]]]) -> Wait[Run] + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + created_job = w.jobs.create(name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.Task(description="test", + existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key="test", + timeout_seconds=0) + ]) + + run_by_id = w.jobs.run_now(job_id=created_job.job_id).result() + + # cleanup + w.jobs.delete(job_id=created_job.job_id) + + Trigger a new job run. + + Run a job and return the `run_id` of the triggered run. 
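+
+        For example, a minimal sketch that overrides the notebook parameters of an existing
+        job and blocks until the run terminates (the `job_id` value is a placeholder for a
+        job whose single task is a notebook task):
+
+        .. code-block::
+
+            from databricks.sdk import WorkspaceClient
+
+            w = WorkspaceClient()
+
+            # 1234 is a placeholder job ID; notebook_params are passed to the notebook task
+            run = w.jobs.run_now(job_id=1234, notebook_params={"name": "john doe"}).result()
+
+            print(run.state.result_state)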
+ + :param job_id: int + The ID of the job to be executed + :param dbt_commands: List[str] (optional) + An array of commands to execute for jobs with the dbt task, for example `"dbt_commands": ["dbt + deps", "dbt seed", "dbt run"]` + :param idempotency_token: str (optional) + An optional token to guarantee the idempotency of job run requests. If a run with the provided token + already exists, the request does not create a new run but returns the ID of the existing run + instead. If a run with the provided token is deleted, an error is returned. + + If you specify the idempotency token, upon failure you can retry until the request succeeds. + Databricks guarantees that exactly one run is launched with that idempotency token. + + This token must have at most 64 characters. + + For more information, see [How to ensure idempotency for jobs]. + + [How to ensure idempotency for jobs]: https://kb.databricks.com/jobs/jobs-idempotency.html + :param jar_params: List[str] (optional) + A list of parameters for jobs with Spark JAR tasks, for example `"jar_params": ["john doe", "35"]`. + The parameters are used to invoke the main function of the main class specified in the Spark JAR + task. If not specified upon `run-now`, it defaults to an empty list. jar_params cannot be specified + in conjunction with notebook_params. The JSON representation of this field (for example + `{"jar_params":["john doe","35"]}`) cannot exceed 10,000 bytes. + + Use [Task parameter variables](/jobs.html"#parameter-variables") to set parameters containing + information about job runs. + :param job_parameters: Dict[str,str] (optional) + Job-level parameters used in the run. for example `"param": "overriding_val"` + :param notebook_params: Dict[str,str] (optional) + A map from keys to values for jobs with notebook task, for example `"notebook_params": {"name": + "john doe", "age": "35"}`. The map is passed to the notebook and is accessible through the + [dbutils.widgets.get] function. + + If not specified upon `run-now`, the triggered run uses the job’s base parameters. + + notebook_params cannot be specified in conjunction with jar_params. + + Use [Task parameter variables] to set parameters containing information about job runs. + + The JSON representation of this field (for example `{"notebook_params":{"name":"john + doe","age":"35"}}`) cannot exceed 10,000 bytes. + + [Task parameter variables]: https://docs.databricks.com/jobs.html#parameter-variables + [dbutils.widgets.get]: https://docs.databricks.com/dev-tools/databricks-utils.html + :param pipeline_params: :class:`PipelineParams` (optional) + :param python_named_params: Dict[str,str] (optional) + A map from keys to values for jobs with Python wheel task, for example `"python_named_params": + {"name": "task", "data": "dbfs:/path/to/data.json"}`. + :param python_params: List[str] (optional) + A list of parameters for jobs with Python tasks, for example `"python_params": ["john doe", "35"]`. + The parameters are passed to Python file as command-line parameters. If specified upon `run-now`, it + would overwrite the parameters specified in job setting. The JSON representation of this field (for + example `{"python_params":["john doe","35"]}`) cannot exceed 10,000 bytes. + + Use [Task parameter variables] to set parameters containing information about job runs. + + Important + + These parameters accept only Latin characters (ASCII character set). Using non-ASCII characters + returns an error. Examples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and + emojis. 
+ + [Task parameter variables]: https://docs.databricks.com/jobs.html#parameter-variables + :param queue: :class:`QueueSettings` (optional) + The queue settings of the run. + :param spark_submit_params: List[str] (optional) + A list of parameters for jobs with spark submit task, for example `"spark_submit_params": + ["--class", "org.apache.spark.examples.SparkPi"]`. The parameters are passed to spark-submit script + as command-line parameters. If specified upon `run-now`, it would overwrite the parameters specified + in job setting. The JSON representation of this field (for example `{"python_params":["john + doe","35"]}`) cannot exceed 10,000 bytes. + + Use [Task parameter variables] to set parameters containing information about job runs + + Important + + These parameters accept only Latin characters (ASCII character set). Using non-ASCII characters + returns an error. Examples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and + emojis. + + [Task parameter variables]: https://docs.databricks.com/jobs.html#parameter-variables + :param sql_params: Dict[str,str] (optional) + A map from keys to values for jobs with SQL task, for example `"sql_params": {"name": "john doe", + "age": "35"}`. The SQL alert task does not support custom parameters. + + :returns: + Long-running operation waiter for :class:`Run`. + See :method:wait_get_run_job_terminated_or_skipped for more details. + + + .. py:method:: run_now_and_wait(job_id: int [, dbt_commands: Optional[List[str]], idempotency_token: Optional[str], jar_params: Optional[List[str]], job_parameters: Optional[Dict[str, str]], notebook_params: Optional[Dict[str, str]], pipeline_params: Optional[PipelineParams], python_named_params: Optional[Dict[str, str]], python_params: Optional[List[str]], queue: Optional[QueueSettings], spark_submit_params: Optional[List[str]], sql_params: Optional[Dict[str, str]], timeout: datetime.timedelta = 0:20:00]) -> Run + + + .. py:method:: set_permissions(job_id: str [, access_control_list: Optional[List[JobAccessControlRequest]]]) -> JobPermissions + + Set job permissions. + + Sets permissions on a job. Jobs can inherit permissions from their root object. + + :param job_id: str + The job for which to get or manage permissions. + :param access_control_list: List[:class:`JobAccessControlRequest`] (optional) + + :returns: :class:`JobPermissions` + + + .. py:method:: submit( [, access_control_list: Optional[List[iam.AccessControlRequest]], email_notifications: Optional[JobEmailNotifications], git_source: Optional[GitSource], health: Optional[JobsHealthRules], idempotency_token: Optional[str], notification_settings: Optional[JobNotificationSettings], queue: Optional[QueueSettings], run_name: Optional[str], tasks: Optional[List[SubmitTask]], timeout_seconds: Optional[int], webhook_notifications: Optional[WebhookNotifications]]) -> Wait[Run] + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + run = w.jobs.submit(run_name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.SubmitTask(existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key=f'sdk-{time.time_ns()}') + ]).result() + + # cleanup + w.jobs.delete_run(run_id=run.run_id) + + Create and trigger a one-time run. + + Submit a one-time run. This endpoint allows you to submit a workload directly without creating a job. + Runs submitted using this endpoint don’t display in the UI. Use the `jobs/runs/get` API to check the + run state after the job is submitted. + + :param access_control_list: List[:class:`AccessControlRequest`] (optional) + List of permissions to set on the job. + :param email_notifications: :class:`JobEmailNotifications` (optional) + An optional set of email addresses notified when the run begins or completes. + :param git_source: :class:`GitSource` (optional) + An optional specification for a remote Git repository containing the source code used by tasks. + Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. + + If `git_source` is set, these tasks retrieve the file from the remote repository by default. + However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. + + Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are + used, `git_source` must be defined on the job. + :param health: :class:`JobsHealthRules` (optional) + An optional set of health rules that can be defined for this job. + :param idempotency_token: str (optional) + An optional token that can be used to guarantee the idempotency of job run requests. If a run with + the provided token already exists, the request does not create a new run but returns the ID of the + existing run instead. If a run with the provided token is deleted, an error is returned. + + If you specify the idempotency token, upon failure you can retry until the request succeeds. + Databricks guarantees that exactly one run is launched with that idempotency token. + + This token must have at most 64 characters. + + For more information, see [How to ensure idempotency for jobs]. + + [How to ensure idempotency for jobs]: https://kb.databricks.com/jobs/jobs-idempotency.html + :param notification_settings: :class:`JobNotificationSettings` (optional) + Optional notification settings that are used when sending notifications to each of the + `email_notifications` and `webhook_notifications` for this run. + :param queue: :class:`QueueSettings` (optional) + The queue settings of the one-time run. + :param run_name: str (optional) + An optional name for the run. The default value is `Untitled`. + :param tasks: List[:class:`SubmitTask`] (optional) + :param timeout_seconds: int (optional) + An optional timeout applied to each run of this job. A value of `0` means no timeout. + :param webhook_notifications: :class:`WebhookNotifications` (optional) + A collection of system notification IDs to notify when the run begins or completes. + + :returns: + Long-running operation waiter for :class:`Run`. + See :method:wait_get_run_job_terminated_or_skipped for more details. 
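+
+        The usage example above blocks on `.result()`. Because runs submitted through this
+        endpoint don't display in the UI, a common alternative is to keep the immediate
+        response and check the run state yourself with `get_run`. A minimal sketch, assuming
+        a placeholder cluster ID and notebook path, and assuming the immediate response
+        exposes `run_id` in the same way as the `run_now` examples earlier on this page:
+
+        .. code-block::
+
+            import time
+
+            from databricks.sdk import WorkspaceClient
+            from databricks.sdk.service import jobs
+
+            w = WorkspaceClient()
+
+            # placeholders for an existing interactive cluster and notebook
+            waiter = w.jobs.submit(run_name=f'sdk-{time.time_ns()}',
+                                   tasks=[
+                                       jobs.SubmitTask(existing_cluster_id="0123-456789-abcdefgh",
+                                                       notebook_task=jobs.NotebookTask(
+                                                           notebook_path="/Users/me@example.com/my-notebook"),
+                                                       task_key="main")
+                                   ])
+
+            # check the run state without blocking until termination
+            status = w.jobs.get_run(run_id=waiter.response.run_id)
+            print(status.state.life_cycle_state)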
+ + + .. py:method:: submit_and_wait( [, access_control_list: Optional[List[iam.AccessControlRequest]], email_notifications: Optional[JobEmailNotifications], git_source: Optional[GitSource], health: Optional[JobsHealthRules], idempotency_token: Optional[str], notification_settings: Optional[JobNotificationSettings], queue: Optional[QueueSettings], run_name: Optional[str], tasks: Optional[List[SubmitTask]], timeout_seconds: Optional[int], webhook_notifications: Optional[WebhookNotifications], timeout: datetime.timedelta = 0:20:00]) -> Run + + + .. py:method:: update(job_id: int [, fields_to_remove: Optional[List[str]], new_settings: Optional[JobSettings]]) + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + new_name = f'sdk-{time.time_ns()}' + + created_job = w.jobs.create(name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.Task(description="test", + existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key="test", + timeout_seconds=0) + ]) + + w.jobs.update(job_id=created_job.job_id, new_settings=jobs.JobSettings(name=new_name, max_concurrent_runs=5)) + + # cleanup + w.jobs.delete(job_id=created_job.job_id) + + Update job settings partially. + + Add, update, or remove specific settings of an existing job. Use the [_Reset_ + endpoint](:method:jobs/reset) to overwrite all job settings. + + :param job_id: int + The canonical identifier of the job to update. This field is required. + :param fields_to_remove: List[str] (optional) + Remove top-level fields in the job settings. Removing nested fields is not supported, except for + tasks and job clusters (`tasks/task_1`). This field is optional. + :param new_settings: :class:`JobSettings` (optional) + The new settings for the job. + + Top-level fields specified in `new_settings` are completely replaced, except for arrays which are + merged. That is, new and existing entries are completely replaced based on the respective key + fields, i.e. `task_key` or `job_cluster_key`, while previous entries are kept. + + Partially updating nested fields is not supported. + + Changes to the field `JobSettings.timeout_seconds` are applied to active runs. Changes to other + fields are applied to future runs only. + + + + + .. py:method:: update_permissions(job_id: str [, access_control_list: Optional[List[JobAccessControlRequest]]]) -> JobPermissions + + Update job permissions. + + Updates the permissions on a job. Jobs can inherit permissions from their root object. + + :param job_id: str + The job for which to get or manage permissions. + :param access_control_list: List[:class:`JobAccessControlRequest`] (optional) + + :returns: :class:`JobPermissions` + + + .. py:method:: wait_get_run_job_terminated_or_skipped(run_id: int, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[Run], None]]) -> Run diff --git a/docs/workspace/ml/experiments.rst b/docs/workspace/ml/experiments.rst new file mode 100644 index 000000000..62e16fc24 --- /dev/null +++ b/docs/workspace/ml/experiments.rst @@ -0,0 +1,688 @@ +``w.experiments``: Experiments +============================== +.. currentmodule:: databricks.sdk.service.ml + +.. 
py:class:: ExperimentsAPI + + Experiments are the primary unit of organization in MLflow; all MLflow runs belong to an experiment. Each + experiment lets you visualize, search, and compare runs, as well as download run artifacts or metadata for + analysis in other tools. Experiments are maintained in a Databricks hosted MLflow tracking server. + + Experiments are located in the workspace file tree. You manage experiments using the same tools you use to + manage other workspace objects such as folders, notebooks, and libraries. + + .. py:method:: create_experiment(name: str [, artifact_location: Optional[str], tags: Optional[List[ExperimentTag]]]) -> CreateExperimentResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + experiment = w.experiments.create_experiment(name=f'sdk-{time.time_ns()}') + + # cleanup + w.experiments.delete_experiment(experiment_id=experiment.experiment_id) + + Create experiment. + + Creates an experiment with a name. Returns the ID of the newly created experiment. Validates that + another experiment with the same name does not already exist and fails if another experiment with the + same name already exists. + + Throws `RESOURCE_ALREADY_EXISTS` if a experiment with the given name exists. + + :param name: str + Experiment name. + :param artifact_location: str (optional) + Location where all artifacts for the experiment are stored. If not provided, the remote server will + select an appropriate default. + :param tags: List[:class:`ExperimentTag`] (optional) + A collection of tags to set on the experiment. Maximum tag size and number of tags per request + depends on the storage backend. All storage backends are guaranteed to support tag keys up to 250 + bytes in size and tag values up to 5000 bytes in size. All storage backends are also guaranteed to + support up to 20 tags per request. + + :returns: :class:`CreateExperimentResponse` + + + .. py:method:: create_run( [, experiment_id: Optional[str], start_time: Optional[int], tags: Optional[List[RunTag]], user_id: Optional[str]]) -> CreateRunResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import ml + + w = WorkspaceClient() + + experiment = w.experiments.create_experiment(name=f'sdk-{time.time_ns()}') + + created = w.experiments.create_run(experiment_id=experiment.experiment_id, + tags=[ml.RunTag(key="foo", value="bar")]) + + # cleanup + w.experiments.delete_experiment(experiment_id=experiment.experiment_id) + w.experiments.delete_run(run_id=created.run.info.run_id) + + Create a run. + + Creates a new run within an experiment. A run is usually a single execution of a machine learning or + data ETL pipeline. MLflow uses runs to track the `mlflowParam`, `mlflowMetric` and `mlflowRunTag` + associated with a single execution. + + :param experiment_id: str (optional) + ID of the associated experiment. + :param start_time: int (optional) + Unix timestamp in milliseconds of when the run started. + :param tags: List[:class:`RunTag`] (optional) + Additional metadata for run. + :param user_id: str (optional) + ID of the user executing the run. This field is deprecated as of MLflow 1.0, and will be removed in + a future MLflow release. Use 'mlflow.user' tag instead. + + :returns: :class:`CreateRunResponse` + + + .. py:method:: delete_experiment(experiment_id: str) + + Delete an experiment. 
+ + Marks an experiment and associated metadata, runs, metrics, params, and tags for deletion. If the + experiment uses FileStore, artifacts associated with experiment are also deleted. + + :param experiment_id: str + ID of the associated experiment. + + + + + .. py:method:: delete_run(run_id: str) + + Delete a run. + + Marks a run for deletion. + + :param run_id: str + ID of the run to delete. + + + + + .. py:method:: delete_runs(experiment_id: str, max_timestamp_millis: int [, max_runs: Optional[int]]) -> DeleteRunsResponse + + Delete runs by creation time. + + Bulk delete runs in an experiment that were created prior to or at the specified timestamp. Deletes at + most max_runs per request. + + :param experiment_id: str + The ID of the experiment containing the runs to delete. + :param max_timestamp_millis: int + The maximum creation timestamp in milliseconds since the UNIX epoch for deleting runs. Only runs + created prior to or at this timestamp are deleted. + :param max_runs: int (optional) + An optional positive integer indicating the maximum number of runs to delete. The maximum allowed + value for max_runs is 10000. + + :returns: :class:`DeleteRunsResponse` + + + .. py:method:: delete_tag(run_id: str, key: str) + + Delete a tag. + + Deletes a tag on a run. Tags are run metadata that can be updated during a run and after a run + completes. + + :param run_id: str + ID of the run that the tag was logged under. Must be provided. + :param key: str + Name of the tag. Maximum size is 255 bytes. Must be provided. + + + + + .. py:method:: get_by_name(experiment_name: str) -> GetExperimentResponse + + Get metadata. + + Gets metadata for an experiment. + + This endpoint will return deleted experiments, but prefers the active experiment if an active and + deleted experiment share the same name. If multiple deleted experiments share the same name, the API + will return one of them. + + Throws `RESOURCE_DOES_NOT_EXIST` if no experiment with the specified name exists. + + :param experiment_name: str + Name of the associated experiment. + + :returns: :class:`GetExperimentResponse` + + + .. py:method:: get_experiment(experiment_id: str) -> GetExperimentResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + experiment = w.experiments.create_experiment(name=f'sdk-{time.time_ns()}') + + _ = w.experiments.get_experiment(experiment_id=experiment.experiment_id) + + # cleanup + w.experiments.delete_experiment(experiment_id=experiment.experiment_id) + + Get an experiment. + + Gets metadata for an experiment. This method works on deleted experiments. + + :param experiment_id: str + ID of the associated experiment. + + :returns: :class:`GetExperimentResponse` + + + .. py:method:: get_history(metric_key: str [, max_results: Optional[int], page_token: Optional[str], run_id: Optional[str], run_uuid: Optional[str]]) -> Iterator[Metric] + + Get history of a given metric within a run. + + Gets a list of all values for the specified metric for a given run. + + :param metric_key: str + Name of the metric. + :param max_results: int (optional) + Maximum number of Metric records to return per paginated request. Default is set to 25,000. If set + higher than 25,000, a request Exception will be raised. + :param page_token: str (optional) + Token indicating the page of metric histories to fetch. + :param run_id: str (optional) + ID of the run from which to fetch metric values. Must be provided. 
+ :param run_uuid: str (optional) + [Deprecated, use run_id instead] ID of the run from which to fetch metric values. This field will be + removed in a future MLflow version. + + :returns: Iterator over :class:`Metric` + + + .. py:method:: get_permission_levels(experiment_id: str) -> GetExperimentPermissionLevelsResponse + + Get experiment permission levels. + + Gets the permission levels that a user can have on an object. + + :param experiment_id: str + The experiment for which to get or manage permissions. + + :returns: :class:`GetExperimentPermissionLevelsResponse` + + + .. py:method:: get_permissions(experiment_id: str) -> ExperimentPermissions + + Get experiment permissions. + + Gets the permissions of an experiment. Experiments can inherit permissions from their root object. + + :param experiment_id: str + The experiment for which to get or manage permissions. + + :returns: :class:`ExperimentPermissions` + + + .. py:method:: get_run(run_id: str [, run_uuid: Optional[str]]) -> GetRunResponse + + Get a run. + + Gets the metadata, metrics, params, and tags for a run. In the case where multiple metrics with the + same key are logged for a run, return only the value with the latest timestamp. + + If there are multiple values with the latest timestamp, return the maximum of these values. + + :param run_id: str + ID of the run to fetch. Must be provided. + :param run_uuid: str (optional) + [Deprecated, use run_id instead] ID of the run to fetch. This field will be removed in a future + MLflow version. + + :returns: :class:`GetRunResponse` + + + .. py:method:: list_artifacts( [, page_token: Optional[str], path: Optional[str], run_id: Optional[str], run_uuid: Optional[str]]) -> Iterator[FileInfo] + + Get all artifacts. + + List artifacts for a run. Takes an optional `artifact_path` prefix. If it is specified, the response + contains only artifacts with the specified prefix.", + + :param page_token: str (optional) + Token indicating the page of artifact results to fetch + :param path: str (optional) + Filter artifacts matching this path (a relative path from the root artifact directory). + :param run_id: str (optional) + ID of the run whose artifacts to list. Must be provided. + :param run_uuid: str (optional) + [Deprecated, use run_id instead] ID of the run whose artifacts to list. This field will be removed + in a future MLflow version. + + :returns: Iterator over :class:`FileInfo` + + + .. py:method:: list_experiments( [, max_results: Optional[int], page_token: Optional[str], view_type: Optional[str]]) -> Iterator[Experiment] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import ml + + w = WorkspaceClient() + + all = w.experiments.list_experiments(ml.ListExperimentsRequest()) + + List experiments. + + Gets a list of all experiments. + + :param max_results: int (optional) + Maximum number of experiments desired. If `max_results` is unspecified, return all experiments. If + `max_results` is too large, it'll be automatically capped at 1000. Callers of this endpoint are + encouraged to pass max_results explicitly and leverage page_token to iterate through experiments. + :param page_token: str (optional) + Token indicating the page of experiments to fetch + :param view_type: str (optional) + Qualifier for type of experiments to be returned. If unspecified, return only active experiments. + + :returns: Iterator over :class:`Experiment` + + + .. 
py:method:: log_batch( [, metrics: Optional[List[Metric]], params: Optional[List[Param]], run_id: Optional[str], tags: Optional[List[RunTag]]]) + + Log a batch. + + Logs a batch of metrics, params, and tags for a run. If any data failed to be persisted, the server + will respond with an error (non-200 status code). + + In case of error (due to internal server error or an invalid request), partial data may be written. + + You can write metrics, params, and tags in interleaving fashion, but within a given entity type are + guaranteed to follow the order specified in the request body. + + The overwrite behavior for metrics, params, and tags is as follows: + + * Metrics: metric values are never overwritten. Logging a metric (key, value, timestamp) appends to + the set of values for the metric with the provided key. + + * Tags: tag values can be overwritten by successive writes to the same tag key. That is, if multiple + tag values with the same key are provided in the same API request, the last-provided tag value is + written. Logging the same tag (key, value) is permitted. Specifically, logging a tag is idempotent. + + * Parameters: once written, param values cannot be changed (attempting to overwrite a param value will + result in an error). However, logging the same param (key, value) is permitted. Specifically, logging + a param is idempotent. + + Request Limits ------------------------------- A single JSON-serialized API request may be up to 1 MB + in size and contain: + + * No more than 1000 metrics, params, and tags in total * Up to 1000 metrics * Up to 100 params * Up to + 100 tags + + For example, a valid request might contain 900 metrics, 50 params, and 50 tags, but logging 900 + metrics, 50 params, and 51 tags is invalid. + + The following limits also apply to metric, param, and tag keys and values: + + * Metric keys, param keys, and tag keys can be up to 250 characters in length * Parameter and tag + values can be up to 250 characters in length + + :param metrics: List[:class:`Metric`] (optional) + Metrics to log. A single request can contain up to 1000 metrics, and up to 1000 metrics, params, and + tags in total. + :param params: List[:class:`Param`] (optional) + Params to log. A single request can contain up to 100 params, and up to 1000 metrics, params, and + tags in total. + :param run_id: str (optional) + ID of the run to log under + :param tags: List[:class:`RunTag`] (optional) + Tags to log. A single request can contain up to 100 tags, and up to 1000 metrics, params, and tags + in total. + + + + + .. py:method:: log_inputs( [, datasets: Optional[List[DatasetInput]], run_id: Optional[str]]) + + Log inputs to a run. + + **NOTE:** Experimental: This API may change or be removed in a future release without warning. + + :param datasets: List[:class:`DatasetInput`] (optional) + Dataset inputs + :param run_id: str (optional) + ID of the run to log under + + + + + .. py:method:: log_metric(key: str, value: float, timestamp: int [, run_id: Optional[str], run_uuid: Optional[str], step: Optional[int]]) + + Log a metric. + + Logs a metric for a run. A metric is a key-value pair (string key, float value) with an associated + timestamp. Examples include the various metrics that represent ML model accuracy. A metric can be + logged multiple times. + + :param key: str + Name of the metric. + :param value: float + Double value of the metric being logged. + :param timestamp: int + Unix timestamp in milliseconds at the time metric was logged. 
+ :param run_id: str (optional) + ID of the run under which to log the metric. Must be provided. + :param run_uuid: str (optional) + [Deprecated, use run_id instead] ID of the run under which to log the metric. This field will be + removed in a future MLflow version. + :param step: int (optional) + Step at which to log the metric + + + + + .. py:method:: log_model( [, model_json: Optional[str], run_id: Optional[str]]) + + Log a model. + + **NOTE:** Experimental: This API may change or be removed in a future release without warning. + + :param model_json: str (optional) + MLmodel file in json format. + :param run_id: str (optional) + ID of the run to log under + + + + + .. py:method:: log_param(key: str, value: str [, run_id: Optional[str], run_uuid: Optional[str]]) + + Log a param. + + Logs a param used for a run. A param is a key-value pair (string key, string value). Examples include + hyperparameters used for ML model training and constant dates and values used in an ETL pipeline. A + param can be logged only once for a run. + + :param key: str + Name of the param. Maximum size is 255 bytes. + :param value: str + String value of the param being logged. Maximum size is 500 bytes. + :param run_id: str (optional) + ID of the run under which to log the param. Must be provided. + :param run_uuid: str (optional) + [Deprecated, use run_id instead] ID of the run under which to log the param. This field will be + removed in a future MLflow version. + + + + + .. py:method:: restore_experiment(experiment_id: str) + + Restores an experiment. + + Restore an experiment marked for deletion. This also restores associated metadata, runs, metrics, + params, and tags. If experiment uses FileStore, underlying artifacts associated with experiment are + also restored. + + Throws `RESOURCE_DOES_NOT_EXIST` if experiment was never created or was permanently deleted. + + :param experiment_id: str + ID of the associated experiment. + + + + + .. py:method:: restore_run(run_id: str) + + Restore a run. + + Restores a deleted run. + + :param run_id: str + ID of the run to restore. + + + + + .. py:method:: restore_runs(experiment_id: str, min_timestamp_millis: int [, max_runs: Optional[int]]) -> RestoreRunsResponse + + Restore runs by deletion time. + + Bulk restore runs in an experiment that were deleted no earlier than the specified timestamp. Restores + at most max_runs per request. + + :param experiment_id: str + The ID of the experiment containing the runs to restore. + :param min_timestamp_millis: int + The minimum deletion timestamp in milliseconds since the UNIX epoch for restoring runs. Only runs + deleted no earlier than this timestamp are restored. + :param max_runs: int (optional) + An optional positive integer indicating the maximum number of runs to restore. The maximum allowed + value for max_runs is 10000. + + :returns: :class:`RestoreRunsResponse` + + + .. py:method:: search_experiments( [, filter: Optional[str], max_results: Optional[int], order_by: Optional[List[str]], page_token: Optional[str], view_type: Optional[SearchExperimentsViewType]]) -> Iterator[Experiment] + + Search experiments. + + Searches for experiments that satisfy specified search criteria. + + :param filter: str (optional) + String representing a SQL filter condition (e.g. "name ILIKE 'my-experiment%'") + :param max_results: int (optional) + Maximum number of experiments desired. Max threshold is 3000. 
+ :param order_by: List[str] (optional) + List of columns for ordering search results, which can include experiment name and last updated + timestamp with an optional "DESC" or "ASC" annotation, where "ASC" is the default. Tiebreaks are + done by experiment id DESC. + :param page_token: str (optional) + Token indicating the page of experiments to fetch + :param view_type: :class:`SearchExperimentsViewType` (optional) + Qualifier for type of experiments to be returned. If unspecified, return only active experiments. + + :returns: Iterator over :class:`Experiment` + + + .. py:method:: search_runs( [, experiment_ids: Optional[List[str]], filter: Optional[str], max_results: Optional[int], order_by: Optional[List[str]], page_token: Optional[str], run_view_type: Optional[SearchRunsRunViewType]]) -> Iterator[Run] + + Search for runs. + + Searches for runs that satisfy expressions. + + Search expressions can use `mlflowMetric` and `mlflowParam` keys.", + + :param experiment_ids: List[str] (optional) + List of experiment IDs to search over. + :param filter: str (optional) + A filter expression over params, metrics, and tags, that allows returning a subset of runs. The + syntax is a subset of SQL that supports ANDing together binary operations between a param, metric, + or tag and a constant. + + Example: `metrics.rmse < 1 and params.model_class = 'LogisticRegression'` + + You can select columns with special characters (hyphen, space, period, etc.) by using double quotes: + `metrics."model class" = 'LinearRegression' and tags."user-name" = 'Tomas'` + + Supported operators are `=`, `!=`, `>`, `>=`, `<`, and `<=`. + :param max_results: int (optional) + Maximum number of runs desired. Max threshold is 50000 + :param order_by: List[str] (optional) + List of columns to be ordered by, including attributes, params, metrics, and tags with an optional + "DESC" or "ASC" annotation, where "ASC" is the default. Example: ["params.input DESC", + "metrics.alpha ASC", "metrics.rmse"] Tiebreaks are done by start_time DESC followed by run_id for + runs with the same start time (and this is the default ordering criterion if order_by is not + provided). + :param page_token: str (optional) + Token for the current page of runs. + :param run_view_type: :class:`SearchRunsRunViewType` (optional) + Whether to display only active, only deleted, or all runs. Defaults to only active runs. + + :returns: Iterator over :class:`Run` + + + .. py:method:: set_experiment_tag(experiment_id: str, key: str, value: str) + + Set a tag. + + Sets a tag on an experiment. Experiment tags are metadata that can be updated. + + :param experiment_id: str + ID of the experiment under which to log the tag. Must be provided. + :param key: str + Name of the tag. Maximum size depends on storage backend. All storage backends are guaranteed to + support key values up to 250 bytes in size. + :param value: str + String value of the tag being logged. Maximum size depends on storage backend. All storage backends + are guaranteed to support key values up to 5000 bytes in size. + + + + + .. py:method:: set_permissions(experiment_id: str [, access_control_list: Optional[List[ExperimentAccessControlRequest]]]) -> ExperimentPermissions + + Set experiment permissions. + + Sets permissions on an experiment. Experiments can inherit permissions from their root object. + + :param experiment_id: str + The experiment for which to get or manage permissions. 
+ :param access_control_list: List[:class:`ExperimentAccessControlRequest`] (optional) + + :returns: :class:`ExperimentPermissions` + + + .. py:method:: set_tag(key: str, value: str [, run_id: Optional[str], run_uuid: Optional[str]]) + + Set a tag. + + Sets a tag on a run. Tags are run metadata that can be updated during a run and after a run completes. + + :param key: str + Name of the tag. Maximum size depends on storage backend. All storage backends are guaranteed to + support key values up to 250 bytes in size. + :param value: str + String value of the tag being logged. Maximum size depends on storage backend. All storage backends + are guaranteed to support key values up to 5000 bytes in size. + :param run_id: str (optional) + ID of the run under which to log the tag. Must be provided. + :param run_uuid: str (optional) + [Deprecated, use run_id instead] ID of the run under which to log the tag. This field will be + removed in a future MLflow version. + + + + + .. py:method:: update_experiment(experiment_id: str [, new_name: Optional[str]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + experiment = w.experiments.create_experiment(name=f'sdk-{time.time_ns()}') + + w.experiments.update_experiment(new_name=f'sdk-{time.time_ns()}', experiment_id=experiment.experiment_id) + + # cleanup + w.experiments.delete_experiment(experiment_id=experiment.experiment_id) + + Update an experiment. + + Updates experiment metadata. + + :param experiment_id: str + ID of the associated experiment. + :param new_name: str (optional) + If provided, the experiment's name is changed to the new name. The new name must be unique. + + + + + .. py:method:: update_permissions(experiment_id: str [, access_control_list: Optional[List[ExperimentAccessControlRequest]]]) -> ExperimentPermissions + + Update experiment permissions. + + Updates the permissions on an experiment. Experiments can inherit permissions from their root object. + + :param experiment_id: str + The experiment for which to get or manage permissions. + :param access_control_list: List[:class:`ExperimentAccessControlRequest`] (optional) + + :returns: :class:`ExperimentPermissions` + + + .. py:method:: update_run( [, end_time: Optional[int], run_id: Optional[str], run_uuid: Optional[str], status: Optional[UpdateRunStatus]]) -> UpdateRunResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import ml + + w = WorkspaceClient() + + experiment = w.experiments.create_experiment(name=f'sdk-{time.time_ns()}') + + created = w.experiments.create_run(experiment_id=experiment.experiment_id, + tags=[ml.RunTag(key="foo", value="bar")]) + + _ = w.experiments.update_run(run_id=created.run.info.run_id, status=ml.UpdateRunStatus.KILLED) + + # cleanup + w.experiments.delete_experiment(experiment_id=experiment.experiment_id) + w.experiments.delete_run(run_id=created.run.info.run_id) + + Update a run. + + Updates run metadata. + + :param end_time: int (optional) + Unix timestamp in milliseconds of when the run ended. + :param run_id: str (optional) + ID of the run to update. Must be provided. + :param run_uuid: str (optional) + [Deprecated, use run_id instead] ID of the run to update.. This field will be removed in a future + MLflow version. + :param status: :class:`UpdateRunStatus` (optional) + Updated status of the run. 
+ + :returns: :class:`UpdateRunResponse` + \ No newline at end of file diff --git a/docs/workspace/ml/index.rst b/docs/workspace/ml/index.rst new file mode 100644 index 000000000..1a713eb57 --- /dev/null +++ b/docs/workspace/ml/index.rst @@ -0,0 +1,11 @@ + +Machine Learning +================ + +Create and manage experiments, features, and other machine learning artifacts + +.. toctree:: + :maxdepth: 1 + + experiments + model_registry \ No newline at end of file diff --git a/docs/workspace/ml/model_registry.rst b/docs/workspace/ml/model_registry.rst new file mode 100644 index 000000000..8ac52916f --- /dev/null +++ b/docs/workspace/ml/model_registry.rst @@ -0,0 +1,913 @@ +``w.model_registry``: Model Registry +==================================== +.. currentmodule:: databricks.sdk.service.ml + +.. py:class:: ModelRegistryAPI + + Note: This API reference documents APIs for the Workspace Model Registry. Databricks recommends using + [Models in Unity Catalog](/api/workspace/registeredmodels) instead. Models in Unity Catalog provides + centralized model governance, cross-workspace access, lineage, and deployment. Workspace Model Registry + will be deprecated in the future. + + The Workspace Model Registry is a centralized model repository and a UI and set of APIs that enable you to + manage the full lifecycle of MLflow Models. + + .. py:method:: approve_transition_request(name: str, version: str, stage: Stage, archive_existing_versions: bool [, comment: Optional[str]]) -> ApproveTransitionRequestResponse + + Approve transition request. + + Approves a model version stage transition request. + + :param name: str + Name of the model. + :param version: str + Version of the model. + :param stage: :class:`Stage` + Target stage of the transition. Valid values are: + + * `None`: The initial stage of a model version. + + * `Staging`: Staging or pre-production stage. + + * `Production`: Production stage. + + * `Archived`: Archived stage. + :param archive_existing_versions: bool + Specifies whether to archive all current model versions in the target stage. + :param comment: str (optional) + User-provided comment on the action. + + :returns: :class:`ApproveTransitionRequestResponse` + + + .. py:method:: create_comment(name: str, version: str, comment: str) -> CreateCommentResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + model = w.model_registry.create_model(name=f'sdk-{time.time_ns()}') + + mv = w.model_registry.create_model_version(name=model.registered_model.name, source="dbfs:/tmp") + + created = w.model_registry.create_comment(comment=f'sdk-{time.time_ns()}', + name=mv.model_version.name, + version=mv.model_version.version) + + # cleanup + w.model_registry.delete_comment(id=created.comment.id) + + Post a comment. + + Posts a comment on a model version. A comment can be submitted either by a user or programmatically to + display relevant information about the model. For example, test results or deployment errors. + + :param name: str + Name of the model. + :param version: str + Version of the model. + :param comment: str + User-provided comment on the action. + + :returns: :class:`CreateCommentResponse` + + + .. py:method:: create_model(name: str [, description: Optional[str], tags: Optional[List[ModelTag]]]) -> CreateModelResponse + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + model = w.model_registry.create_model(name=f'sdk-{time.time_ns()}') + + Create a model. + + Creates a new registered model with the name specified in the request body. + + Throws `RESOURCE_ALREADY_EXISTS` if a registered model with the given name exists. + + :param name: str + Register models under this name + :param description: str (optional) + Optional description for registered model. + :param tags: List[:class:`ModelTag`] (optional) + Additional metadata for registered model. + + :returns: :class:`CreateModelResponse` + + + .. py:method:: create_model_version(name: str, source: str [, description: Optional[str], run_id: Optional[str], run_link: Optional[str], tags: Optional[List[ModelVersionTag]]]) -> CreateModelVersionResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + model = w.model_registry.create_model(name=f'sdk-{time.time_ns()}') + + mv = w.model_registry.create_model_version(name=model.registered_model.name, source="dbfs:/tmp") + + Create a model version. + + Creates a model version. + + :param name: str + Register model under this name + :param source: str + URI indicating the location of the model artifacts. + :param description: str (optional) + Optional description for model version. + :param run_id: str (optional) + MLflow run ID for correlation, if `source` was generated by an experiment run in MLflow tracking + server + :param run_link: str (optional) + MLflow run link - this is the exact link of the run that generated this model version, potentially + hosted at another instance of MLflow. + :param tags: List[:class:`ModelVersionTag`] (optional) + Additional metadata for model version. + + :returns: :class:`CreateModelVersionResponse` + + + .. py:method:: create_transition_request(name: str, version: str, stage: Stage [, comment: Optional[str]]) -> CreateTransitionRequestResponse + + Make a transition request. + + Creates a model version stage transition request. + + :param name: str + Name of the model. + :param version: str + Version of the model. + :param stage: :class:`Stage` + Target stage of the transition. Valid values are: + + * `None`: The initial stage of a model version. + + * `Staging`: Staging or pre-production stage. + + * `Production`: Production stage. + + * `Archived`: Archived stage. + :param comment: str (optional) + User-provided comment on the action. + + :returns: :class:`CreateTransitionRequestResponse` + + + .. py:method:: create_webhook(events: List[RegistryWebhookEvent] [, description: Optional[str], http_url_spec: Optional[HttpUrlSpec], job_spec: Optional[JobSpec], model_name: Optional[str], status: Optional[RegistryWebhookStatus]]) -> CreateWebhookResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import ml + + w = WorkspaceClient() + + created = w.model_registry.create_webhook(description=f'sdk-{time.time_ns()}', + events=[ml.RegistryWebhookEvent.MODEL_VERSION_CREATED], + http_url_spec=ml.HttpUrlSpec(url=w.config.host)) + + # cleanup + w.model_registry.delete_webhook(id=created.webhook.id) + + Create a webhook. + + **NOTE**: This endpoint is in Public Preview. + + Creates a registry webhook. 
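+
+      A webhook can also be created with ``status`` set to test mode and exercised through
+      :method:test_registry_webhook before it is switched on. The sketch below is a minimal illustration and
+      assumes the :class:`RegistryWebhookStatus` enum exposes a ``TEST_MODE`` member corresponding to the
+      status values described under ``status`` below.
+
+      .. code-block::
+
+         import time
+
+         from databricks.sdk import WorkspaceClient
+         from databricks.sdk.service import ml
+
+         w = WorkspaceClient()
+
+         # Create the webhook in test mode so no real notification is sent yet.
+         created = w.model_registry.create_webhook(description=f'sdk-{time.time_ns()}',
+                                                   events=[ml.RegistryWebhookEvent.MODEL_VERSION_CREATED],
+                                                   http_url_spec=ml.HttpUrlSpec(url=w.config.host),
+                                                   status=ml.RegistryWebhookStatus.TEST_MODE)
+
+         # Exercise the webhook through the test endpoint.
+         _ = w.model_registry.test_registry_webhook(id=created.webhook.id)
+
+         # cleanup
+         w.model_registry.delete_webhook(id=created.webhook.id)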
+ + :param events: List[:class:`RegistryWebhookEvent`] + Events that can trigger a registry webhook: * `MODEL_VERSION_CREATED`: A new model version was + created for the associated model. + + * `MODEL_VERSION_TRANSITIONED_STAGE`: A model version’s stage was changed. + + * `TRANSITION_REQUEST_CREATED`: A user requested a model version’s stage be transitioned. + + * `COMMENT_CREATED`: A user wrote a comment on a registered model. + + * `REGISTERED_MODEL_CREATED`: A new registered model was created. This event type can only be + specified for a registry-wide webhook, which can be created by not specifying a model name in the + create request. + + * `MODEL_VERSION_TAG_SET`: A user set a tag on the model version. + + * `MODEL_VERSION_TRANSITIONED_TO_STAGING`: A model version was transitioned to staging. + + * `MODEL_VERSION_TRANSITIONED_TO_PRODUCTION`: A model version was transitioned to production. + + * `MODEL_VERSION_TRANSITIONED_TO_ARCHIVED`: A model version was archived. + + * `TRANSITION_REQUEST_TO_STAGING_CREATED`: A user requested a model version be transitioned to + staging. + + * `TRANSITION_REQUEST_TO_PRODUCTION_CREATED`: A user requested a model version be transitioned to + production. + + * `TRANSITION_REQUEST_TO_ARCHIVED_CREATED`: A user requested a model version be archived. + :param description: str (optional) + User-specified description for the webhook. + :param http_url_spec: :class:`HttpUrlSpec` (optional) + :param job_spec: :class:`JobSpec` (optional) + :param model_name: str (optional) + Name of the model whose events would trigger this webhook. + :param status: :class:`RegistryWebhookStatus` (optional) + Enable or disable triggering the webhook, or put the webhook into test mode. The default is + `ACTIVE`: * `ACTIVE`: Webhook is triggered when an associated event happens. + + * `DISABLED`: Webhook is not triggered. + + * `TEST_MODE`: Webhook can be triggered through the test endpoint, but is not triggered on a real + event. + + :returns: :class:`CreateWebhookResponse` + + + .. py:method:: delete_comment(id: str) + + Delete a comment. + + Deletes a comment on a model version. + + :param id: str + + + + + .. py:method:: delete_model(name: str) + + Delete a model. + + Deletes a registered model. + + :param name: str + Registered model unique name identifier. + + + + + .. py:method:: delete_model_tag(name: str, key: str) + + Delete a model tag. + + Deletes the tag for a registered model. + + :param name: str + Name of the registered model that the tag was logged under. + :param key: str + Name of the tag. The name must be an exact match; wild-card deletion is not supported. Maximum size + is 250 bytes. + + + + + .. py:method:: delete_model_version(name: str, version: str) + + Delete a model version. + + Deletes a model version. + + :param name: str + Name of the registered model + :param version: str + Model version number + + + + + .. py:method:: delete_model_version_tag(name: str, version: str, key: str) + + Delete a model version tag. + + Deletes a model version tag. + + :param name: str + Name of the registered model that the tag was logged under. + :param version: str + Model version number that the tag was logged under. + :param key: str + Name of the tag. The name must be an exact match; wild-card deletion is not supported. Maximum size + is 250 bytes. + + + + + .. py:method:: delete_transition_request(name: str, version: str, stage: DeleteTransitionRequestStage, creator: str [, comment: Optional[str]]) + + Delete a transition request. 
+ + Cancels a model version stage transition request. + + :param name: str + Name of the model. + :param version: str + Version of the model. + :param stage: :class:`DeleteTransitionRequestStage` + Target stage of the transition request. Valid values are: + + * `None`: The initial stage of a model version. + + * `Staging`: Staging or pre-production stage. + + * `Production`: Production stage. + + * `Archived`: Archived stage. + :param creator: str + Username of the user who created this request. Of the transition requests matching the specified + details, only the one transition created by this user will be deleted. + :param comment: str (optional) + User-provided comment on the action. + + + + + .. py:method:: delete_webhook( [, id: Optional[str]]) + + Delete a webhook. + + **NOTE:** This endpoint is in Public Preview. + + Deletes a registry webhook. + + :param id: str (optional) + Webhook ID required to delete a registry webhook. + + + + + .. py:method:: get_latest_versions(name: str [, stages: Optional[List[str]]]) -> Iterator[ModelVersion] + + Get the latest version. + + Gets the latest version of a registered model. + + :param name: str + Registered model unique name identifier. + :param stages: List[str] (optional) + List of stages. + + :returns: Iterator over :class:`ModelVersion` + + + .. py:method:: get_model(name: str) -> GetModelResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.model_registry.create_model(name=f'sdk-{time.time_ns()}') + + model = w.model_registry.get_model(name=created.registered_model.name) + + Get model. + + Get the details of a model. This is a Databricks workspace version of the [MLflow endpoint] that also + returns the model's Databricks workspace ID and the permission level of the requesting user on the + model. + + [MLflow endpoint]: https://www.mlflow.org/docs/latest/rest-api.html#get-registeredmodel + + :param name: str + Registered model unique name identifier. + + :returns: :class:`GetModelResponse` + + + .. py:method:: get_model_version(name: str, version: str) -> GetModelVersionResponse + + Get a model version. + + Get a model version. + + :param name: str + Name of the registered model + :param version: str + Model version number + + :returns: :class:`GetModelVersionResponse` + + + .. py:method:: get_model_version_download_uri(name: str, version: str) -> GetModelVersionDownloadUriResponse + + Get a model version URI. + + Gets a URI to download the model version. + + :param name: str + Name of the registered model + :param version: str + Model version number + + :returns: :class:`GetModelVersionDownloadUriResponse` + + + .. py:method:: get_permission_levels(registered_model_id: str) -> GetRegisteredModelPermissionLevelsResponse + + Get registered model permission levels. + + Gets the permission levels that a user can have on an object. + + :param registered_model_id: str + The registered model for which to get or manage permissions. + + :returns: :class:`GetRegisteredModelPermissionLevelsResponse` + + + .. py:method:: get_permissions(registered_model_id: str) -> RegisteredModelPermissions + + Get registered model permissions. + + Gets the permissions of a registered model. Registered models can inherit permissions from their root + object. + + :param registered_model_id: str + The registered model for which to get or manage permissions. + + :returns: :class:`RegisteredModelPermissions` + + + .. 
py:method:: list_models( [, max_results: Optional[int], page_token: Optional[str]]) -> Iterator[Model] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import ml + + w = WorkspaceClient() + + all = w.model_registry.list_models(ml.ListModelsRequest()) + + List models. + + Lists all available registered models, up to the limit specified in __max_results__. + + :param max_results: int (optional) + Maximum number of registered models desired. Max threshold is 1000. + :param page_token: str (optional) + Pagination token to go to the next page based on a previous query. + + :returns: Iterator over :class:`Model` + + + .. py:method:: list_transition_requests(name: str, version: str) -> Iterator[Activity] + + List transition requests. + + Gets a list of all open stage transition requests for the model version. + + :param name: str + Name of the model. + :param version: str + Version of the model. + + :returns: Iterator over :class:`Activity` + + + .. py:method:: list_webhooks( [, events: Optional[List[RegistryWebhookEvent]], model_name: Optional[str], page_token: Optional[str]]) -> Iterator[RegistryWebhook] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import ml + + w = WorkspaceClient() + + all = w.model_registry.list_webhooks(ml.ListWebhooksRequest()) + + List registry webhooks. + + **NOTE:** This endpoint is in Public Preview. + + Lists all registry webhooks. + + :param events: List[:class:`RegistryWebhookEvent`] (optional) + If `events` is specified, any webhook with one or more of the specified trigger events is included + in the output. If `events` is not specified, webhooks of all event types are included in the output. + :param model_name: str (optional) + If not specified, all webhooks associated with the specified events are listed, regardless of their + associated model. + :param page_token: str (optional) + Token indicating the page of artifact results to fetch + + :returns: Iterator over :class:`RegistryWebhook` + + + .. py:method:: reject_transition_request(name: str, version: str, stage: Stage [, comment: Optional[str]]) -> RejectTransitionRequestResponse + + Reject a transition request. + + Rejects a model version stage transition request. + + :param name: str + Name of the model. + :param version: str + Version of the model. + :param stage: :class:`Stage` + Target stage of the transition. Valid values are: + + * `None`: The initial stage of a model version. + + * `Staging`: Staging or pre-production stage. + + * `Production`: Production stage. + + * `Archived`: Archived stage. + :param comment: str (optional) + User-provided comment on the action. + + :returns: :class:`RejectTransitionRequestResponse` + + + .. py:method:: rename_model(name: str [, new_name: Optional[str]]) -> RenameModelResponse + + Rename a model. + + Renames a registered model. + + :param name: str + Registered model unique name identifier. + :param new_name: str (optional) + If provided, updates the name for this `registered_model`. + + :returns: :class:`RenameModelResponse` + + + .. py:method:: search_model_versions( [, filter: Optional[str], max_results: Optional[int], order_by: Optional[List[str]], page_token: Optional[str]]) -> Iterator[ModelVersion] + + Searches model versions. + + Searches for specific model versions based on the supplied __filter__. + + :param filter: str (optional) + String filter condition, like "name='my-model-name'". 
Must be a single boolean condition, with + string values wrapped in single quotes. + :param max_results: int (optional) + Maximum number of models desired. Max threshold is 10K. + :param order_by: List[str] (optional) + List of columns to be ordered by including model name, version, stage with an optional "DESC" or + "ASC" annotation, where "ASC" is the default. Tiebreaks are done by latest stage transition + timestamp, followed by name ASC, followed by version DESC. + :param page_token: str (optional) + Pagination token to go to next page based on previous search query. + + :returns: Iterator over :class:`ModelVersion` + + + .. py:method:: search_models( [, filter: Optional[str], max_results: Optional[int], order_by: Optional[List[str]], page_token: Optional[str]]) -> Iterator[Model] + + Search models. + + Search for registered models based on the specified __filter__. + + :param filter: str (optional) + String filter condition, like "name LIKE 'my-model-name'". Interpreted in the backend automatically + as "name LIKE '%my-model-name%'". Single boolean condition, with string values wrapped in single + quotes. + :param max_results: int (optional) + Maximum number of models desired. Default is 100. Max threshold is 1000. + :param order_by: List[str] (optional) + List of columns for ordering search results, which can include model name and last updated timestamp + with an optional "DESC" or "ASC" annotation, where "ASC" is the default. Tiebreaks are done by model + name ASC. + :param page_token: str (optional) + Pagination token to go to the next page based on a previous search query. + + :returns: Iterator over :class:`Model` + + + .. py:method:: set_model_tag(name: str, key: str, value: str) + + Set a tag. + + Sets a tag on a registered model. + + :param name: str + Unique name of the model. + :param key: str + Name of the tag. Maximum size depends on storage backend. If a tag with this name already exists, + its preexisting value will be replaced by the specified `value`. All storage backends are guaranteed + to support key values up to 250 bytes in size. + :param value: str + String value of the tag being logged. Maximum size depends on storage backend. All storage backends + are guaranteed to support key values up to 5000 bytes in size. + + + + + .. py:method:: set_model_version_tag(name: str, version: str, key: str, value: str) + + Set a version tag. + + Sets a model version tag. + + :param name: str + Unique name of the model. + :param version: str + Model version number. + :param key: str + Name of the tag. Maximum size depends on storage backend. If a tag with this name already exists, + its preexisting value will be replaced by the specified `value`. All storage backends are guaranteed + to support key values up to 250 bytes in size. + :param value: str + String value of the tag being logged. Maximum size depends on storage backend. All storage backends + are guaranteed to support key values up to 5000 bytes in size. + + + + + .. py:method:: set_permissions(registered_model_id: str [, access_control_list: Optional[List[RegisteredModelAccessControlRequest]]]) -> RegisteredModelPermissions + + Set registered model permissions. + + Sets permissions on a registered model. Registered models can inherit permissions from their root + object. + + :param registered_model_id: str + The registered model for which to get or manage permissions. + :param access_control_list: List[:class:`RegisteredModelAccessControlRequest`] (optional) + + :returns: :class:`RegisteredModelPermissions` + + + .. 
py:method:: test_registry_webhook(id: str [, event: Optional[RegistryWebhookEvent]]) -> TestRegistryWebhookResponse + + Test a webhook. + + **NOTE:** This endpoint is in Public Preview. + + Tests a registry webhook. + + :param id: str + Webhook ID + :param event: :class:`RegistryWebhookEvent` (optional) + If `event` is specified, the test trigger uses the specified event. If `event` is not specified, the + test trigger uses a randomly chosen event associated with the webhook. + + :returns: :class:`TestRegistryWebhookResponse` + + + .. py:method:: transition_stage(name: str, version: str, stage: Stage, archive_existing_versions: bool [, comment: Optional[str]]) -> TransitionStageResponse + + Transition a stage. + + Transition a model version's stage. This is a Databricks workspace version of the [MLflow endpoint] + that also accepts a comment associated with the transition to be recorded.", + + [MLflow endpoint]: https://www.mlflow.org/docs/latest/rest-api.html#transition-modelversion-stage + + :param name: str + Name of the model. + :param version: str + Version of the model. + :param stage: :class:`Stage` + Target stage of the transition. Valid values are: + + * `None`: The initial stage of a model version. + + * `Staging`: Staging or pre-production stage. + + * `Production`: Production stage. + + * `Archived`: Archived stage. + :param archive_existing_versions: bool + Specifies whether to archive all current model versions in the target stage. + :param comment: str (optional) + User-provided comment on the action. + + :returns: :class:`TransitionStageResponse` + + + .. py:method:: update_comment(id: str, comment: str) -> UpdateCommentResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + model = w.model_registry.create_model(name=f'sdk-{time.time_ns()}') + + mv = w.model_registry.create_model_version(name=model.registered_model.name, source="dbfs:/tmp") + + created = w.model_registry.create_comment(comment=f'sdk-{time.time_ns()}', + name=mv.model_version.name, + version=mv.model_version.version) + + _ = w.model_registry.update_comment(comment=f'sdk-{time.time_ns()}', id=created.comment.id) + + # cleanup + w.model_registry.delete_comment(id=created.comment.id) + + Update a comment. + + Post an edit to a comment on a model version. + + :param id: str + Unique identifier of an activity + :param comment: str + User-provided comment on the action. + + :returns: :class:`UpdateCommentResponse` + + + .. py:method:: update_model(name: str [, description: Optional[str]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + model = w.model_registry.create_model(name=f'sdk-{time.time_ns()}') + + created = w.model_registry.create_model_version(name=model.registered_model.name, source="dbfs:/tmp") + + w.model_registry.update_model_version(description=f'sdk-{time.time_ns()}', + name=created.model_version.name, + version=created.model_version.version) + + Update model. + + Updates a registered model. + + :param name: str + Registered model unique name identifier. + :param description: str (optional) + If provided, updates the description for this `registered_model`. + + + + + .. py:method:: update_model_version(name: str, version: str [, description: Optional[str]]) + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + model = w.model_registry.create_model(name=f'sdk-{time.time_ns()}') + + created = w.model_registry.create_model_version(name=model.registered_model.name, source="dbfs:/tmp") + + w.model_registry.update_model_version(description=f'sdk-{time.time_ns()}', + name=created.model_version.name, + version=created.model_version.version) + + Update model version. + + Updates the model version. + + :param name: str + Name of the registered model + :param version: str + Model version number + :param description: str (optional) + If provided, updates the description for this `registered_model`. + + + + + .. py:method:: update_permissions(registered_model_id: str [, access_control_list: Optional[List[RegisteredModelAccessControlRequest]]]) -> RegisteredModelPermissions + + Update registered model permissions. + + Updates the permissions on a registered model. Registered models can inherit permissions from their + root object. + + :param registered_model_id: str + The registered model for which to get or manage permissions. + :param access_control_list: List[:class:`RegisteredModelAccessControlRequest`] (optional) + + :returns: :class:`RegisteredModelPermissions` + + + .. py:method:: update_webhook(id: str [, description: Optional[str], events: Optional[List[RegistryWebhookEvent]], http_url_spec: Optional[HttpUrlSpec], job_spec: Optional[JobSpec], status: Optional[RegistryWebhookStatus]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import ml + + w = WorkspaceClient() + + created = w.model_registry.create_webhook(description=f'sdk-{time.time_ns()}', + events=[ml.RegistryWebhookEvent.MODEL_VERSION_CREATED], + http_url_spec=ml.HttpUrlSpec(url=w.config.host)) + + w.model_registry.update_webhook(id=created.webhook.id, description=f'sdk-{time.time_ns()}') + + # cleanup + w.model_registry.delete_webhook(id=created.webhook.id) + + Update a webhook. + + **NOTE:** This endpoint is in Public Preview. + + Updates a registry webhook. + + :param id: str + Webhook ID + :param description: str (optional) + User-specified description for the webhook. + :param events: List[:class:`RegistryWebhookEvent`] (optional) + Events that can trigger a registry webhook: * `MODEL_VERSION_CREATED`: A new model version was + created for the associated model. + + * `MODEL_VERSION_TRANSITIONED_STAGE`: A model version’s stage was changed. + + * `TRANSITION_REQUEST_CREATED`: A user requested a model version’s stage be transitioned. + + * `COMMENT_CREATED`: A user wrote a comment on a registered model. + + * `REGISTERED_MODEL_CREATED`: A new registered model was created. This event type can only be + specified for a registry-wide webhook, which can be created by not specifying a model name in the + create request. + + * `MODEL_VERSION_TAG_SET`: A user set a tag on the model version. + + * `MODEL_VERSION_TRANSITIONED_TO_STAGING`: A model version was transitioned to staging. + + * `MODEL_VERSION_TRANSITIONED_TO_PRODUCTION`: A model version was transitioned to production. + + * `MODEL_VERSION_TRANSITIONED_TO_ARCHIVED`: A model version was archived. + + * `TRANSITION_REQUEST_TO_STAGING_CREATED`: A user requested a model version be transitioned to + staging. + + * `TRANSITION_REQUEST_TO_PRODUCTION_CREATED`: A user requested a model version be transitioned to + production. 
+ + * `TRANSITION_REQUEST_TO_ARCHIVED_CREATED`: A user requested a model version be archived. + :param http_url_spec: :class:`HttpUrlSpec` (optional) + :param job_spec: :class:`JobSpec` (optional) + :param status: :class:`RegistryWebhookStatus` (optional) + Enable or disable triggering the webhook, or put the webhook into test mode. The default is + `ACTIVE`: * `ACTIVE`: Webhook is triggered when an associated event happens. + + * `DISABLED`: Webhook is not triggered. + + * `TEST_MODE`: Webhook can be triggered through the test endpoint, but is not triggered on a real + event. + + + \ No newline at end of file diff --git a/docs/workspace/pipelines/index.rst b/docs/workspace/pipelines/index.rst new file mode 100644 index 000000000..83aaafe99 --- /dev/null +++ b/docs/workspace/pipelines/index.rst @@ -0,0 +1,10 @@ + +Delta Live Tables +================= + +Manage pipelines, runs, and other Delta Live Table resources + +.. toctree:: + :maxdepth: 1 + + pipelines \ No newline at end of file diff --git a/docs/workspace/pipelines/pipelines.rst b/docs/workspace/pipelines/pipelines.rst new file mode 100644 index 000000000..29872c900 --- /dev/null +++ b/docs/workspace/pipelines/pipelines.rst @@ -0,0 +1,497 @@ +``w.pipelines``: Pipelines +========================== +.. currentmodule:: databricks.sdk.service.pipelines + +.. py:class:: PipelinesAPI + + The Delta Live Tables API allows you to create, edit, delete, start, and view details about pipelines. + + Delta Live Tables is a framework for building reliable, maintainable, and testable data processing + pipelines. You define the transformations to perform on your data, and Delta Live Tables manages task + orchestration, cluster management, monitoring, data quality, and error handling. + + Instead of defining your data pipelines using a series of separate Apache Spark tasks, Delta Live Tables + manages how your data is transformed based on a target schema you define for each processing step. You can + also enforce data quality with Delta Live Tables expectations. Expectations allow you to define expected + data quality and specify how to handle records that fail those expectations. + + .. py:method:: create( [, allow_duplicate_names: Optional[bool], catalog: Optional[str], channel: Optional[str], clusters: Optional[List[PipelineCluster]], configuration: Optional[Dict[str, str]], continuous: Optional[bool], development: Optional[bool], dry_run: Optional[bool], edition: Optional[str], filters: Optional[Filters], id: Optional[str], libraries: Optional[List[PipelineLibrary]], name: Optional[str], notifications: Optional[List[Notifications]], photon: Optional[bool], serverless: Optional[bool], storage: Optional[str], target: Optional[str], trigger: Optional[PipelineTrigger]]) -> CreatePipelineResponse + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import pipelines + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + created = w.pipelines.create( + continuous=False, + name=f'sdk-{time.time_ns()}', + libraries=[pipelines.PipelineLibrary(notebook=pipelines.NotebookLibrary(path=notebook_path))], + clusters=[ + pipelines.PipelineCluster(instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + label="default", + num_workers=1, + custom_tags={ + "cluster_type": "default", + }) + ]) + + # cleanup + w.pipelines.delete(pipeline_id=created.pipeline_id) + + Create a pipeline. 
+ + Creates a new data processing pipeline based on the requested configuration. If successful, this + method returns the ID of the new pipeline. + + :param allow_duplicate_names: bool (optional) + If false, deployment will fail if name conflicts with that of another pipeline. + :param catalog: str (optional) + A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables + in this pipeline are published to a `target` schema inside `catalog` (for example, + `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog. + :param channel: str (optional) + DLT Release Channel that specifies which version to use. + :param clusters: List[:class:`PipelineCluster`] (optional) + Cluster settings for this pipeline deployment. + :param configuration: Dict[str,str] (optional) + String-String configuration for this pipeline execution. + :param continuous: bool (optional) + Whether the pipeline is continuous or triggered. This replaces `trigger`. + :param development: bool (optional) + Whether the pipeline is in Development mode. Defaults to false. + :param dry_run: bool (optional) + :param edition: str (optional) + Pipeline product edition. + :param filters: :class:`Filters` (optional) + Filters on which Pipeline packages to include in the deployed graph. + :param id: str (optional) + Unique identifier for this pipeline. + :param libraries: List[:class:`PipelineLibrary`] (optional) + Libraries or code needed by this deployment. + :param name: str (optional) + Friendly identifier for this pipeline. + :param notifications: List[:class:`Notifications`] (optional) + List of notification settings for this pipeline. + :param photon: bool (optional) + Whether Photon is enabled for this pipeline. + :param serverless: bool (optional) + Whether serverless compute is enabled for this pipeline. + :param storage: str (optional) + DBFS root directory for storing checkpoints and tables. + :param target: str (optional) + Target schema (database) to add tables in this pipeline to. If not specified, no data is published + to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`. + :param trigger: :class:`PipelineTrigger` (optional) + Which pipeline trigger to use. Deprecated: Use `continuous` instead. + + :returns: :class:`CreatePipelineResponse` + + + .. py:method:: delete(pipeline_id: str) + + Delete a pipeline. + + Deletes a pipeline. + + :param pipeline_id: str + + + + + .. py:method:: get(pipeline_id: str) -> GetPipelineResponse + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import pipelines + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + created = w.pipelines.create( + continuous=False, + name=f'sdk-{time.time_ns()}', + libraries=[pipelines.PipelineLibrary(notebook=pipelines.NotebookLibrary(path=notebook_path))], + clusters=[ + pipelines.PipelineCluster(instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + label="default", + num_workers=1, + custom_tags={ + "cluster_type": "default", + }) + ]) + + by_id = w.pipelines.get(pipeline_id=created.pipeline_id) + + # cleanup + w.pipelines.delete(pipeline_id=created.pipeline_id) + + Get a pipeline. + + :param pipeline_id: str + + :returns: :class:`GetPipelineResponse` + + + .. py:method:: get_permission_levels(pipeline_id: str) -> GetPipelinePermissionLevelsResponse + + Get pipeline permission levels. 
+ + Gets the permission levels that a user can have on an object. + + :param pipeline_id: str + The pipeline for which to get or manage permissions. + + :returns: :class:`GetPipelinePermissionLevelsResponse` + + + .. py:method:: get_permissions(pipeline_id: str) -> PipelinePermissions + + Get pipeline permissions. + + Gets the permissions of a pipeline. Pipelines can inherit permissions from their root object. + + :param pipeline_id: str + The pipeline for which to get or manage permissions. + + :returns: :class:`PipelinePermissions` + + + .. py:method:: get_update(pipeline_id: str, update_id: str) -> GetUpdateResponse + + Get a pipeline update. + + Gets an update from an active pipeline. + + :param pipeline_id: str + The ID of the pipeline. + :param update_id: str + The ID of the update. + + :returns: :class:`GetUpdateResponse` + + + .. py:method:: list_pipeline_events(pipeline_id: str [, filter: Optional[str], max_results: Optional[int], order_by: Optional[List[str]], page_token: Optional[str]]) -> Iterator[PipelineEvent] + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import pipelines + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + created = w.pipelines.create( + continuous=False, + name=f'sdk-{time.time_ns()}', + libraries=[pipelines.PipelineLibrary(notebook=pipelines.NotebookLibrary(path=notebook_path))], + clusters=[ + pipelines.PipelineCluster(instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + label="default", + num_workers=1, + custom_tags={ + "cluster_type": "default", + }) + ]) + + events = w.pipelines.list_pipeline_events(pipeline_id=created.pipeline_id) + + # cleanup + w.pipelines.delete(pipeline_id=created.pipeline_id) + + List pipeline events. + + Retrieves events for a pipeline. + + :param pipeline_id: str + :param filter: str (optional) + Criteria to select a subset of results, expressed using a SQL-like syntax. The supported filters + are: 1. level='INFO' (or WARN or ERROR) 2. level in ('INFO', 'WARN') 3. id='[event-id]' 4. timestamp + > 'TIMESTAMP' (or >=,<,<=,=) + + Composite expressions are supported, for example: level in ('ERROR', 'WARN') AND timestamp> + '2021-07-22T06:37:33.083Z' + :param max_results: int (optional) + Max number of entries to return in a single page. The system may return fewer than max_results + events in a response, even if there are more events available. + :param order_by: List[str] (optional) + A string indicating a sort order by timestamp for the results, for example, ["timestamp asc"]. The + sort order can be ascending or descending. By default, events are returned in descending order by + timestamp. + :param page_token: str (optional) + Page token returned by previous call. This field is mutually exclusive with all fields in this + request except max_results. An error is returned if any fields other than max_results are set when + this field is set. + + :returns: Iterator over :class:`PipelineEvent` + + + .. py:method:: list_pipelines( [, filter: Optional[str], max_results: Optional[int], order_by: Optional[List[str]], page_token: Optional[str]]) -> Iterator[PipelineStateInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import pipelines + + w = WorkspaceClient() + + all = w.pipelines.list_pipelines(pipelines.ListPipelinesRequest()) + + List pipelines. + + Lists pipelines defined in the Delta Live Tables system. 
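+
+        A hedged sketch of narrowing the listing with the `filter` syntax described below; it assumes
+        :class:`PipelineStateInfo` exposes `pipeline_id`, `name` and `state`:
+
+        .. code-block::
+
+            from databricks.sdk import WorkspaceClient
+
+            w = WorkspaceClient()
+
+            # Only pipelines whose name contains "sdk"; the iterator handles paging.
+            for p in w.pipelines.list_pipelines(filter="name LIKE '%sdk%'", max_results=25):
+                print(p.pipeline_id, p.name, p.state)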
+ + :param filter: str (optional) + Select a subset of results based on the specified criteria. The supported filters are: + + * `notebook=''` to select pipelines that reference the provided notebook path. * `name LIKE + '[pattern]'` to select pipelines with a name that matches pattern. Wildcards are supported, for + example: `name LIKE '%shopping%'` + + Composite filters are not supported. This field is optional. + :param max_results: int (optional) + The maximum number of entries to return in a single page. The system may return fewer than + max_results events in a response, even if there are more events available. This field is optional. + The default value is 25. The maximum value is 100. An error is returned if the value of max_results + is greater than 100. + :param order_by: List[str] (optional) + A list of strings specifying the order of results. Supported order_by fields are id and name. The + default is id asc. This field is optional. + :param page_token: str (optional) + Page token returned by previous call + + :returns: Iterator over :class:`PipelineStateInfo` + + + .. py:method:: list_updates(pipeline_id: str [, max_results: Optional[int], page_token: Optional[str], until_update_id: Optional[str]]) -> ListUpdatesResponse + + List pipeline updates. + + List updates for an active pipeline. + + :param pipeline_id: str + The pipeline to return updates for. + :param max_results: int (optional) + Max number of entries to return in a single page. + :param page_token: str (optional) + Page token returned by previous call + :param until_update_id: str (optional) + If present, returns updates until and including this update_id. + + :returns: :class:`ListUpdatesResponse` + + + .. py:method:: reset(pipeline_id: str) -> Wait[GetPipelineResponse] + + Reset a pipeline. + + Resets a pipeline. + + :param pipeline_id: str + + :returns: + Long-running operation waiter for :class:`GetPipelineResponse`. + See :method:wait_get_pipeline_running for more details. + + + .. py:method:: reset_and_wait(pipeline_id: str, timeout: datetime.timedelta = 0:20:00) -> GetPipelineResponse + + + .. py:method:: set_permissions(pipeline_id: str [, access_control_list: Optional[List[PipelineAccessControlRequest]]]) -> PipelinePermissions + + Set pipeline permissions. + + Sets permissions on a pipeline. Pipelines can inherit permissions from their root object. + + :param pipeline_id: str + The pipeline for which to get or manage permissions. + :param access_control_list: List[:class:`PipelineAccessControlRequest`] (optional) + + :returns: :class:`PipelinePermissions` + + + .. py:method:: start_update(pipeline_id: str [, cause: Optional[StartUpdateCause], full_refresh: Optional[bool], full_refresh_selection: Optional[List[str]], refresh_selection: Optional[List[str]], validate_only: Optional[bool]]) -> StartUpdateResponse + + Start a pipeline. + + Starts a new update for the pipeline. If there is already an active update for the pipeline, the + request will fail and the active update will remain running. + + :param pipeline_id: str + :param cause: :class:`StartUpdateCause` (optional) + :param full_refresh: bool (optional) + If true, this update will reset all tables before running. + :param full_refresh_selection: List[str] (optional) + A list of tables to update with fullRefresh. If both refresh_selection and full_refresh_selection + are empty, this is a full graph update. Full Refresh on a table means that the states of the table + will be reset before the refresh. 
+ :param refresh_selection: List[str] (optional) + A list of tables to update without fullRefresh. If both refresh_selection and full_refresh_selection + are empty, this is a full graph update. Full Refresh on a table means that the states of the table + will be reset before the refresh. + :param validate_only: bool (optional) + If true, this update only validates the correctness of pipeline source code but does not materialize + or publish any datasets. + + :returns: :class:`StartUpdateResponse` + + + .. py:method:: stop(pipeline_id: str) -> Wait[GetPipelineResponse] + + Stop a pipeline. + + Stops the pipeline by canceling the active update. If there is no active update for the pipeline, this + request is a no-op. + + :param pipeline_id: str + + :returns: + Long-running operation waiter for :class:`GetPipelineResponse`. + See :method:wait_get_pipeline_idle for more details. + + + .. py:method:: stop_and_wait(pipeline_id: str, timeout: datetime.timedelta = 0:20:00) -> GetPipelineResponse + + + .. py:method:: update(pipeline_id: str [, allow_duplicate_names: Optional[bool], catalog: Optional[str], channel: Optional[str], clusters: Optional[List[PipelineCluster]], configuration: Optional[Dict[str, str]], continuous: Optional[bool], development: Optional[bool], edition: Optional[str], expected_last_modified: Optional[int], filters: Optional[Filters], id: Optional[str], libraries: Optional[List[PipelineLibrary]], name: Optional[str], notifications: Optional[List[Notifications]], photon: Optional[bool], serverless: Optional[bool], storage: Optional[str], target: Optional[str], trigger: Optional[PipelineTrigger]]) + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import pipelines + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + created = w.pipelines.create( + continuous=False, + name=f'sdk-{time.time_ns()}', + libraries=[pipelines.PipelineLibrary(notebook=pipelines.NotebookLibrary(path=notebook_path))], + clusters=[ + pipelines.PipelineCluster(instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + label="default", + num_workers=1, + custom_tags={ + "cluster_type": "default", + }) + ]) + + w.pipelines.update( + pipeline_id=created.pipeline_id, + name=f'sdk-{time.time_ns()}', + libraries=[pipelines.PipelineLibrary(notebook=pipelines.NotebookLibrary(path=notebook_path))], + clusters=[ + pipelines.PipelineCluster(instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + label="default", + num_workers=1, + custom_tags={ + "cluster_type": "default", + }) + ]) + + # cleanup + w.pipelines.delete(pipeline_id=created.pipeline_id) + + Edit a pipeline. + + Updates a pipeline with the supplied configuration. + + :param pipeline_id: str + Unique identifier for this pipeline. + :param allow_duplicate_names: bool (optional) + If false, deployment will fail if name has changed and conflicts the name of another pipeline. + :param catalog: str (optional) + A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables + in this pipeline are published to a `target` schema inside `catalog` (for example, + `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog. + :param channel: str (optional) + DLT Release Channel that specifies which version to use. + :param clusters: List[:class:`PipelineCluster`] (optional) + Cluster settings for this pipeline deployment. 
+ :param configuration: Dict[str,str] (optional) + String-String configuration for this pipeline execution. + :param continuous: bool (optional) + Whether the pipeline is continuous or triggered. This replaces `trigger`. + :param development: bool (optional) + Whether the pipeline is in Development mode. Defaults to false. + :param edition: str (optional) + Pipeline product edition. + :param expected_last_modified: int (optional) + If present, the last-modified time of the pipeline settings before the edit. If the settings were + modified after that time, then the request will fail with a conflict. + :param filters: :class:`Filters` (optional) + Filters on which Pipeline packages to include in the deployed graph. + :param id: str (optional) + Unique identifier for this pipeline. + :param libraries: List[:class:`PipelineLibrary`] (optional) + Libraries or code needed by this deployment. + :param name: str (optional) + Friendly identifier for this pipeline. + :param notifications: List[:class:`Notifications`] (optional) + List of notification settings for this pipeline. + :param photon: bool (optional) + Whether Photon is enabled for this pipeline. + :param serverless: bool (optional) + Whether serverless compute is enabled for this pipeline. + :param storage: str (optional) + DBFS root directory for storing checkpoints and tables. + :param target: str (optional) + Target schema (database) to add tables in this pipeline to. If not specified, no data is published + to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`. + :param trigger: :class:`PipelineTrigger` (optional) + Which pipeline trigger to use. Deprecated: Use `continuous` instead. + + + + + .. py:method:: update_permissions(pipeline_id: str [, access_control_list: Optional[List[PipelineAccessControlRequest]]]) -> PipelinePermissions + + Update pipeline permissions. + + Updates the permissions on a pipeline. Pipelines can inherit permissions from their root object. + + :param pipeline_id: str + The pipeline for which to get or manage permissions. + :param access_control_list: List[:class:`PipelineAccessControlRequest`] (optional) + + :returns: :class:`PipelinePermissions` + + + .. py:method:: wait_get_pipeline_idle(pipeline_id: str, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[GetPipelineResponse], None]]) -> GetPipelineResponse + + + .. py:method:: wait_get_pipeline_running(pipeline_id: str, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[GetPipelineResponse], None]]) -> GetPipelineResponse diff --git a/docs/workspace/serving/apps.rst b/docs/workspace/serving/apps.rst new file mode 100644 index 000000000..bd2f6bed2 --- /dev/null +++ b/docs/workspace/serving/apps.rst @@ -0,0 +1,81 @@ +``w.apps``: Databricks Apps +=========================== +.. currentmodule:: databricks.sdk.service.serving + +.. py:class:: AppsAPI + + Lakehouse Apps run directly on a customer’s Databricks instance, integrate with their data, use and + extend Databricks services, and enable users to interact through single sign-on. + + .. py:method:: create(manifest: AppManifest [, resources: Optional[Any]]) -> DeploymentStatus + + Create and deploy an application. + + Creates and deploys an application. + + :param manifest: :class:`AppManifest` + Manifest that specifies the application requirements + :param resources: Any (optional) + Information passed at app deployment time to fulfill app dependencies + + :returns: :class:`DeploymentStatus` + + + .. 
py:method:: delete_app(name: str) -> DeleteAppResponse + + Delete an application. + + Delete an application definition + + :param name: str + The name of an application. This field is required. + + :returns: :class:`DeleteAppResponse` + + + .. py:method:: get_app(name: str) -> GetAppResponse + + Get definition for an application. + + Get an application definition + + :param name: str + The name of an application. This field is required. + + :returns: :class:`GetAppResponse` + + + .. py:method:: get_app_deployment_status(deployment_id: str [, include_app_log: Optional[str]]) -> DeploymentStatus + + Get deployment status for an application. + + Get deployment status for an application + + :param deployment_id: str + The deployment id for an application. This field is required. + :param include_app_log: str (optional) + Boolean flag to include application logs + + :returns: :class:`DeploymentStatus` + + + .. py:method:: get_apps() -> ListAppsResponse + + List all applications. + + List all available applications + + :returns: :class:`ListAppsResponse` + + + .. py:method:: get_events(name: str) -> ListAppEventsResponse + + Get deployment events for an application. + + Get deployment events for an application + + :param name: str + The name of an application. This field is required. + + :returns: :class:`ListAppEventsResponse` + \ No newline at end of file diff --git a/docs/workspace/serving/index.rst b/docs/workspace/serving/index.rst new file mode 100644 index 000000000..ce3d216ff --- /dev/null +++ b/docs/workspace/serving/index.rst @@ -0,0 +1,11 @@ + +Real-time Serving +================= + +Use real-time inference for machine learning + +.. toctree:: + :maxdepth: 1 + + apps + serving_endpoints \ No newline at end of file diff --git a/docs/workspace/serving/serving_endpoints.rst b/docs/workspace/serving/serving_endpoints.rst new file mode 100644 index 000000000..8972b0877 --- /dev/null +++ b/docs/workspace/serving/serving_endpoints.rst @@ -0,0 +1,276 @@ +``w.serving_endpoints``: Serving endpoints +========================================== +.. currentmodule:: databricks.sdk.service.serving + +.. py:class:: ServingEndpointsAPI + + The Serving Endpoints API allows you to create, update, and delete model serving endpoints. + + You can use a serving endpoint to serve models from the Databricks Model Registry or from Unity Catalog. + Endpoints expose the underlying models as scalable REST API endpoints using serverless compute. This means + the endpoints and associated compute resources are fully managed by Databricks and will not appear in your + cloud account. A serving endpoint can consist of one or more MLflow models from the Databricks Model + Registry, called served entities. A serving endpoint can have at most ten served entities. You can + configure traffic settings to define how requests should be routed to your served entities behind an + endpoint. Additionally, you can configure the scale of resources that should be applied to each served + entity. + + .. py:method:: build_logs(name: str, served_model_name: str) -> BuildLogsResponse + + Retrieve the logs associated with building the model's environment for a given serving endpoint's + served model. + + Retrieves the build logs associated with the provided served model. + + :param name: str + The name of the serving endpoint that the served model belongs to. This field is required. + :param served_model_name: str + The name of the served model that build logs will be retrieved for. This field is required. 
+ + :returns: :class:`BuildLogsResponse` + + + .. py:method:: create(name: str, config: EndpointCoreConfigInput [, rate_limits: Optional[List[RateLimit]], tags: Optional[List[EndpointTag]]]) -> Wait[ServingEndpointDetailed] + + Create a new serving endpoint. + + :param name: str + The name of the serving endpoint. This field is required and must be unique across a Databricks + workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. + :param config: :class:`EndpointCoreConfigInput` + The core config of the serving endpoint. + :param rate_limits: List[:class:`RateLimit`] (optional) + Rate limits to be applied to the serving endpoint. NOTE: only external and foundation model + endpoints are supported as of now. + :param tags: List[:class:`EndpointTag`] (optional) + Tags to be attached to the serving endpoint and automatically propagated to billing logs. + + :returns: + Long-running operation waiter for :class:`ServingEndpointDetailed`. + See :method:wait_get_serving_endpoint_not_updating for more details. + + + .. py:method:: create_and_wait(name: str, config: EndpointCoreConfigInput [, rate_limits: Optional[List[RateLimit]], tags: Optional[List[EndpointTag]], timeout: datetime.timedelta = 0:20:00]) -> ServingEndpointDetailed + + + .. py:method:: delete(name: str) + + Delete a serving endpoint. + + :param name: str + The name of the serving endpoint. This field is required. + + + + + .. py:method:: export_metrics(name: str) + + Retrieve the metrics associated with a serving endpoint. + + Retrieves the metrics associated with the provided serving endpoint in either Prometheus or + OpenMetrics exposition format. + + :param name: str + The name of the serving endpoint to retrieve metrics for. This field is required. + + + + + .. py:method:: get(name: str) -> ServingEndpointDetailed + + Get a single serving endpoint. + + Retrieves the details for a single serving endpoint. + + :param name: str + The name of the serving endpoint. This field is required. + + :returns: :class:`ServingEndpointDetailed` + + + .. py:method:: get_permission_levels(serving_endpoint_id: str) -> GetServingEndpointPermissionLevelsResponse + + Get serving endpoint permission levels. + + Gets the permission levels that a user can have on an object. + + :param serving_endpoint_id: str + The serving endpoint for which to get or manage permissions. + + :returns: :class:`GetServingEndpointPermissionLevelsResponse` + + + .. py:method:: get_permissions(serving_endpoint_id: str) -> ServingEndpointPermissions + + Get serving endpoint permissions. + + Gets the permissions of a serving endpoint. Serving endpoints can inherit permissions from their root + object. + + :param serving_endpoint_id: str + The serving endpoint for which to get or manage permissions. + + :returns: :class:`ServingEndpointPermissions` + + + .. py:method:: list() -> Iterator[ServingEndpoint] + + Retrieve all serving endpoints. + + :returns: Iterator over :class:`ServingEndpoint` + + + .. py:method:: logs(name: str, served_model_name: str) -> ServerLogsResponse + + Retrieve the most recent log lines associated with a given serving endpoint's served model. + + Retrieves the service logs associated with the provided served model. + + :param name: str + The name of the serving endpoint that the served model belongs to. This field is required. + :param served_model_name: str + The name of the served model that logs will be retrieved for. This field is required. + + :returns: :class:`ServerLogsResponse` + + + .. 
py:method:: patch(name: str [, add_tags: Optional[List[EndpointTag]], delete_tags: Optional[List[str]]]) -> Iterator[EndpointTag]
+
+ Patch the tags of a serving endpoint.
+
+ Used to batch add and delete tags from a serving endpoint with a single API call.
+
+ :param name: str
+ The name of the serving endpoint whose tags to patch. This field is required.
+ :param add_tags: List[:class:`EndpointTag`] (optional)
+ List of endpoint tags to add
+ :param delete_tags: List[str] (optional)
+ List of tag keys to delete
+
+ :returns: Iterator over :class:`EndpointTag`
+
+
+ .. py:method:: put(name: str [, rate_limits: Optional[List[RateLimit]]]) -> PutResponse
+
+ Update the rate limits of a serving endpoint.
+
+ Used to update the rate limits of a serving endpoint. NOTE: only external and foundation model
+ endpoints are supported as of now.
+
+ :param name: str
+ The name of the serving endpoint whose rate limits are being updated. This field is required.
+ :param rate_limits: List[:class:`RateLimit`] (optional)
+ The list of endpoint rate limits.
+
+ :returns: :class:`PutResponse`
+
+
+ .. py:method:: query(name: str [, dataframe_records: Optional[List[Any]], dataframe_split: Optional[DataframeSplitInput], extra_params: Optional[Dict[str, str]], input: Optional[Any], inputs: Optional[Any], instances: Optional[List[Any]], max_tokens: Optional[int], messages: Optional[List[ChatMessage]], n: Optional[int], prompt: Optional[Any], stop: Optional[List[str]], stream: Optional[bool], temperature: Optional[float]]) -> QueryEndpointResponse
+
+ Query a serving endpoint with provided model input.
+
+ :param name: str
+ The name of the serving endpoint. This field is required.
+ :param dataframe_records: List[Any] (optional)
+ Pandas Dataframe input in the records orientation.
+ :param dataframe_split: :class:`DataframeSplitInput` (optional)
+ Pandas Dataframe input in the split orientation.
+ :param extra_params: Dict[str,str] (optional)
+ The extra parameters field used ONLY for __completions, chat,__ and __embeddings external &
+ foundation model__ serving endpoints. This is a map of strings and should only be used with other
+ external/foundation model query fields.
+ :param input: Any (optional)
+ The input string (or array of strings) field used ONLY for __embeddings external & foundation
+ model__ serving endpoints and is the only field (along with extra_params if needed) used by
+ embeddings queries.
+ :param inputs: Any (optional)
+ Tensor-based input in columnar format.
+ :param instances: List[Any] (optional)
+ Tensor-based input in row format.
+ :param max_tokens: int (optional)
+ The max tokens field used ONLY for __completions__ and __chat external & foundation model__ serving
+ endpoints. This is an integer and should only be used with other chat/completions query fields.
+ :param messages: List[:class:`ChatMessage`] (optional)
+ The messages field used ONLY for __chat external & foundation model__ serving endpoints. This is a
+ map of strings and should only be used with other chat query fields.
+ :param n: int (optional)
+ The n (number of candidates) field used ONLY for __completions__ and __chat external & foundation
+ model__ serving endpoints. This is an integer between 1 and 5 with a default of 1 and should only be
+ used with other chat/completions query fields.
+ :param prompt: Any (optional)
+ The prompt string (or array of strings) field used ONLY for __completions external & foundation
+ model__ serving endpoints and should only be used with other completions query fields.
+ :param stop: List[str] (optional) + The stop sequences field used ONLY for __completions__ and __chat external & foundation model__ + serving endpoints. This is a list of strings and should only be used with other chat/completions + query fields. + :param stream: bool (optional) + The stream field used ONLY for __completions__ and __chat external & foundation model__ serving + endpoints. This is a boolean defaulting to false and should only be used with other chat/completions + query fields. + :param temperature: float (optional) + The temperature field used ONLY for __completions__ and __chat external & foundation model__ serving + endpoints. This is a float between 0.0 and 2.0 with a default of 1.0 and should only be used with + other chat/completions query fields. + + :returns: :class:`QueryEndpointResponse` + + + .. py:method:: set_permissions(serving_endpoint_id: str [, access_control_list: Optional[List[ServingEndpointAccessControlRequest]]]) -> ServingEndpointPermissions + + Set serving endpoint permissions. + + Sets permissions on a serving endpoint. Serving endpoints can inherit permissions from their root + object. + + :param serving_endpoint_id: str + The serving endpoint for which to get or manage permissions. + :param access_control_list: List[:class:`ServingEndpointAccessControlRequest`] (optional) + + :returns: :class:`ServingEndpointPermissions` + + + .. py:method:: update_config(name: str [, auto_capture_config: Optional[AutoCaptureConfigInput], served_entities: Optional[List[ServedEntityInput]], served_models: Optional[List[ServedModelInput]], traffic_config: Optional[TrafficConfig]]) -> Wait[ServingEndpointDetailed] + + Update a serving endpoint with a new config. + + Updates any combination of the serving endpoint's served entities, the compute configuration of those + served entities, and the endpoint's traffic config. An endpoint that already has an update in progress + can not be updated until the current update completes or fails. + + :param name: str + The name of the serving endpoint to update. This field is required. + :param auto_capture_config: :class:`AutoCaptureConfigInput` (optional) + Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. + :param served_entities: List[:class:`ServedEntityInput`] (optional) + A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served + entities. + :param served_models: List[:class:`ServedModelInput`] (optional) + (Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A + serving endpoint can have up to 10 served models. + :param traffic_config: :class:`TrafficConfig` (optional) + The traffic config defining how invocations to the serving endpoint should be routed. + + :returns: + Long-running operation waiter for :class:`ServingEndpointDetailed`. + See :method:wait_get_serving_endpoint_not_updating for more details. + + + .. py:method:: update_config_and_wait(name: str [, auto_capture_config: Optional[AutoCaptureConfigInput], served_entities: Optional[List[ServedEntityInput]], served_models: Optional[List[ServedModelInput]], traffic_config: Optional[TrafficConfig], timeout: datetime.timedelta = 0:20:00]) -> ServingEndpointDetailed + + + .. py:method:: update_permissions(serving_endpoint_id: str [, access_control_list: Optional[List[ServingEndpointAccessControlRequest]]]) -> ServingEndpointPermissions + + Update serving endpoint permissions. + + Updates the permissions on a serving endpoint. 
Serving endpoints can inherit permissions from their
+ root object.
+
+ :param serving_endpoint_id: str
+ The serving endpoint for which to get or manage permissions.
+ :param access_control_list: List[:class:`ServingEndpointAccessControlRequest`] (optional)
+
+ :returns: :class:`ServingEndpointPermissions`
+
+
+ .. py:method:: wait_get_serving_endpoint_not_updating(name: str, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[ServingEndpointDetailed], None]]) -> ServingEndpointDetailed
diff --git a/docs/workspace/settings/credentials_manager.rst b/docs/workspace/settings/credentials_manager.rst
new file mode 100644
index 000000000..1767ba34e
--- /dev/null
+++ b/docs/workspace/settings/credentials_manager.rst
@@ -0,0 +1,23 @@
+``w.credentials_manager``: Credentials Manager
+==============================================
+.. currentmodule:: databricks.sdk.service.settings
+
+.. py:class:: CredentialsManagerAPI
+
+ Credentials manager interacts with Identity Providers to perform token exchanges using stored
+ credentials and refresh tokens.
+
+ .. py:method:: exchange_token(partition_id: PartitionId, token_type: List[TokenType], scopes: List[str]) -> ExchangeTokenResponse
+
+ Exchange token.
+
+ Exchange tokens with an Identity Provider to get a new access token. It allows specifying scopes to
+ determine token permissions.
+
+ :param partition_id: :class:`PartitionId`
+ :param token_type: List[:class:`TokenType`]
+ :param scopes: List[str]
+ Array of scopes for the token request.
+
+ :returns: :class:`ExchangeTokenResponse`
+ \ No newline at end of file
diff --git a/docs/workspace/settings/index.rst b/docs/workspace/settings/index.rst
new file mode 100644
index 000000000..a524f671d
--- /dev/null
+++ b/docs/workspace/settings/index.rst
@@ -0,0 +1,15 @@
+
+Settings
+========
+
+Manage security settings for Accounts and Workspaces
+
+.. toctree::
+ :maxdepth: 1
+
+ credentials_manager
+ ip_access_lists
+ settings
+ token_management
+ tokens
+ workspace_conf \ No newline at end of file
diff --git a/docs/workspace/settings/ip_access_lists.rst b/docs/workspace/settings/ip_access_lists.rst
new file mode 100644
index 000000000..a265c5943
--- /dev/null
+++ b/docs/workspace/settings/ip_access_lists.rst
@@ -0,0 +1,229 @@
+``w.ip_access_lists``: IP Access Lists
+======================================
+.. currentmodule:: databricks.sdk.service.settings
+
+.. py:class:: IpAccessListsAPI
+
+ IP Access List enables admins to configure IP access lists.
+
+ IP access lists affect web application access and REST API access to this workspace only. If the feature
+ is disabled for a workspace, all access is allowed for this workspace. There is support for allow lists
+ (inclusion) and block lists (exclusion).
+
+ When a connection is attempted: 1. **First, all block lists are checked.** If the connection IP address
+ matches any block list, the connection is rejected. 2. **If the connection was not rejected by block
+ lists**, the IP address is compared with the allow lists.
+
+ If there is at least one allow list for the workspace, the connection is allowed only if the IP address
+ matches an allow list. If there are no allow lists for the workspace, all IP addresses are allowed.
+
+ For all allow lists and block lists combined, the workspace supports a maximum of 1000 IP/CIDR values,
+ where one CIDR counts as a single value.
+
+ After changes to the IP access list feature, it can take a few minutes for changes to take effect.
+
+ ..
py:method:: create(label: str, list_type: ListType [, ip_addresses: Optional[List[str]]]) -> CreateIpAccessListResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import settings + + w = WorkspaceClient() + + created = w.ip_access_lists.create(label=f'sdk-{time.time_ns()}', + ip_addresses=["1.0.0.0/16"], + list_type=settings.ListType.BLOCK) + + # cleanup + w.ip_access_lists.delete(ip_access_list_id=created.ip_access_list.list_id) + + Create access list. + + Creates an IP access list for this workspace. + + A list can be an allow list or a block list. See the top of this file for a description of how the + server treats allow lists and block lists at runtime. + + When creating or updating an IP access list: + + * For all allow lists and block lists combined, the API supports a maximum of 1000 IP/CIDR values, + where one CIDR counts as a single value. Attempts to exceed that number return error 400 with + `error_code` value `QUOTA_EXCEEDED`. * If the new list would block the calling user's current IP, + error 400 is returned with `error_code` value `INVALID_STATE`. + + It can take a few minutes for the changes to take effect. **Note**: Your new IP access list has no + effect until you enable the feature. See :method:workspaceconf/setStatus + + :param label: str + Label for the IP access list. This **cannot** be empty. + :param list_type: :class:`ListType` + Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. IP addresses in the block list are excluded even if they are included in an allow list. + :param ip_addresses: List[str] (optional) + + :returns: :class:`CreateIpAccessListResponse` + + + .. py:method:: delete(ip_access_list_id: str) + + Delete access list. + + Deletes an IP access list, specified by its list ID. + + :param ip_access_list_id: str + The ID for the corresponding IP access list + + + + + .. py:method:: get(ip_access_list_id: str) -> FetchIpAccessListResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import settings + + w = WorkspaceClient() + + created = w.ip_access_lists.create(label=f'sdk-{time.time_ns()}', + ip_addresses=["1.0.0.0/16"], + list_type=settings.ListType.BLOCK) + + by_id = w.ip_access_lists.get(ip_access_list_id=created.ip_access_list.list_id) + + # cleanup + w.ip_access_lists.delete(ip_access_list_id=created.ip_access_list.list_id) + + Get access list. + + Gets an IP access list, specified by its list ID. + + :param ip_access_list_id: str + The ID for the corresponding IP access list + + :returns: :class:`FetchIpAccessListResponse` + + + .. py:method:: list() -> Iterator[IpAccessListInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.ip_access_lists.list() + + Get access lists. + + Gets all IP access lists for the specified workspace. + + :returns: Iterator over :class:`IpAccessListInfo` + + + .. py:method:: replace(ip_access_list_id: str, label: str, list_type: ListType, enabled: bool [, ip_addresses: Optional[List[str]]]) + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import settings + + w = WorkspaceClient() + + created = w.ip_access_lists.create(label=f'sdk-{time.time_ns()}', + ip_addresses=["1.0.0.0/16"], + list_type=settings.ListType.BLOCK) + + w.ip_access_lists.replace(ip_access_list_id=created.ip_access_list.list_id, + label=f'sdk-{time.time_ns()}', + ip_addresses=["1.0.0.0/24"], + list_type=settings.ListType.BLOCK, + enabled=False) + + # cleanup + w.ip_access_lists.delete(ip_access_list_id=created.ip_access_list.list_id) + + Replace access list. + + Replaces an IP access list, specified by its ID. + + A list can include allow lists and block lists. See the top of this file for a description of how the + server treats allow lists and block lists at run time. When replacing an IP access list: * For all + allow lists and block lists combined, the API supports a maximum of 1000 IP/CIDR values, where one + CIDR counts as a single value. Attempts to exceed that number return error 400 with `error_code` value + `QUOTA_EXCEEDED`. * If the resulting list would block the calling user's current IP, error 400 is + returned with `error_code` value `INVALID_STATE`. It can take a few minutes for the changes to take + effect. Note that your resulting IP access list has no effect until you enable the feature. See + :method:workspaceconf/setStatus. + + :param ip_access_list_id: str + The ID for the corresponding IP access list + :param label: str + Label for the IP access list. This **cannot** be empty. + :param list_type: :class:`ListType` + Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. IP addresses in the block list are excluded even if they are included in an allow list. + :param enabled: bool + Specifies whether this IP access list is enabled. + :param ip_addresses: List[str] (optional) + + + + + .. py:method:: update(ip_access_list_id: str [, enabled: Optional[bool], ip_addresses: Optional[List[str]], label: Optional[str], list_type: Optional[ListType]]) + + Update access list. + + Updates an existing IP access list, specified by its ID. + + A list can include allow lists and block lists. See the top of this file for a description of how the + server treats allow lists and block lists at run time. + + When updating an IP access list: + + * For all allow lists and block lists combined, the API supports a maximum of 1000 IP/CIDR values, + where one CIDR counts as a single value. Attempts to exceed that number return error 400 with + `error_code` value `QUOTA_EXCEEDED`. * If the updated list would block the calling user's current IP, + error 400 is returned with `error_code` value `INVALID_STATE`. + + It can take a few minutes for the changes to take effect. Note that your resulting IP access list has + no effect until you enable the feature. See :method:workspaceconf/setStatus. + + :param ip_access_list_id: str + The ID for the corresponding IP access list + :param enabled: bool (optional) + Specifies whether this IP access list is enabled. + :param ip_addresses: List[str] (optional) + :param label: str (optional) + Label for the IP access list. This **cannot** be empty. + :param list_type: :class:`ListType` (optional) + Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. 
IP addresses in the block list are excluded even if they are included in an allow list. + + + \ No newline at end of file diff --git a/docs/workspace/settings/settings.rst b/docs/workspace/settings/settings.rst new file mode 100644 index 000000000..0395213d3 --- /dev/null +++ b/docs/workspace/settings/settings.rst @@ -0,0 +1,83 @@ +``w.settings``: Default Namespace +================================= +.. currentmodule:: databricks.sdk.service.settings + +.. py:class:: SettingsAPI + + The default namespace setting API allows users to configure the default namespace for a Databricks + workspace. + + Through this API, users can retrieve, set, or modify the default namespace used when queries do not + reference a fully qualified three-level name. For example, if you use the API to set 'retail_prod' as the + default catalog, then a query 'SELECT * FROM myTable' would reference the object + 'retail_prod.default.myTable' (the schema 'default' is always assumed). + + This setting requires a restart of clusters and SQL warehouses to take effect. Additionally, the default + namespace only applies when using Unity Catalog-enabled compute. + + .. py:method:: delete_default_workspace_namespace(etag: str) -> DeleteDefaultWorkspaceNamespaceResponse + + Delete the default namespace setting. + + Deletes the default namespace setting for the workspace. A fresh etag needs to be provided in DELETE + requests (as a query parameter). The etag can be retrieved by making a GET request before the DELETE + request. If the setting is updated/deleted concurrently, DELETE will fail with 409 and the request + will need to be retried by using the fresh etag in the 409 response. + + :param etag: str + etag used for versioning. The response is at least as fresh as the eTag provided. This is used for + optimistic concurrency control as a way to help prevent simultaneous writes of a setting overwriting + each other. It is strongly suggested that systems make use of the etag in the read -> delete pattern + to perform setting deletions in order to avoid race conditions. That is, get an etag from a GET + request, and pass it with the DELETE request to identify the rule set version you are deleting. + + :returns: :class:`DeleteDefaultWorkspaceNamespaceResponse` + + + .. py:method:: read_default_workspace_namespace(etag: str) -> DefaultNamespaceSetting + + Get the default namespace setting. + + Gets the default namespace setting. + + :param etag: str + etag used for versioning. The response is at least as fresh as the eTag provided. This is used for + optimistic concurrency control as a way to help prevent simultaneous writes of a setting overwriting + each other. It is strongly suggested that systems make use of the etag in the read -> delete pattern + to perform setting deletions in order to avoid race conditions. That is, get an etag from a GET + request, and pass it with the DELETE request to identify the rule set version you are deleting. + + :returns: :class:`DefaultNamespaceSetting` + + + .. py:method:: update_default_workspace_namespace( [, allow_missing: Optional[bool], field_mask: Optional[str], setting: Optional[DefaultNamespaceSetting]]) -> DefaultNamespaceSetting + + Update the default namespace setting. + + Updates the default namespace setting for the workspace. A fresh etag needs to be provided in PATCH + requests (as part of the setting field). The etag can be retrieved by making a GET request before the + PATCH request. 
Note that if the setting does not exist, GET will return a NOT_FOUND error and the etag
+ will be present in the error response, which should be set in the PATCH request. If the setting is
+ updated concurrently, PATCH will fail with 409 and the request will need to be retried by using the
+ fresh etag in the 409 response.
+
+ :param allow_missing: bool (optional)
+ This should always be set to true for Settings API. Added for AIP compliance.
+ :param field_mask: str (optional)
+ Field mask is required to be passed into the PATCH request. Field mask specifies which fields of the
+ setting payload will be updated. For example, for Default Namespace setting, the field mask is
+ supposed to contain fields from the DefaultNamespaceSetting.namespace schema.
+
+ The field mask needs to be supplied as a single string. To specify multiple fields in the field mask,
+ use comma as the separator (no space).
+ :param setting: :class:`DefaultNamespaceSetting` (optional)
+ This represents the setting configuration for the default namespace in the Databricks workspace.
+ Setting the default catalog for the workspace determines the catalog that is used when queries do
+ not reference a fully qualified 3 level name. For example, if the default catalog is set to
+ 'retail_prod' then a query 'SELECT * FROM myTable' would reference the object
+ 'retail_prod.default.myTable' (the schema 'default' is always assumed). This setting requires a
+ restart of clusters and SQL warehouses to take effect. Additionally, the default namespace only
+ applies when using Unity Catalog-enabled compute.
+
+ :returns: :class:`DefaultNamespaceSetting`
+ \ No newline at end of file
diff --git a/docs/workspace/settings/token_management.rst b/docs/workspace/settings/token_management.rst
new file mode 100644
index 000000000..cf1860419
--- /dev/null
+++ b/docs/workspace/settings/token_management.rst
@@ -0,0 +1,162 @@
+``w.token_management``: Token management
+========================================
+.. currentmodule:: databricks.sdk.service.settings
+
+.. py:class:: TokenManagementAPI
+
+ Enables administrators to get all tokens and delete tokens for other users. Admins can either get every
+ token, get a specific token by ID, or get all tokens for a particular user.
+
+ .. py:method:: create_obo_token(application_id: str, lifetime_seconds: int [, comment: Optional[str]]) -> CreateOboTokenResponse
+
+
+ Usage:
+
+ .. code-block::
+
+ import time
+
+ from databricks.sdk import WorkspaceClient
+ from databricks.sdk.service import iam
+
+ w = WorkspaceClient()
+
+ groups = w.groups.group_display_name_to_id_map(iam.ListGroupsRequest())
+
+ spn = w.service_principals.create(display_name=f'sdk-{time.time_ns()}',
+ groups=[iam.ComplexValue(value=groups["admins"])])
+
+ obo = w.token_management.create_obo_token(application_id=spn.application_id, lifetime_seconds=60)
+
+ # cleanup
+ w.service_principals.delete(id=spn.id)
+ w.token_management.delete(token_id=obo.token_info.token_id)
+
+ Create on-behalf token.
+
+ Creates a token on behalf of a service principal.
+
+ :param application_id: str
+ Application ID of the service principal.
+ :param lifetime_seconds: int
+ The number of seconds before the token expires.
+ :param comment: str (optional)
+ Comment that describes the purpose of the token.
+
+ :returns: :class:`CreateOboTokenResponse`
+
+
+ .. py:method:: delete(token_id: str)
+
+ Delete a token.
+
+ Deletes a token, specified by its ID.
+
+ :param token_id: str
+ The ID of the token to delete.
+
+
+
+
+ ..
py:method:: get(token_id: str) -> TokenInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import iam + + w = WorkspaceClient() + + groups = w.groups.group_display_name_to_id_map(iam.ListGroupsRequest()) + + spn = w.service_principals.create(display_name=f'sdk-{time.time_ns()}', + groups=[iam.ComplexValue(value=groups["admins"])]) + + obo = w.token_management.create_obo_token(application_id=spn.application_id, lifetime_seconds=60) + + by_id = w.token_management.get(token_id=obo.token_info.token_id) + + # cleanup + w.service_principals.delete(id=spn.id) + w.token_management.delete(token_id=obo.token_info.token_id) + + Get token info. + + Gets information about a token, specified by its ID. + + :param token_id: str + The ID of the token to get. + + :returns: :class:`TokenInfo` + + + .. py:method:: get_permission_levels() -> GetTokenPermissionLevelsResponse + + Get token permission levels. + + Gets the permission levels that a user can have on an object. + + :returns: :class:`GetTokenPermissionLevelsResponse` + + + .. py:method:: get_permissions() -> TokenPermissions + + Get token permissions. + + Gets the permissions of all tokens. Tokens can inherit permissions from their root object. + + :returns: :class:`TokenPermissions` + + + .. py:method:: list( [, created_by_id: Optional[str], created_by_username: Optional[str]]) -> Iterator[TokenInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import settings + + w = WorkspaceClient() + + all = w.token_management.list(settings.ListTokenManagementRequest()) + + List all tokens. + + Lists all tokens associated with the specified workspace or user. + + :param created_by_id: str (optional) + User ID of the user that created the token. + :param created_by_username: str (optional) + Username of the user that created the token. + + :returns: Iterator over :class:`TokenInfo` + + + .. py:method:: set_permissions( [, access_control_list: Optional[List[TokenAccessControlRequest]]]) -> TokenPermissions + + Set token permissions. + + Sets permissions on all tokens. Tokens can inherit permissions from their root object. + + :param access_control_list: List[:class:`TokenAccessControlRequest`] (optional) + + :returns: :class:`TokenPermissions` + + + .. py:method:: update_permissions( [, access_control_list: Optional[List[TokenAccessControlRequest]]]) -> TokenPermissions + + Update token permissions. + + Updates the permissions on all tokens. Tokens can inherit permissions from their root object. + + :param access_control_list: List[:class:`TokenAccessControlRequest`] (optional) + + :returns: :class:`TokenPermissions` + \ No newline at end of file diff --git a/docs/workspace/settings/tokens.rst b/docs/workspace/settings/tokens.rst new file mode 100644 index 000000000..899db00d1 --- /dev/null +++ b/docs/workspace/settings/tokens.rst @@ -0,0 +1,76 @@ +``w.tokens``: Token +=================== +.. currentmodule:: databricks.sdk.service.settings + +.. py:class:: TokensAPI + + The Token API allows you to create, list, and revoke tokens that can be used to authenticate and access + Databricks REST APIs. + + .. py:method:: create( [, comment: Optional[str], lifetime_seconds: Optional[int]]) -> CreateTokenResponse + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + token = w.tokens.create(comment=f'sdk-{time.time_ns()}', lifetime_seconds=300) + + # cleanup + w.tokens.delete(token_id=token.token_info.token_id) + + Create a user token. + + Creates and returns a token for a user. If this call is made through token authentication, it creates + a token with the same client ID as the authenticated token. If the user's token quota is exceeded, + this call returns an error **QUOTA_EXCEEDED**. + + :param comment: str (optional) + Optional description to attach to the token. + :param lifetime_seconds: int (optional) + The lifetime of the token, in seconds. + + If the lifetime is not specified, this token remains valid indefinitely. + + :returns: :class:`CreateTokenResponse` + + + .. py:method:: delete(token_id: str) + + Revoke token. + + Revokes an access token. + + If a token with the specified ID is not valid, this call returns an error **RESOURCE_DOES_NOT_EXIST**. + + :param token_id: str + The ID of the token to be revoked. + + + + + .. py:method:: list() -> Iterator[PublicTokenInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.tokens.list() + + List tokens. + + Lists all the valid tokens for a user-workspace pair. + + :returns: Iterator over :class:`PublicTokenInfo` + \ No newline at end of file diff --git a/docs/workspace/settings/workspace_conf.rst b/docs/workspace/settings/workspace_conf.rst new file mode 100644 index 000000000..892819383 --- /dev/null +++ b/docs/workspace/settings/workspace_conf.rst @@ -0,0 +1,39 @@ +``w.workspace_conf``: Workspace Conf +==================================== +.. currentmodule:: databricks.sdk.service.settings + +.. py:class:: WorkspaceConfAPI + + This API allows updating known workspace settings for advanced users. + + .. py:method:: get_status(keys: str) -> WorkspaceConf + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + conf = w.workspace_conf.get_status(keys="enableWorkspaceFilesystem") + + Check configuration status. + + Gets the configuration status for a workspace. + + :param keys: str + + :returns: Dict[str,str] + + + .. py:method:: set_status() + + Enable/disable features. + + Sets the configuration status for a workspace, including enabling or disabling it. + + + + \ No newline at end of file diff --git a/docs/workspace/sharing/clean_rooms.rst b/docs/workspace/sharing/clean_rooms.rst new file mode 100644 index 000000000..827b39f0d --- /dev/null +++ b/docs/workspace/sharing/clean_rooms.rst @@ -0,0 +1,103 @@ +``w.clean_rooms``: Clean Rooms +============================== +.. currentmodule:: databricks.sdk.service.sharing + +.. py:class:: CleanRoomsAPI + + A clean room is a secure, privacy-protecting environment where two or more parties can share sensitive + enterprise data, including customer data, for measurements, insights, activation and other use cases. + + To create clean rooms, you must be a metastore admin or a user with the **CREATE_CLEAN_ROOM** privilege. + + .. py:method:: create(name: str, remote_detailed_info: CentralCleanRoomInfo [, comment: Optional[str]]) -> CleanRoomInfo + + Create a clean room. + + Creates a new clean room with specified colaborators. The caller must be a metastore admin or have the + **CREATE_CLEAN_ROOM** privilege on the metastore. + + :param name: str + Name of the clean room. 
+ :param remote_detailed_info: :class:`CentralCleanRoomInfo` + Central clean room details. + :param comment: str (optional) + User-provided free-form text description. + + :returns: :class:`CleanRoomInfo` + + + .. py:method:: delete(name_arg: str) + + Delete a clean room. + + Deletes a data object clean room from the metastore. The caller must be an owner of the clean room. + + :param name_arg: str + The name of the clean room. + + + + + .. py:method:: get(name_arg: str [, include_remote_details: Optional[bool]]) -> CleanRoomInfo + + Get a clean room. + + Gets a data object clean room from the metastore. The caller must be a metastore admin or the owner of + the clean room. + + :param name_arg: str + The name of the clean room. + :param include_remote_details: bool (optional) + Whether to include remote details (central) on the clean room. + + :returns: :class:`CleanRoomInfo` + + + .. py:method:: list( [, max_results: Optional[int], page_token: Optional[str]]) -> Iterator[CleanRoomInfo] + + List clean rooms. + + Gets an array of data object clean rooms from the metastore. The caller must be a metastore admin or + the owner of the clean room. There is no guarantee of a specific ordering of the elements in the + array. + + :param max_results: int (optional) + Maximum number of clean rooms to return. If not set, all the clean rooms are returned (not + recommended). - when set to a value greater than 0, the page length is the minimum of this value and + a server configured value; - when set to 0, the page length is set to a server configured value + (recommended); - when set to a value less than 0, an invalid parameter error is returned; + :param page_token: str (optional) + Opaque pagination token to go to next page based on previous query. + + :returns: Iterator over :class:`CleanRoomInfo` + + + .. py:method:: update(name_arg: str [, catalog_updates: Optional[List[CleanRoomCatalogUpdate]], comment: Optional[str], owner: Optional[str]]) -> CleanRoomInfo + + Update a clean room. + + Updates the clean room with the changes and data objects in the request. The caller must be the owner + of the clean room or a metastore admin. + + When the caller is a metastore admin, only the __owner__ field can be updated. + + In the case that the clean room name is changed **updateCleanRoom** requires that the caller is both + the clean room owner and a metastore admin. + + For each table that is added through this method, the clean room owner must also have **SELECT** + privilege on the table. The privilege must be maintained indefinitely for recipients to be able to + access the table. Typically, you should use a group as the clean room owner. + + Table removals through **update** do not require additional privileges. + + :param name_arg: str + The name of the clean room. + :param catalog_updates: List[:class:`CleanRoomCatalogUpdate`] (optional) + Array of shared data object updates. + :param comment: str (optional) + User-provided free-form text description. + :param owner: str (optional) + Username of current owner of clean room. + + :returns: :class:`CleanRoomInfo` + \ No newline at end of file diff --git a/docs/workspace/sharing/index.rst b/docs/workspace/sharing/index.rst new file mode 100644 index 000000000..e012eb548 --- /dev/null +++ b/docs/workspace/sharing/index.rst @@ -0,0 +1,14 @@ + +Delta Sharing +============= + +Configure data sharing with Unity Catalog for providers, recipients, and shares + +.. 
toctree:: + :maxdepth: 1 + + clean_rooms + providers + recipient_activation + recipients + shares \ No newline at end of file diff --git a/docs/workspace/sharing/providers.rst b/docs/workspace/sharing/providers.rst new file mode 100644 index 000000000..1382b5a92 --- /dev/null +++ b/docs/workspace/sharing/providers.rst @@ -0,0 +1,214 @@ +``w.providers``: Providers +========================== +.. currentmodule:: databricks.sdk.service.sharing + +.. py:class:: ProvidersAPI + + A data provider is an object representing the organization in the real world who shares the data. A + provider contains shares which further contain the shared data. + + .. py:method:: create(name: str, authentication_type: AuthenticationType [, comment: Optional[str], recipient_profile_str: Optional[str]]) -> ProviderInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + public_share_recipient = """{ + "shareCredentialsVersion":1, + "bearerToken":"dapiabcdefghijklmonpqrstuvwxyz", + "endpoint":"https://sharing.delta.io/delta-sharing/" + } + """ + + created = w.providers.create(name=f'sdk-{time.time_ns()}', recipient_profile_str=public_share_recipient) + + # cleanup + w.providers.delete(name=created.name) + + Create an auth provider. + + Creates a new authentication provider minimally based on a name and authentication type. The caller + must be an admin on the metastore. + + :param name: str + The name of the Provider. + :param authentication_type: :class:`AuthenticationType` + The delta sharing authentication type. + :param comment: str (optional) + Description about the provider. + :param recipient_profile_str: str (optional) + This field is required when the __authentication_type__ is **TOKEN** or not provided. + + :returns: :class:`ProviderInfo` + + + .. py:method:: delete(name: str) + + Delete a provider. + + Deletes an authentication provider, if the caller is a metastore admin or is the owner of the + provider. + + :param name: str + Name of the provider. + + + + + .. py:method:: get(name: str) -> ProviderInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + public_share_recipient = """{ + "shareCredentialsVersion":1, + "bearerToken":"dapiabcdefghijklmonpqrstuvwxyz", + "endpoint":"https://sharing.delta.io/delta-sharing/" + } + """ + + created = w.providers.create(name=f'sdk-{time.time_ns()}', recipient_profile_str=public_share_recipient) + + _ = w.providers.get(name=created.name) + + # cleanup + w.providers.delete(name=created.name) + + Get a provider. + + Gets a specific authentication provider. The caller must supply the name of the provider, and must + either be a metastore admin or the owner of the provider. + + :param name: str + Name of the provider. + + :returns: :class:`ProviderInfo` + + + .. py:method:: list( [, data_provider_global_metastore_id: Optional[str]]) -> Iterator[ProviderInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import sharing + + w = WorkspaceClient() + + all = w.providers.list(sharing.ListProvidersRequest()) + + List providers. + + Gets an array of available authentication providers. The caller must either be a metastore admin or + the owner of the providers. Providers not owned by the caller are not included in the response. There + is no guarantee of a specific ordering of the elements in the array. 
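+
+        Because every argument is optional, the request object shown in the example above can also be dropped
+        entirely; a minimal sketch (assuming :class:`ProviderInfo` exposes `name` and `authentication_type`):
+
+        .. code-block::
+
+            from databricks.sdk import WorkspaceClient
+
+            w = WorkspaceClient()
+
+            # Iterate over every provider visible to the caller.
+            for p in w.providers.list():
+                print(p.name, p.authentication_type)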
+ + :param data_provider_global_metastore_id: str (optional) + If not provided, all providers will be returned. If no providers exist with this ID, no results will + be returned. + + :returns: Iterator over :class:`ProviderInfo` + + + .. py:method:: list_shares(name: str) -> Iterator[ProviderShare] + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + public_share_recipient = """{ + "shareCredentialsVersion":1, + "bearerToken":"dapiabcdefghijklmonpqrstuvwxyz", + "endpoint":"https://sharing.delta.io/delta-sharing/" + } + """ + + created = w.providers.create(name=f'sdk-{time.time_ns()}', recipient_profile_str=public_share_recipient) + + shares = w.providers.list_shares(name=created.name) + + # cleanup + w.providers.delete(name=created.name) + + List shares by Provider. + + Gets an array of a specified provider's shares within the metastore where: + + * the caller is a metastore admin, or * the caller is the owner. + + :param name: str + Name of the provider in which to list shares. + + :returns: Iterator over :class:`ProviderShare` + + + .. py:method:: update(name: str [, comment: Optional[str], new_name: Optional[str], owner: Optional[str], recipient_profile_str: Optional[str]]) -> ProviderInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + public_share_recipient = """{ + "shareCredentialsVersion":1, + "bearerToken":"dapiabcdefghijklmonpqrstuvwxyz", + "endpoint":"https://sharing.delta.io/delta-sharing/" + } + """ + + created = w.providers.create(name=f'sdk-{time.time_ns()}', recipient_profile_str=public_share_recipient) + + _ = w.providers.update(name=created.name, comment="Comment for update") + + # cleanup + w.providers.delete(name=created.name) + + Update a provider. + + Updates the information for an authentication provider, if the caller is a metastore admin or is the + owner of the provider. If the update changes the provider name, the caller must be both a metastore + admin and the owner of the provider. + + :param name: str + Name of the provider. + :param comment: str (optional) + Description about the provider. + :param new_name: str (optional) + New name for the provider. + :param owner: str (optional) + Username of Provider owner. + :param recipient_profile_str: str (optional) + This field is required when the __authentication_type__ is **TOKEN** or not provided. + + :returns: :class:`ProviderInfo` + \ No newline at end of file diff --git a/docs/workspace/sharing/recipient_activation.rst b/docs/workspace/sharing/recipient_activation.rst new file mode 100644 index 000000000..2c214d9c0 --- /dev/null +++ b/docs/workspace/sharing/recipient_activation.rst @@ -0,0 +1,37 @@ +``w.recipient_activation``: Recipient Activation +================================================ +.. currentmodule:: databricks.sdk.service.sharing + +.. py:class:: RecipientActivationAPI + + The Recipient Activation API is only applicable in the open sharing model where the recipient object has + the authentication type of `TOKEN`. The data recipient follows the activation link shared by the data + provider to download the credential file that includes the access token. The recipient will then use the + credential file to establish a secure connection with the provider to receive the shared data. + + Note that you can download the credential file only once. 
Recipients should treat the downloaded + credential as a secret and must not share it outside of their organization. + + .. py:method:: get_activation_url_info(activation_url: str) + + Get a share activation URL. + + Gets an activation URL for a share. + + :param activation_url: str + The one time activation url. It also accepts activation token. + + + + + .. py:method:: retrieve_token(activation_url: str) -> RetrieveTokenResponse + + Get an access token. + + Retrieve access token with an activation url. This is a public API without any authentication. + + :param activation_url: str + The one time activation url. It also accepts activation token. + + :returns: :class:`RetrieveTokenResponse` + \ No newline at end of file diff --git a/docs/workspace/sharing/recipients.rst b/docs/workspace/sharing/recipients.rst new file mode 100644 index 000000000..86a004d36 --- /dev/null +++ b/docs/workspace/sharing/recipients.rst @@ -0,0 +1,247 @@ +``w.recipients``: Recipients +============================ +.. currentmodule:: databricks.sdk.service.sharing + +.. py:class:: RecipientsAPI + + A recipient is an object you create using :method:recipients/create to represent an organization which you + want to allow access shares. The way how sharing works differs depending on whether or not your recipient + has access to a Databricks workspace that is enabled for Unity Catalog: + + - For recipients with access to a Databricks workspace that is enabled for Unity Catalog, you can create a + recipient object along with a unique sharing identifier you get from the recipient. The sharing identifier + is the key identifier that enables the secure connection. This sharing mode is called + **Databricks-to-Databricks sharing**. + + - For recipients without access to a Databricks workspace that is enabled for Unity Catalog, when you + create a recipient object, Databricks generates an activation link you can send to the recipient. The + recipient follows the activation link to download the credential file, and then uses the credential file + to establish a secure connection to receive the shared data. This sharing mode is called **open sharing**. + + .. py:method:: create(name: str, authentication_type: AuthenticationType [, comment: Optional[str], data_recipient_global_metastore_id: Optional[str], ip_access_list: Optional[IpAccessList], owner: Optional[str], properties_kvpairs: Optional[SecurablePropertiesKvPairs], sharing_code: Optional[str]]) -> RecipientInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.recipients.create(name=f'sdk-{time.time_ns()}') + + # cleanup + w.recipients.delete(name=created.name) + + Create a share recipient. + + Creates a new recipient with the delta sharing authentication type in the metastore. The caller must + be a metastore admin or has the **CREATE_RECIPIENT** privilege on the metastore. + + :param name: str + Name of Recipient. + :param authentication_type: :class:`AuthenticationType` + The delta sharing authentication type. + :param comment: str (optional) + Description about the recipient. + :param data_recipient_global_metastore_id: str (optional) + The global Unity Catalog metastore id provided by the data recipient. This field is required when + the __authentication_type__ is **DATABRICKS**. The identifier is of format + __cloud__:__region__:__metastore-uuid__. 
+ :param ip_access_list: :class:`IpAccessList` (optional) + IP Access List + :param owner: str (optional) + Username of the recipient owner. + :param properties_kvpairs: :class:`SecurablePropertiesKvPairs` (optional) + Recipient properties as map of string key-value pairs. + :param sharing_code: str (optional) + The one-time sharing code provided by the data recipient. This field is required when the + __authentication_type__ is **DATABRICKS**. + + :returns: :class:`RecipientInfo` + + + .. py:method:: delete(name: str) + + Delete a share recipient. + + Deletes the specified recipient from the metastore. The caller must be the owner of the recipient. + + :param name: str + Name of the recipient. + + + + + .. py:method:: get(name: str) -> RecipientInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.recipients.create(name=f'sdk-{time.time_ns()}') + + _ = w.recipients.get(name=created.name) + + # cleanup + w.recipients.delete(name=created.name) + + Get a share recipient. + + Gets a share recipient from the metastore if: + + * the caller is the owner of the share recipient, or: * is a metastore admin + + :param name: str + Name of the recipient. + + :returns: :class:`RecipientInfo` + + + .. py:method:: list( [, data_recipient_global_metastore_id: Optional[str]]) -> Iterator[RecipientInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import sharing + + w = WorkspaceClient() + + all = w.recipients.list(sharing.ListRecipientsRequest()) + + List share recipients. + + Gets an array of all share recipients within the current metastore where: + + * the caller is a metastore admin, or * the caller is the owner. There is no guarantee of a specific + ordering of the elements in the array. + + :param data_recipient_global_metastore_id: str (optional) + If not provided, all recipients will be returned. If no recipients exist with this ID, no results + will be returned. + + :returns: Iterator over :class:`RecipientInfo` + + + .. py:method:: rotate_token(name: str, existing_token_expire_in_seconds: int) -> RecipientInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.recipients.create(name=f'sdk-{time.time_ns()}') + + recipient_info = w.recipients.rotate_token(name=created.name, existing_token_expire_in_seconds=0) + + # cleanup + w.recipients.delete(name=created.name) + + Rotate a token. + + Refreshes the specified recipient's delta sharing authentication token with the provided token info. + The caller must be the owner of the recipient. + + :param name: str + The name of the recipient. + :param existing_token_expire_in_seconds: int + The expiration time of the bearer token in ISO 8601 format. This will set the expiration_time of + existing token only to a smaller timestamp, it cannot extend the expiration_time. Use 0 to expire + the existing token immediately, negative number will return an error. + + :returns: :class:`RecipientInfo` + + + .. py:method:: share_permissions(name: str) -> GetRecipientSharePermissionsResponse + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.recipients.create(name=f'sdk-{time.time_ns()}') + + share_permissions = w.recipients.share_permissions(name=created.name) + + # cleanup + w.recipients.delete(name=created.name) + + Get recipient share permissions. + + Gets the share permissions for the specified Recipient. The caller must be a metastore admin or the + owner of the Recipient. + + :param name: str + The name of the Recipient. + + :returns: :class:`GetRecipientSharePermissionsResponse` + + + .. py:method:: update(name: str [, comment: Optional[str], ip_access_list: Optional[IpAccessList], new_name: Optional[str], owner: Optional[str], properties_kvpairs: Optional[SecurablePropertiesKvPairs]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.recipients.create(name=f'sdk-{time.time_ns()}') + + w.recipients.update(name=created.name, comment=f'sdk-{time.time_ns()}') + + # cleanup + w.recipients.delete(name=created.name) + + Update a share recipient. + + Updates an existing recipient in the metastore. The caller must be a metastore admin or the owner of + the recipient. If the recipient name will be updated, the user must be both a metastore admin and the + owner of the recipient. + + :param name: str + Name of the recipient. + :param comment: str (optional) + Description about the recipient. + :param ip_access_list: :class:`IpAccessList` (optional) + IP Access List + :param new_name: str (optional) + New name for the recipient. + :param owner: str (optional) + Username of the recipient owner. + :param properties_kvpairs: :class:`SecurablePropertiesKvPairs` (optional) + Recipient properties as map of string key-value pairs. When provided in update request, the + specified properties will override the existing properties. To add and remove properties, one would + need to perform a read-modify-write. + + + \ No newline at end of file diff --git a/docs/workspace/sharing/shares.rst b/docs/workspace/sharing/shares.rst new file mode 100644 index 000000000..63c9b1ebe --- /dev/null +++ b/docs/workspace/sharing/shares.rst @@ -0,0 +1,211 @@ +``w.shares``: Shares +==================== +.. currentmodule:: databricks.sdk.service.sharing + +.. py:class:: SharesAPI + + A share is a container instantiated with :method:shares/create. Once created you can iteratively register + a collection of existing data assets defined within the metastore using :method:shares/update. You can + register data assets under their original name, qualified by their original schema, or provide alternate + exposed names. + + .. py:method:: create(name: str [, comment: Optional[str]]) -> ShareInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created_share = w.shares.create(name=f'sdk-{time.time_ns()}') + + # cleanup + w.shares.delete(name=created_share.name) + + Create a share. + + Creates a new share for data objects. Data objects can be added after creation with **update**. The + caller must be a metastore admin or have the **CREATE_SHARE** privilege on the metastore. + + :param name: str + Name of the share. + :param comment: str (optional) + User-provided free-form text description. + + :returns: :class:`ShareInfo` + + + .. py:method:: delete(name: str) + + Delete a share. + + Deletes a data object share from the metastore. The caller must be an owner of the share. 
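+        A minimal sketch of a tolerant delete, assuming the SDK's ``NotFound`` error class and a
+        placeholder share name:
+
+        .. code-block::
+
+            from databricks.sdk import WorkspaceClient
+            from databricks.sdk.errors import NotFound
+
+            w = WorkspaceClient()
+
+            try:
+                w.shares.delete(name='my-share')  # placeholder name
+            except NotFound:
+                # The share does not exist (or was already deleted); nothing more to do.
+                pass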
+ + :param name: str + The name of the share. + + + + + .. py:method:: get(name: str [, include_shared_data: Optional[bool]]) -> ShareInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created_share = w.shares.create(name=f'sdk-{time.time_ns()}') + + _ = w.shares.get(name=created_share.name) + + # cleanup + w.shares.delete(name=created_share.name) + + Get a share. + + Gets a data object share from the metastore. The caller must be a metastore admin or the owner of the + share. + + :param name: str + The name of the share. + :param include_shared_data: bool (optional) + Query for data to include in the share. + + :returns: :class:`ShareInfo` + + + .. py:method:: list() -> Iterator[ShareInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.shares.list() + + List shares. + + Gets an array of data object shares from the metastore. The caller must be a metastore admin or the + owner of the share. There is no guarantee of a specific ordering of the elements in the array. + + :returns: Iterator over :class:`ShareInfo` + + + .. py:method:: share_permissions(name: str) -> catalog.PermissionsList + + Get permissions. + + Gets the permissions for a data share from the metastore. The caller must be a metastore admin or the + owner of the share. + + :param name: str + The name of the share. + + :returns: :class:`PermissionsList` + + + .. py:method:: update(name: str [, comment: Optional[str], new_name: Optional[str], owner: Optional[str], updates: Optional[List[SharedDataObjectUpdate]]]) -> ShareInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import sharing + + w = WorkspaceClient() + + table_name = f'sdk-{time.time_ns()}' + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + _ = w.statement_execution.execute(warehouse_id=os.environ["TEST_DEFAULT_WAREHOUSE_ID"], + catalog=created_catalog.name, + schema=created_schema.name, + statement="CREATE TABLE %s AS SELECT 2+2 as four" % (table_name)).result() + + table_full_name = "%s.%s.%s" % (created_catalog.name, created_schema.name, table_name) + + created_share = w.shares.create(name=f'sdk-{time.time_ns()}') + + _ = w.shares.update(name=created_share.name, + updates=[ + sharing.SharedDataObjectUpdate(action=sharing.SharedDataObjectUpdateAction.ADD, + data_object=sharing.SharedDataObject( + name=table_full_name, data_object_type="TABLE")) + ]) + + # cleanup + w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=created_catalog.name, force=True) + w.tables.delete(full_name=table_full_name) + w.shares.delete(name=created_share.name) + + Update a share. + + Updates the share with the changes and data objects in the request. The caller must be the owner of + the share or a metastore admin. + + When the caller is a metastore admin, only the __owner__ field can be updated. + + In the case that the share name is changed, **updateShare** requires that the caller is both the share + owner and a metastore admin. + + For each table that is added through this method, the share owner must also have **SELECT** privilege + on the table. This privilege must be maintained indefinitely for recipients to be able to access the + table. Typically, you should use a group as the share owner. 
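+        Removing a previously shared table is symmetric; a hedged sketch, assuming the ``REMOVE``
+        member of :class:`SharedDataObjectUpdateAction` and placeholder share and table names:
+
+        .. code-block::
+
+            from databricks.sdk import WorkspaceClient
+            from databricks.sdk.service import sharing
+
+            w = WorkspaceClient()
+
+            # Drop a table from an existing share.
+            w.shares.update(name='my-share',
+                            updates=[
+                                sharing.SharedDataObjectUpdate(
+                                    action=sharing.SharedDataObjectUpdateAction.REMOVE,
+                                    data_object=sharing.SharedDataObject(name='main.default.my_table',
+                                                                         data_object_type="TABLE"))
+                            ])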
+ + Table removals through **update** do not require additional privileges. + + :param name: str + The name of the share. + :param comment: str (optional) + User-provided free-form text description. + :param new_name: str (optional) + New name for the share. + :param owner: str (optional) + Username of current owner of share. + :param updates: List[:class:`SharedDataObjectUpdate`] (optional) + Array of shared data object updates. + + :returns: :class:`ShareInfo` + + + .. py:method:: update_permissions(name: str [, changes: Optional[List[catalog.PermissionsChange]]]) + + Update permissions. + + Updates the permissions for a data share in the metastore. The caller must be a metastore admin or an + owner of the share. + + For new recipient grants, the user must also be the owner of the recipients. recipient revocations do + not require additional privileges. + + :param name: str + The name of the share. + :param changes: List[:class:`PermissionsChange`] (optional) + Array of permission changes. + + + \ No newline at end of file diff --git a/docs/workspace/sql/alerts.rst b/docs/workspace/sql/alerts.rst new file mode 100644 index 000000000..49a518bda --- /dev/null +++ b/docs/workspace/sql/alerts.rst @@ -0,0 +1,183 @@ +``w.alerts``: Alerts +==================== +.. currentmodule:: databricks.sdk.service.sql + +.. py:class:: AlertsAPI + + The alerts API can be used to perform CRUD operations on alerts. An alert is a Databricks SQL object that + periodically runs a query, evaluates a condition of its result, and notifies one or more users and/or + notification destinations if the condition was met. Alerts can be scheduled using the `sql_task` type of + the Jobs API, e.g. :method:jobs/create. + + .. py:method:: create(name: str, options: AlertOptions, query_id: str [, parent: Optional[str], rearm: Optional[int]]) -> Alert + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import sql + + w = WorkspaceClient() + + srcs = w.data_sources.list() + + query = w.queries.create(name=f'sdk-{time.time_ns()}', + data_source_id=srcs[0].id, + description="test query from Go SDK", + query="SELECT 1") + + alert = w.alerts.create(options=sql.AlertOptions(column="1", op="==", value="1"), + name=f'sdk-{time.time_ns()}', + query_id=query.id) + + # cleanup + w.queries.delete(query_id=query.id) + w.alerts.delete(alert_id=alert.id) + + Create an alert. + + Creates an alert. An alert is a Databricks SQL object that periodically runs a query, evaluates a + condition of its result, and notifies users or notification destinations if the condition was met. + + :param name: str + Name of the alert. + :param options: :class:`AlertOptions` + Alert configuration options. + :param query_id: str + Query ID. + :param parent: str (optional) + The identifier of the workspace folder containing the object. + :param rearm: int (optional) + Number of seconds after being triggered before the alert rearms itself and can be triggered again. + If `null`, alert will never be triggered again. + + :returns: :class:`Alert` + + + .. py:method:: delete(alert_id: str) + + Delete an alert. + + Deletes an alert. Deleted alerts are no longer accessible and cannot be restored. **Note:** Unlike + queries and dashboards, alerts cannot be moved to the trash. + + :param alert_id: str + + + + + .. py:method:: get(alert_id: str) -> Alert + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import sql + + w = WorkspaceClient() + + srcs = w.data_sources.list() + + query = w.queries.create(name=f'sdk-{time.time_ns()}', + data_source_id=srcs[0].id, + description="test query from Go SDK", + query="SELECT 1") + + alert = w.alerts.create(options=sql.AlertOptions(column="1", op="==", value="1"), + name=f'sdk-{time.time_ns()}', + query_id=query.id) + + by_id = w.alerts.get(alert_id=alert.id) + + # cleanup + w.queries.delete(query_id=query.id) + w.alerts.delete(alert_id=alert.id) + + Get an alert. + + Gets an alert. + + :param alert_id: str + + :returns: :class:`Alert` + + + .. py:method:: list() -> Iterator[Alert] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.alerts.list() + + Get alerts. + + Gets a list of alerts. + + :returns: Iterator over :class:`Alert` + + + .. py:method:: update(alert_id: str, name: str, options: AlertOptions, query_id: str [, rearm: Optional[int]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import sql + + w = WorkspaceClient() + + srcs = w.data_sources.list() + + query = w.queries.create(name=f'sdk-{time.time_ns()}', + data_source_id=srcs[0].id, + description="test query from Go SDK", + query="SELECT 1") + + alert = w.alerts.create(options=sql.AlertOptions(column="1", op="==", value="1"), + name=f'sdk-{time.time_ns()}', + query_id=query.id) + + w.alerts.update(options=sql.AlertOptions(column="1", op="==", value="1"), + alert_id=alert.id, + name=f'sdk-{time.time_ns()}', + query_id=query.id) + + # cleanup + w.queries.delete(query_id=query.id) + w.alerts.delete(alert_id=alert.id) + + Update an alert. + + Updates an alert. + + :param alert_id: str + :param name: str + Name of the alert. + :param options: :class:`AlertOptions` + Alert configuration options. + :param query_id: str + Query ID. + :param rearm: int (optional) + Number of seconds after being triggered before the alert rearms itself and can be triggered again. + If `null`, alert will never be triggered again. + + + \ No newline at end of file diff --git a/docs/workspace/sql/dashboard_widgets.rst b/docs/workspace/sql/dashboard_widgets.rst new file mode 100644 index 000000000..d4bbcde1d --- /dev/null +++ b/docs/workspace/sql/dashboard_widgets.rst @@ -0,0 +1,56 @@ +``w.dashboard_widgets``: Dashboard Widgets +========================================== +.. currentmodule:: databricks.sdk.service.sql + +.. py:class:: DashboardWidgetsAPI + + This is an evolving API that facilitates the addition and removal of widgets from existing dashboards + within the Databricks Workspace. Data structures may change over time. + + .. py:method:: create(dashboard_id: str, options: WidgetOptions, width: int [, text: Optional[str], visualization_id: Optional[str]]) -> Widget + + Add widget to a dashboard. + + :param dashboard_id: str + Dashboard ID returned by :method:dashboards/create. + :param options: :class:`WidgetOptions` + :param width: int + Width of a widget + :param text: str (optional) + If this is a textbox widget, the application displays this text. This field is ignored if the widget + contains a visualization in the `visualization` field. + :param visualization_id: str (optional) + Query Vizualization ID returned by :method:queryvisualizations/create. + + :returns: :class:`Widget` + + + .. py:method:: delete(id: str) + + Remove widget. 
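+        A hedged round-trip sketch of adding and then removing a widget, assuming an empty
+        :class:`WidgetOptions` is acceptable for a textbox widget and using a placeholder
+        dashboard ID:
+
+        .. code-block::
+
+            from databricks.sdk import WorkspaceClient
+            from databricks.sdk.service import sql
+
+            w = WorkspaceClient()
+
+            # Add a textbox widget to an existing dashboard, then remove it again.
+            widget = w.dashboard_widgets.create(dashboard_id='<dashboard-id>',  # placeholder
+                                                options=sql.WidgetOptions(),
+                                                width=1,
+                                                text='Hello from the SDK')
+            w.dashboard_widgets.delete(id=widget.id)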
+ + :param id: str + Widget ID returned by :method:dashboardwidgets/create + + + + + .. py:method:: update(id: str, dashboard_id: str, options: WidgetOptions, width: int [, text: Optional[str], visualization_id: Optional[str]]) -> Widget + + Update existing widget. + + :param id: str + Widget ID returned by :method:dashboardwidgets/create + :param dashboard_id: str + Dashboard ID returned by :method:dashboards/create. + :param options: :class:`WidgetOptions` + :param width: int + Width of a widget + :param text: str (optional) + If this is a textbox widget, the application displays this text. This field is ignored if the widget + contains a visualization in the `visualization` field. + :param visualization_id: str (optional) + Query Vizualization ID returned by :method:queryvisualizations/create. + + :returns: :class:`Widget` + \ No newline at end of file diff --git a/docs/workspace/sql/dashboards.rst b/docs/workspace/sql/dashboards.rst new file mode 100644 index 000000000..29cafb7cc --- /dev/null +++ b/docs/workspace/sql/dashboards.rst @@ -0,0 +1,184 @@ +``w.dashboards``: Dashboards +============================ +.. currentmodule:: databricks.sdk.service.sql + +.. py:class:: DashboardsAPI + + In general, there is little need to modify dashboards using the API. However, it can be useful to use + dashboard objects to look-up a collection of related query IDs. The API can also be used to duplicate + multiple dashboards at once since you can get a dashboard definition with a GET request and then POST it + to create a new one. Dashboards can be scheduled using the `sql_task` type of the Jobs API, e.g. + :method:jobs/create. + + .. py:method:: create(name: str [, dashboard_filters_enabled: Optional[bool], is_favorite: Optional[bool], parent: Optional[str], run_as_role: Optional[RunAsRole], tags: Optional[List[str]]]) -> Dashboard + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.dashboards.create(name=f'sdk-{time.time_ns()}') + + # cleanup + w.dashboards.delete(dashboard_id=created.id) + + Create a dashboard object. + + :param name: str + The title of this dashboard that appears in list views and at the top of the dashboard page. + :param dashboard_filters_enabled: bool (optional) + Indicates whether the dashboard filters are enabled + :param is_favorite: bool (optional) + Indicates whether this dashboard object should appear in the current user's favorites list. + :param parent: str (optional) + The identifier of the workspace folder containing the object. + :param run_as_role: :class:`RunAsRole` (optional) + Sets the **Run as** role for the object. Must be set to one of `"viewer"` (signifying "run as + viewer" behavior) or `"owner"` (signifying "run as owner" behavior) + :param tags: List[str] (optional) + + :returns: :class:`Dashboard` + + + .. py:method:: delete(dashboard_id: str) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.dashboards.create(name=f'sdk-{time.time_ns()}') + + w.dashboards.delete(dashboard_id=created.id) + + # cleanup + w.dashboards.delete(dashboard_id=created.id) + + Remove a dashboard. + + Moves a dashboard to the trash. Trashed dashboards do not appear in list views or searches, and cannot + be shared. + + :param dashboard_id: str + + + + + .. py:method:: get(dashboard_id: str) -> Dashboard + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.dashboards.create(name=f'sdk-{time.time_ns()}') + + by_id = w.dashboards.get(dashboard_id=created.id) + + # cleanup + w.dashboards.delete(dashboard_id=created.id) + + Retrieve a definition. + + Returns a JSON representation of a dashboard object, including its visualization and query objects. + + :param dashboard_id: str + + :returns: :class:`Dashboard` + + + .. py:method:: list( [, order: Optional[ListOrder], page: Optional[int], page_size: Optional[int], q: Optional[str]]) -> Iterator[Dashboard] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import sql + + w = WorkspaceClient() + + all = w.dashboards.list(sql.ListDashboardsRequest()) + + Get dashboard objects. + + Fetch a paginated list of dashboard objects. + + :param order: :class:`ListOrder` (optional) + Name of dashboard attribute to order by. + :param page: int (optional) + Page number to retrieve. + :param page_size: int (optional) + Number of dashboards to return per page. + :param q: str (optional) + Full text search term. + + :returns: Iterator over :class:`Dashboard` + + + .. py:method:: restore(dashboard_id: str) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.dashboards.create(name=f'sdk-{time.time_ns()}') + + w.dashboards.restore(dashboard_id=created.id) + + # cleanup + w.dashboards.delete(dashboard_id=created.id) + + Restore a dashboard. + + A restored dashboard appears in list views and searches and can be shared. + + :param dashboard_id: str + + + + + .. py:method:: update(dashboard_id: str [, name: Optional[str], run_as_role: Optional[RunAsRole]]) -> Dashboard + + Change a dashboard definition. + + Modify this dashboard definition. This operation only affects attributes of the dashboard object. It + does not add, modify, or remove widgets. + + **Note**: You cannot undo this operation. + + :param dashboard_id: str + :param name: str (optional) + The title of this dashboard that appears in list views and at the top of the dashboard page. + :param run_as_role: :class:`RunAsRole` (optional) + Sets the **Run as** role for the object. Must be set to one of `"viewer"` (signifying "run as + viewer" behavior) or `"owner"` (signifying "run as owner" behavior) + + :returns: :class:`Dashboard` + \ No newline at end of file diff --git a/docs/workspace/sql/data_sources.rst b/docs/workspace/sql/data_sources.rst new file mode 100644 index 000000000..5cf1ed526 --- /dev/null +++ b/docs/workspace/sql/data_sources.rst @@ -0,0 +1,35 @@ +``w.data_sources``: Data Sources +================================ +.. currentmodule:: databricks.sdk.service.sql + +.. py:class:: DataSourcesAPI + + This API is provided to assist you in making new query objects. When creating a query object, you may + optionally specify a `data_source_id` for the SQL warehouse against which it will run. If you don't + already know the `data_source_id` for your desired SQL warehouse, this API will help you find it. + + This API does not support searches. It returns the full list of SQL warehouses in your workspace. We + advise you to use any text editor, REST client, or `grep` to search the response from this API for the + name of your SQL warehouse as it appears in Databricks SQL. + + .. py:method:: list() -> Iterator[DataSource] + + + Usage: + + .. 
code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + srcs = w.data_sources.list() + + Get a list of SQL warehouses. + + Retrieves a full list of SQL warehouses available in this workspace. All fields that appear in this + API response are enumerated for clarity. However, you need only a SQL warehouse's `id` to create new + queries against it. + + :returns: Iterator over :class:`DataSource` + \ No newline at end of file diff --git a/docs/workspace/sql/dbsql_permissions.rst b/docs/workspace/sql/dbsql_permissions.rst new file mode 100644 index 000000000..07aa4f00f --- /dev/null +++ b/docs/workspace/sql/dbsql_permissions.rst @@ -0,0 +1,63 @@ +``w.dbsql_permissions``: ACL / Permissions +========================================== +.. currentmodule:: databricks.sdk.service.sql + +.. py:class:: DbsqlPermissionsAPI + + The SQL Permissions API is similar to the endpoints of the :method:permissions/set. However, this exposes + only one endpoint, which gets the Access Control List for a given object. You cannot modify any + permissions using this API. + + There are three levels of permission: + + - `CAN_VIEW`: Allows read-only access + + - `CAN_RUN`: Allows read access and run access (superset of `CAN_VIEW`) + + - `CAN_MANAGE`: Allows all actions: read, run, edit, delete, modify permissions (superset of `CAN_RUN`) + + .. py:method:: get(object_type: ObjectTypePlural, object_id: str) -> GetResponse + + Get object ACL. + + Gets a JSON representation of the access control list (ACL) for a specified object. + + :param object_type: :class:`ObjectTypePlural` + The type of object permissions to check. + :param object_id: str + Object ID. An ACL is returned for the object with this UUID. + + :returns: :class:`GetResponse` + + + .. py:method:: set(object_type: ObjectTypePlural, object_id: str [, access_control_list: Optional[List[AccessControl]]]) -> SetResponse + + Set object ACL. + + Sets the access control list (ACL) for a specified object. This operation will complete rewrite the + ACL. + + :param object_type: :class:`ObjectTypePlural` + The type of object permission to set. + :param object_id: str + Object ID. The ACL for the object with this UUID is overwritten by this request's POST content. + :param access_control_list: List[:class:`AccessControl`] (optional) + + :returns: :class:`SetResponse` + + + .. py:method:: transfer_ownership(object_type: OwnableObjectType, object_id: TransferOwnershipObjectId [, new_owner: Optional[str]]) -> Success + + Transfer object ownership. + + Transfers ownership of a dashboard, query, or alert to an active user. Requires an admin API key. + + :param object_type: :class:`OwnableObjectType` + The type of object on which to change ownership. + :param object_id: :class:`TransferOwnershipObjectId` + The ID of the object on which to change ownership. + :param new_owner: str (optional) + Email address for the new owner, who must exist in the workspace. + + :returns: :class:`Success` + \ No newline at end of file diff --git a/docs/workspace/sql/index.rst b/docs/workspace/sql/index.rst new file mode 100644 index 000000000..397de5c72 --- /dev/null +++ b/docs/workspace/sql/index.rst @@ -0,0 +1,19 @@ + +Databricks SQL +============== + +Manage Databricks SQL assets, including warehouses, dashboards, queries and query history, and alerts + +.. 
toctree:: + :maxdepth: 1 + + alerts + dashboard_widgets + dashboards + data_sources + dbsql_permissions + queries + query_history + query_visualizations + statement_execution + warehouses \ No newline at end of file diff --git a/docs/workspace/sql/queries.rst b/docs/workspace/sql/queries.rst new file mode 100644 index 000000000..32803f6d3 --- /dev/null +++ b/docs/workspace/sql/queries.rst @@ -0,0 +1,214 @@ +``w.queries``: Queries / Results +================================ +.. currentmodule:: databricks.sdk.service.sql + +.. py:class:: QueriesAPI + + These endpoints are used for CRUD operations on query definitions. Query definitions include the target + SQL warehouse, query text, name, description, tags, parameters, and visualizations. Queries can be + scheduled using the `sql_task` type of the Jobs API, e.g. :method:jobs/create. + + .. py:method:: create( [, data_source_id: Optional[str], description: Optional[str], name: Optional[str], options: Optional[Any], parent: Optional[str], query: Optional[str], run_as_role: Optional[RunAsRole]]) -> Query + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + srcs = w.data_sources.list() + + query = w.queries.create(name=f'sdk-{time.time_ns()}', + data_source_id=srcs[0].id, + description="test query from Go SDK", + query="SHOW TABLES") + + # cleanup + w.queries.delete(query_id=query.id) + + Create a new query definition. + + Creates a new query definition. Queries created with this endpoint belong to the authenticated user + making the request. + + The `data_source_id` field specifies the ID of the SQL warehouse to run this query against. You can + use the Data Sources API to see a complete list of available SQL warehouses. Or you can copy the + `data_source_id` from an existing query. + + **Note**: You cannot add a visualization until you create the query. + + :param data_source_id: str (optional) + Data source ID maps to the ID of the data source used by the resource and is distinct from the + warehouse ID. [Learn more]. + + [Learn more]: https://docs.databricks.com/api/workspace/datasources/list + :param description: str (optional) + General description that conveys additional information about this query such as usage notes. + :param name: str (optional) + The title of this query that appears in list views, widget headings, and on the query page. + :param options: Any (optional) + Exclusively used for storing a list parameter definitions. A parameter is an object with `title`, + `name`, `type`, and `value` properties. The `value` field here is the default value. It can be + overridden at runtime. + :param parent: str (optional) + The identifier of the workspace folder containing the object. + :param query: str (optional) + The text of the query to be run. + :param run_as_role: :class:`RunAsRole` (optional) + Sets the **Run as** role for the object. Must be set to one of `"viewer"` (signifying "run as + viewer" behavior) or `"owner"` (signifying "run as owner" behavior) + + :returns: :class:`Query` + + + .. py:method:: delete(query_id: str) + + Delete a query. + + Moves a query to the trash. Trashed queries immediately disappear from searches and list views, and + they cannot be used for alerts. The trash is deleted after 30 days. + + :param query_id: str + + + + + .. py:method:: get(query_id: str) -> Query + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + srcs = w.data_sources.list() + + query = w.queries.create(name=f'sdk-{time.time_ns()}', + data_source_id=srcs[0].id, + description="test query from Go SDK", + query="SHOW TABLES") + + by_id = w.queries.get(query_id=query.id) + + # cleanup + w.queries.delete(query_id=query.id) + + Get a query definition. + + Retrieve a query object definition along with contextual permissions information about the currently + authenticated user. + + :param query_id: str + + :returns: :class:`Query` + + + .. py:method:: list( [, order: Optional[str], page: Optional[int], page_size: Optional[int], q: Optional[str]]) -> Iterator[Query] + + Get a list of queries. + + Gets a list of queries. Optionally, this list can be filtered by a search term. + + :param order: str (optional) + Name of query attribute to order by. Default sort order is ascending. Append a dash (`-`) to order + descending instead. + + - `name`: The name of the query. + + - `created_at`: The timestamp the query was created. + + - `runtime`: The time it took to run this query. This is blank for parameterized queries. A blank + value is treated as the highest value for sorting. + + - `executed_at`: The timestamp when the query was last run. + + - `created_by`: The user name of the user that created the query. + :param page: int (optional) + Page number to retrieve. + :param page_size: int (optional) + Number of queries to return per page. + :param q: str (optional) + Full text search term + + :returns: Iterator over :class:`Query` + + + .. py:method:: restore(query_id: str) + + Restore a query. + + Restore a query that has been moved to the trash. A restored query appears in list views and searches. + You can use restored queries for alerts. + + :param query_id: str + + + + + .. py:method:: update(query_id: str [, data_source_id: Optional[str], description: Optional[str], name: Optional[str], options: Optional[Any], query: Optional[str], run_as_role: Optional[RunAsRole]]) -> Query + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + srcs = w.data_sources.list() + + query = w.queries.create(name=f'sdk-{time.time_ns()}', + data_source_id=srcs[0].id, + description="test query from Go SDK", + query="SHOW TABLES") + + updated = w.queries.update(query_id=query.id, + name=f'sdk-{time.time_ns()}', + data_source_id=srcs[0].id, + description="UPDATED: test query from Go SDK", + query="SELECT 2+2") + + # cleanup + w.queries.delete(query_id=query.id) + + Change a query definition. + + Modify this query definition. + + **Note**: You cannot undo this operation. + + :param query_id: str + :param data_source_id: str (optional) + Data source ID maps to the ID of the data source used by the resource and is distinct from the + warehouse ID. [Learn more]. + + [Learn more]: https://docs.databricks.com/api/workspace/datasources/list + :param description: str (optional) + General description that conveys additional information about this query such as usage notes. + :param name: str (optional) + The title of this query that appears in list views, widget headings, and on the query page. + :param options: Any (optional) + Exclusively used for storing a list parameter definitions. A parameter is an object with `title`, + `name`, `type`, and `value` properties. The `value` field here is the default value. It can be + overridden at runtime. 
+ :param query: str (optional) + The text of the query to be run. + :param run_as_role: :class:`RunAsRole` (optional) + Sets the **Run as** role for the object. Must be set to one of `"viewer"` (signifying "run as + viewer" behavior) or `"owner"` (signifying "run as owner" behavior) + + :returns: :class:`Query` + \ No newline at end of file diff --git a/docs/workspace/sql/query_history.rst b/docs/workspace/sql/query_history.rst new file mode 100644 index 000000000..c4c6ee9ae --- /dev/null +++ b/docs/workspace/sql/query_history.rst @@ -0,0 +1,40 @@ +``w.query_history``: Query History +================================== +.. currentmodule:: databricks.sdk.service.sql + +.. py:class:: QueryHistoryAPI + + Access the history of queries through SQL warehouses. + + .. py:method:: list( [, filter_by: Optional[QueryFilter], include_metrics: Optional[bool], max_results: Optional[int], page_token: Optional[str]]) -> Iterator[QueryInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import sql + + w = WorkspaceClient() + + _ = w.query_history.list(filter_by=sql.QueryFilter( + query_start_time_range=sql.TimeRange(start_time_ms=1690243200000, end_time_ms=1690329600000))) + + List Queries. + + List the history of queries through SQL warehouses. + + You can filter by user ID, warehouse ID, status, and time range. + + :param filter_by: :class:`QueryFilter` (optional) + A filter to limit query history results. This field is optional. + :param include_metrics: bool (optional) + Whether to include metrics about query. + :param max_results: int (optional) + Limit the number of results returned in one page. The default is 100. + :param page_token: str (optional) + A token that can be used to get the next page of results. + + :returns: Iterator over :class:`QueryInfo` + \ No newline at end of file diff --git a/docs/workspace/sql/query_visualizations.rst b/docs/workspace/sql/query_visualizations.rst new file mode 100644 index 000000000..7ef5b1cdc --- /dev/null +++ b/docs/workspace/sql/query_visualizations.rst @@ -0,0 +1,58 @@ +``w.query_visualizations``: Query Visualizations +================================================ +.. currentmodule:: databricks.sdk.service.sql + +.. py:class:: QueryVisualizationsAPI + + This is an evolving API that facilitates the addition and removal of vizualisations from existing queries + within the Databricks Workspace. Data structures may change over time. + + .. py:method:: create(query_id: str, type: str, options: Any [, description: Optional[str], name: Optional[str]]) -> Visualization + + Add visualization to a query. + + :param query_id: str + The identifier returned by :method:queries/create + :param type: str + The type of visualization: chart, table, pivot table, and so on. + :param options: Any + The options object varies widely from one visualization type to the next and is unsupported. + Databricks does not recommend modifying visualization settings in JSON. + :param description: str (optional) + A short description of this visualization. This is not displayed in the UI. + :param name: str (optional) + The name of the visualization that appears on dashboards and the query screen. + + :returns: :class:`Visualization` + + + .. py:method:: delete(id: str) + + Remove visualization. + + :param id: str + Widget ID returned by :method:queryvizualisations/create + + + + + .. 
py:method:: update(id: str [, created_at: Optional[str], description: Optional[str], name: Optional[str], options: Optional[Any], type: Optional[str], updated_at: Optional[str]]) -> Visualization + + Edit existing visualization. + + :param id: str + The UUID for this visualization. + :param created_at: str (optional) + :param description: str (optional) + A short description of this visualization. This is not displayed in the UI. + :param name: str (optional) + The name of the visualization that appears on dashboards and the query screen. + :param options: Any (optional) + The options object varies widely from one visualization type to the next and is unsupported. + Databricks does not recommend modifying visualization settings in JSON. + :param type: str (optional) + The type of visualization: chart, table, pivot table, and so on. + :param updated_at: str (optional) + + :returns: :class:`Visualization` + \ No newline at end of file diff --git a/docs/workspace/sql/statement_execution.rst b/docs/workspace/sql/statement_execution.rst new file mode 100644 index 000000000..d5c479462 --- /dev/null +++ b/docs/workspace/sql/statement_execution.rst @@ -0,0 +1,270 @@ +``w.statement_execution``: Statement Execution +============================================== +.. currentmodule:: databricks.sdk.service.sql + +.. py:class:: StatementExecutionAPI + + The Databricks SQL Statement Execution API can be used to execute SQL statements on a SQL warehouse and + fetch the result. + + **Getting started** + + We suggest beginning with the [Databricks SQL Statement Execution API tutorial]. + + **Overview of statement execution and result fetching** + + Statement execution begins by issuing a :method:statementexecution/executeStatement request with a valid + SQL statement and warehouse ID, along with optional parameters such as the data catalog and output format. + If no other parameters are specified, the server will wait for up to 10s before returning a response. If + the statement has completed within this timespan, the response will include the result data as a JSON + array and metadata. Otherwise, if no result is available after the 10s timeout expired, the response will + provide the statement ID that can be used to poll for results by using a + :method:statementexecution/getStatement request. + + You can specify whether the call should behave synchronously, asynchronously or start synchronously with a + fallback to asynchronous execution. This is controlled with the `wait_timeout` and `on_wait_timeout` + settings. If `wait_timeout` is set between 5-50 seconds (default: 10s), the call waits for results up to + the specified timeout; when set to `0s`, the call is asynchronous and responds immediately with a + statement ID. The `on_wait_timeout` setting specifies what should happen when the timeout is reached while + the statement execution has not yet finished. This can be set to either `CONTINUE`, to fallback to + asynchronous mode, or it can be set to `CANCEL`, which cancels the statement. + + In summary: - Synchronous mode - `wait_timeout=30s` and `on_wait_timeout=CANCEL` - The call waits up to 30 + seconds; if the statement execution finishes within this time, the result data is returned directly in the + response. If the execution takes longer than 30 seconds, the execution is canceled and the call returns + with a `CANCELED` state. - Asynchronous mode - `wait_timeout=0s` (`on_wait_timeout` is ignored) - The call + doesn't wait for the statement to finish but returns directly with a statement ID. 
The status of the + statement execution can be polled by issuing :method:statementexecution/getStatement with the statement + ID. Once the execution has succeeded, this call also returns the result and metadata in the response. - + Hybrid mode (default) - `wait_timeout=10s` and `on_wait_timeout=CONTINUE` - The call waits for up to 10 + seconds; if the statement execution finishes within this time, the result data is returned directly in the + response. If the execution takes longer than 10 seconds, a statement ID is returned. The statement ID can + be used to fetch status and results in the same way as in the asynchronous mode. + + Depending on the size, the result can be split into multiple chunks. If the statement execution is + successful, the statement response contains a manifest and the first chunk of the result. The manifest + contains schema information and provides metadata for each chunk in the result. Result chunks can be + retrieved by index with :method:statementexecution/getStatementResultChunkN which may be called in any + order and in parallel. For sequential fetching, each chunk, apart from the last, also contains a + `next_chunk_index` and `next_chunk_internal_link` that point to the next chunk. + + A statement can be canceled with :method:statementexecution/cancelExecution. + + **Fetching result data: format and disposition** + + To specify the format of the result data, use the `format` field, which can be set to one of the following + options: `JSON_ARRAY` (JSON), `ARROW_STREAM` ([Apache Arrow Columnar]), or `CSV`. + + There are two ways to receive statement results, controlled by the `disposition` setting, which can be + either `INLINE` or `EXTERNAL_LINKS`: + + - `INLINE`: In this mode, the result data is directly included in the response. It's best suited for + smaller results. This mode can only be used with the `JSON_ARRAY` format. + + - `EXTERNAL_LINKS`: In this mode, the response provides links that can be used to download the result data + in chunks separately. This approach is ideal for larger results and offers higher throughput. This mode + can be used with all the formats: `JSON_ARRAY`, `ARROW_STREAM`, and `CSV`. + + By default, the API uses `format=JSON_ARRAY` and `disposition=INLINE`. + + **Limits and limitations** + + Note: The byte limit for INLINE disposition is based on internal storage metrics and will not exactly + match the byte count of the actual payload. + + - Statements with `disposition=INLINE` are limited to 25 MiB and will fail when this limit is exceeded. - + Statements with `disposition=EXTERNAL_LINKS` are limited to 100 GiB. Result sets larger than this limit + will be truncated. Truncation is indicated by the `truncated` field in the result manifest. - The maximum + query text size is 16 MiB. - Cancelation might silently fail. A successful response from a cancel request + indicates that the cancel request was successfully received and sent to the processing engine. However, an + outstanding statement might have already completed execution when the cancel request arrives. Polling for + status until a terminal state is reached is a reliable way to determine the final state. - Wait timeouts + are approximate, occur server-side, and cannot account for things such as caller delays and network + latency from caller to service. - The system will auto-close a statement after one hour if the client + stops polling and thus you must poll at least once an hour. 
- The results are only available for one hour + after success; polling does not extend this. + + [Apache Arrow Columnar]: https://arrow.apache.org/overview/ + [Databricks SQL Statement Execution API tutorial]: https://docs.databricks.com/sql/api/sql-execution-tutorial.html + + .. py:method:: cancel_execution(statement_id: str) + + Cancel statement execution. + + Requests that an executing statement be canceled. Callers must poll for status to see the terminal + state. + + :param statement_id: str + The statement ID is returned upon successfully submitting a SQL statement, and is a required + reference for all subsequent calls. + + + + + .. py:method:: execute_statement(statement: str, warehouse_id: str [, byte_limit: Optional[int], catalog: Optional[str], disposition: Optional[Disposition], format: Optional[Format], on_wait_timeout: Optional[ExecuteStatementRequestOnWaitTimeout], parameters: Optional[List[StatementParameterListItem]], row_limit: Optional[int], schema: Optional[str], wait_timeout: Optional[str]]) -> ExecuteStatementResponse + + Execute a SQL statement. + + :param statement: str + The SQL statement to execute. The statement can optionally be parameterized, see `parameters`. + :param warehouse_id: str + Warehouse upon which to execute a statement. See also [What are SQL + warehouses?](/sql/admin/warehouse-type.html) + :param byte_limit: int (optional) + Applies the given byte limit to the statement's result size. Byte counts are based on internal data + representations and might not match the final size in the requested `format`. If the result was + truncated due to the byte limit, then `truncated` in the response is set to `true`. When using + `EXTERNAL_LINKS` disposition, a default `byte_limit` of 100 GiB is applied if `byte_limit` is not + explcitly set. + :param catalog: str (optional) + Sets default catalog for statement execution, similar to [`USE CATALOG`] in SQL. + + [`USE CATALOG`]: https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-use-catalog.html + :param disposition: :class:`Disposition` (optional) + The fetch disposition provides two modes of fetching results: `INLINE` and `EXTERNAL_LINKS`. + + Statements executed with `INLINE` disposition will return result data inline, in `JSON_ARRAY` + format, in a series of chunks. If a given statement produces a result set with a size larger than 25 + MiB, that statement execution is aborted, and no result set will be available. + + **NOTE** Byte limits are computed based upon internal representations of the result set data, and + might not match the sizes visible in JSON responses. + + Statements executed with `EXTERNAL_LINKS` disposition will return result data as external links: + URLs that point to cloud storage internal to the workspace. Using `EXTERNAL_LINKS` disposition + allows statements to generate arbitrarily sized result sets for fetching up to 100 GiB. The + resulting links have two important properties: + + 1. They point to resources _external_ to the Databricks compute; therefore any associated + authentication information (typically a personal access token, OAuth token, or similar) _must be + removed_ when fetching from these links. + + 2. These are presigned URLs with a specific expiration, indicated in the response. The behavior when + attempting to use an expired link is cloud specific. + :param format: :class:`Format` (optional) + Statement execution supports three result formats: `JSON_ARRAY` (default), `ARROW_STREAM`, and + `CSV`. 
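+          As a hedged illustration of combining `format` and `disposition` (the warehouse ID is a
+          placeholder; see the notes below for which combinations are allowed):
+
+          .. code-block::
+
+              from databricks.sdk import WorkspaceClient
+              from databricks.sdk.service import sql
+
+              w = WorkspaceClient()
+
+              resp = w.statement_execution.execute_statement(
+                  warehouse_id='<warehouse-id>',  # placeholder
+                  statement='SELECT id FROM range(10)',
+                  format=sql.Format.ARROW_STREAM,
+                  disposition=sql.Disposition.EXTERNAL_LINKS)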
+ + Important: The formats `ARROW_STREAM` and `CSV` are supported only with `EXTERNAL_LINKS` + disposition. `JSON_ARRAY` is supported in `INLINE` and `EXTERNAL_LINKS` disposition. + + When specifying `format=JSON_ARRAY`, result data will be formatted as an array of arrays of values, + where each value is either the *string representation* of a value, or `null`. For example, the + output of `SELECT concat('id-', id) AS strCol, id AS intCol, null AS nullCol FROM range(3)` would + look like this: + + ``` [ [ "id-1", "1", null ], [ "id-2", "2", null ], [ "id-3", "3", null ], ] ``` + + When specifying `format=JSON_ARRAY` and `disposition=EXTERNAL_LINKS`, each chunk in the result + contains compact JSON with no indentation or extra whitespace. + + When specifying `format=ARROW_STREAM` and `disposition=EXTERNAL_LINKS`, each chunk in the result + will be formatted as Apache Arrow Stream. See the [Apache Arrow streaming format]. + + When specifying `format=CSV` and `disposition=EXTERNAL_LINKS`, each chunk in the result will be a + CSV according to [RFC 4180] standard. All the columns values will have *string representation* + similar to the `JSON_ARRAY` format, and `null` values will be encoded as “null”. Only the first + chunk in the result would contain a header row with column names. For example, the output of `SELECT + concat('id-', id) AS strCol, id AS intCol, null as nullCol FROM range(3)` would look like this: + + ``` strCol,intCol,nullCol id-1,1,null id-2,2,null id-3,3,null ``` + + [Apache Arrow streaming format]: https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format + [RFC 4180]: https://www.rfc-editor.org/rfc/rfc4180 + :param on_wait_timeout: :class:`ExecuteStatementRequestOnWaitTimeout` (optional) + When `wait_timeout > 0s`, the call will block up to the specified time. If the statement execution + doesn't finish within this time, `on_wait_timeout` determines whether the execution should continue + or be canceled. When set to `CONTINUE`, the statement execution continues asynchronously and the + call returns a statement ID which can be used for polling with + :method:statementexecution/getStatement. When set to `CANCEL`, the statement execution is canceled + and the call returns with a `CANCELED` state. + :param parameters: List[:class:`StatementParameterListItem`] (optional) + A list of parameters to pass into a SQL statement containing parameter markers. A parameter consists + of a name, a value, and optionally a type. To represent a NULL value, the `value` field may be + omitted or set to `null` explicitly. If the `type` field is omitted, the value is interpreted as a + string. + + If the type is given, parameters will be checked for type correctness according to the given type. A + value is correct if the provided string can be converted to the requested type using the `cast` + function. The exact semantics are described in the section [`cast` function] of the SQL language + reference. + + For example, the following statement contains two parameters, `my_name` and `my_date`: + + SELECT * FROM my_table WHERE name = :my_name AND date = :my_date + + The parameters can be passed in the request body as follows: + + { ..., "statement": "SELECT * FROM my_table WHERE name = :my_name AND date = :my_date", + "parameters": [ { "name": "my_name", "value": "the name" }, { "name": "my_date", "value": + "2020-01-01", "type": "DATE" } ] } + + Currently, positional parameters denoted by a `?` marker are not supported by the Databricks SQL + Statement Execution API. 
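+          Expressed with this SDK, the same parameterized request could look like the following
+          hedged sketch (the warehouse ID is a placeholder):
+
+          .. code-block::
+
+              from databricks.sdk import WorkspaceClient
+              from databricks.sdk.service import sql
+
+              w = WorkspaceClient()
+
+              resp = w.statement_execution.execute_statement(
+                  warehouse_id='<warehouse-id>',  # placeholder
+                  statement="SELECT * FROM my_table WHERE name = :my_name AND date = :my_date",
+                  parameters=[
+                      sql.StatementParameterListItem(name="my_name", value="the name"),
+                      sql.StatementParameterListItem(name="my_date", value="2020-01-01", type="DATE"),
+                  ])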
+ + Also see the section [Parameter markers] of the SQL language reference. + + [Parameter markers]: https://docs.databricks.com/sql/language-manual/sql-ref-parameter-marker.html + [`cast` function]: https://docs.databricks.com/sql/language-manual/functions/cast.html + :param row_limit: int (optional) + Applies the given row limit to the statement's result set, but unlike the `LIMIT` clause in SQL, it + also sets the `truncated` field in the response to indicate whether the result was trimmed due to + the limit or not. + :param schema: str (optional) + Sets default schema for statement execution, similar to [`USE SCHEMA`] in SQL. + + [`USE SCHEMA`]: https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-use-schema.html + :param wait_timeout: str (optional) + The time in seconds the call will wait for the statement's result set as `Ns`, where `N` can be set + to 0 or to a value between 5 and 50. + + When set to `0s`, the statement will execute in asynchronous mode and the call will not wait for the + execution to finish. In this case, the call returns directly with `PENDING` state and a statement ID + which can be used for polling with :method:statementexecution/getStatement. + + When set between 5 and 50 seconds, the call will behave synchronously up to this timeout and wait + for the statement execution to finish. If the execution finishes within this time, the call returns + immediately with a manifest and result data (or a `FAILED` state in case of an execution error). If + the statement takes longer to execute, `on_wait_timeout` determines what should happen after the + timeout is reached. + + :returns: :class:`ExecuteStatementResponse` + + + .. py:method:: get_statement(statement_id: str) -> GetStatementResponse + + Get status, manifest, and result first chunk. + + This request can be used to poll for the statement's status. When the `status.state` field is + `SUCCEEDED` it will also return the result manifest and the first chunk of the result data. When the + statement is in the terminal states `CANCELED`, `CLOSED` or `FAILED`, it returns HTTP 200 with the + state set. After at least 12 hours in terminal state, the statement is removed from the warehouse and + further calls will receive an HTTP 404 response. + + **NOTE** This call currently might take up to 5 seconds to get the latest status and result. + + :param statement_id: str + The statement ID is returned upon successfully submitting a SQL statement, and is a required + reference for all subsequent calls. + + :returns: :class:`GetStatementResponse` + + + .. py:method:: get_statement_result_chunk_n(statement_id: str, chunk_index: int) -> ResultData + + Get result chunk by index. + + After the statement execution has `SUCCEEDED`, this request can be used to fetch any chunk by index. + Whereas the first chunk with `chunk_index=0` is typically fetched with + :method:statementexecution/executeStatement or :method:statementexecution/getStatement, this request + can be used to fetch subsequent chunks. The response structure is identical to the nested `result` + element described in the :method:statementexecution/getStatement request, and similarly includes the + `next_chunk_index` and `next_chunk_internal_link` fields for simple iteration through the result set. + + :param statement_id: str + The statement ID is returned upon successfully submitting a SQL statement, and is a required + reference for all subsequent calls. 
+ :param chunk_index: int + + :returns: :class:`ResultData` + \ No newline at end of file diff --git a/docs/workspace/sql/warehouses.rst b/docs/workspace/sql/warehouses.rst new file mode 100644 index 000000000..793852680 --- /dev/null +++ b/docs/workspace/sql/warehouses.rst @@ -0,0 +1,394 @@ +``w.warehouses``: SQL Warehouses +================================ +.. currentmodule:: databricks.sdk.service.sql + +.. py:class:: WarehousesAPI + + A SQL warehouse is a compute resource that lets you run SQL commands on data objects within Databricks + SQL. Compute resources are infrastructure resources that provide processing capabilities in the cloud. + + .. py:method:: create( [, auto_stop_mins: Optional[int], channel: Optional[Channel], cluster_size: Optional[str], creator_name: Optional[str], enable_photon: Optional[bool], enable_serverless_compute: Optional[bool], instance_profile_arn: Optional[str], max_num_clusters: Optional[int], min_num_clusters: Optional[int], name: Optional[str], spot_instance_policy: Optional[SpotInstancePolicy], tags: Optional[EndpointTags], warehouse_type: Optional[CreateWarehouseRequestWarehouseType]]) -> Wait[GetWarehouseResponse] + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.warehouses.create(name=f'sdk-{time.time_ns()}', + cluster_size="2X-Small", + max_num_clusters=1, + auto_stop_mins=10).result() + + # cleanup + w.warehouses.delete(id=created.id) + + Create a warehouse. + + Creates a new SQL warehouse. + + :param auto_stop_mins: int (optional) + The amount of time in minutes that a SQL warehouse must be idle (i.e., no RUNNING queries) before it + is automatically stopped. + + Supported values: - Must be == 0 or >= 10 mins - 0 indicates no autostop. + + Defaults to 120 mins + :param channel: :class:`Channel` (optional) + Channel Details + :param cluster_size: str (optional) + Size of the clusters allocated for this warehouse. Increasing the size of a spark cluster allows you + to run larger queries on it. If you want to increase the number of concurrent queries, please tune + max_num_clusters. + + Supported values: - 2X-Small - X-Small - Small - Medium - Large - X-Large - 2X-Large - 3X-Large - + 4X-Large + :param creator_name: str (optional) + warehouse creator name + :param enable_photon: bool (optional) + Configures whether the warehouse should use Photon optimized clusters. + + Defaults to false. + :param enable_serverless_compute: bool (optional) + Configures whether the warehouse should use serverless compute + :param instance_profile_arn: str (optional) + Deprecated. Instance profile used to pass IAM role to the cluster + :param max_num_clusters: int (optional) + Maximum number of clusters that the autoscaler will create to handle concurrent queries. + + Supported values: - Must be >= min_num_clusters - Must be <= 30. + + Defaults to min_clusters if unset. + :param min_num_clusters: int (optional) + Minimum number of available clusters that will be maintained for this SQL warehouse. Increasing this + will ensure that a larger number of clusters are always running and therefore may reduce the cold + start time for new queries. This is similar to reserved vs. revocable cores in a resource manager. + + Supported values: - Must be > 0 - Must be <= min(max_num_clusters, 30) + + Defaults to 1 + :param name: str (optional) + Logical name for the cluster. + + Supported values: - Must be unique within an org. - Must be less than 100 characters. 
+ :param spot_instance_policy: :class:`SpotInstancePolicy` (optional) + Configurations whether the warehouse should use spot instances. + :param tags: :class:`EndpointTags` (optional) + A set of key-value pairs that will be tagged on all resources (e.g., AWS instances and EBS volumes) + associated with this SQL warehouse. + + Supported values: - Number of tags < 45. + :param warehouse_type: :class:`CreateWarehouseRequestWarehouseType` (optional) + Warehouse type: `PRO` or `CLASSIC`. If you want to use serverless compute, you must set to `PRO` and + also set the field `enable_serverless_compute` to `true`. + + :returns: + Long-running operation waiter for :class:`GetWarehouseResponse`. + See :method:wait_get_warehouse_running for more details. + + + .. py:method:: create_and_wait( [, auto_stop_mins: Optional[int], channel: Optional[Channel], cluster_size: Optional[str], creator_name: Optional[str], enable_photon: Optional[bool], enable_serverless_compute: Optional[bool], instance_profile_arn: Optional[str], max_num_clusters: Optional[int], min_num_clusters: Optional[int], name: Optional[str], spot_instance_policy: Optional[SpotInstancePolicy], tags: Optional[EndpointTags], warehouse_type: Optional[CreateWarehouseRequestWarehouseType], timeout: datetime.timedelta = 0:20:00]) -> GetWarehouseResponse + + + .. py:method:: delete(id: str) + + Delete a warehouse. + + Deletes a SQL warehouse. + + :param id: str + Required. Id of the SQL warehouse. + + + + + .. py:method:: edit(id: str [, auto_stop_mins: Optional[int], channel: Optional[Channel], cluster_size: Optional[str], creator_name: Optional[str], enable_photon: Optional[bool], enable_serverless_compute: Optional[bool], instance_profile_arn: Optional[str], max_num_clusters: Optional[int], min_num_clusters: Optional[int], name: Optional[str], spot_instance_policy: Optional[SpotInstancePolicy], tags: Optional[EndpointTags], warehouse_type: Optional[EditWarehouseRequestWarehouseType]]) -> Wait[GetWarehouseResponse] + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.warehouses.create(name=f'sdk-{time.time_ns()}', + cluster_size="2X-Small", + max_num_clusters=1, + auto_stop_mins=10).result() + + _ = w.warehouses.edit(id=created.id, + name=f'sdk-{time.time_ns()}', + cluster_size="2X-Small", + max_num_clusters=1, + auto_stop_mins=10) + + # cleanup + w.warehouses.delete(id=created.id) + + Update a warehouse. + + Updates the configuration for a SQL warehouse. + + :param id: str + Required. Id of the warehouse to configure. + :param auto_stop_mins: int (optional) + The amount of time in minutes that a SQL warehouse must be idle (i.e., no RUNNING queries) before it + is automatically stopped. + + Supported values: - Must be == 0 or >= 10 mins - 0 indicates no autostop. + + Defaults to 120 mins + :param channel: :class:`Channel` (optional) + Channel Details + :param cluster_size: str (optional) + Size of the clusters allocated for this warehouse. Increasing the size of a spark cluster allows you + to run larger queries on it. If you want to increase the number of concurrent queries, please tune + max_num_clusters. + + Supported values: - 2X-Small - X-Small - Small - Medium - Large - X-Large - 2X-Large - 3X-Large - + 4X-Large + :param creator_name: str (optional) + warehouse creator name + :param enable_photon: bool (optional) + Configures whether the warehouse should use Photon optimized clusters. + + Defaults to false. 
+ :param enable_serverless_compute: bool (optional) + Configures whether the warehouse should use serverless compute. + :param instance_profile_arn: str (optional) + Deprecated. Instance profile used to pass IAM role to the cluster + :param max_num_clusters: int (optional) + Maximum number of clusters that the autoscaler will create to handle concurrent queries. + + Supported values: - Must be >= min_num_clusters - Must be <= 30. + + Defaults to min_clusters if unset. + :param min_num_clusters: int (optional) + Minimum number of available clusters that will be maintained for this SQL warehouse. Increasing this + will ensure that a larger number of clusters are always running and therefore may reduce the cold + start time for new queries. This is similar to reserved vs. revocable cores in a resource manager. + + Supported values: - Must be > 0 - Must be <= min(max_num_clusters, 30) + + Defaults to 1 + :param name: str (optional) + Logical name for the cluster. + + Supported values: - Must be unique within an org. - Must be less than 100 characters. + :param spot_instance_policy: :class:`SpotInstancePolicy` (optional) + Configurations whether the warehouse should use spot instances. + :param tags: :class:`EndpointTags` (optional) + A set of key-value pairs that will be tagged on all resources (e.g., AWS instances and EBS volumes) + associated with this SQL warehouse. + + Supported values: - Number of tags < 45. + :param warehouse_type: :class:`EditWarehouseRequestWarehouseType` (optional) + Warehouse type: `PRO` or `CLASSIC`. If you want to use serverless compute, you must set to `PRO` and + also set the field `enable_serverless_compute` to `true`. + + :returns: + Long-running operation waiter for :class:`GetWarehouseResponse`. + See :method:wait_get_warehouse_running for more details. + + + .. py:method:: edit_and_wait(id: str [, auto_stop_mins: Optional[int], channel: Optional[Channel], cluster_size: Optional[str], creator_name: Optional[str], enable_photon: Optional[bool], enable_serverless_compute: Optional[bool], instance_profile_arn: Optional[str], max_num_clusters: Optional[int], min_num_clusters: Optional[int], name: Optional[str], spot_instance_policy: Optional[SpotInstancePolicy], tags: Optional[EndpointTags], warehouse_type: Optional[EditWarehouseRequestWarehouseType], timeout: datetime.timedelta = 0:20:00]) -> GetWarehouseResponse + + + .. py:method:: get(id: str) -> GetWarehouseResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.warehouses.create(name=f'sdk-{time.time_ns()}', + cluster_size="2X-Small", + max_num_clusters=1, + auto_stop_mins=10).result() + + wh = w.warehouses.get(id=created.id) + + # cleanup + w.warehouses.delete(id=created.id) + + Get warehouse info. + + Gets the information for a single SQL warehouse. + + :param id: str + Required. Id of the SQL warehouse. + + :returns: :class:`GetWarehouseResponse` + + + .. py:method:: get_permission_levels(warehouse_id: str) -> GetWarehousePermissionLevelsResponse + + Get SQL warehouse permission levels. + + Gets the permission levels that a user can have on an object. + + :param warehouse_id: str + The SQL warehouse for which to get or manage permissions. + + :returns: :class:`GetWarehousePermissionLevelsResponse` + + + .. py:method:: get_permissions(warehouse_id: str) -> WarehousePermissions + + Get SQL warehouse permissions. + + Gets the permissions of a SQL warehouse. 
SQL warehouses can inherit permissions from their root + object. + + :param warehouse_id: str + The SQL warehouse for which to get or manage permissions. + + :returns: :class:`WarehousePermissions` + + + .. py:method:: get_workspace_warehouse_config() -> GetWorkspaceWarehouseConfigResponse + + Get the workspace configuration. + + Gets the workspace level configuration that is shared by all SQL warehouses in a workspace. + + :returns: :class:`GetWorkspaceWarehouseConfigResponse` + + + .. py:method:: list( [, run_as_user_id: Optional[int]]) -> Iterator[EndpointInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import sql + + w = WorkspaceClient() + + all = w.warehouses.list(sql.ListWarehousesRequest()) + + List warehouses. + + Lists all SQL warehouses that a user has manager permissions on. + + :param run_as_user_id: int (optional) + Service Principal which will be used to fetch the list of warehouses. If not specified, the user + from the session header is used. + + :returns: Iterator over :class:`EndpointInfo` + + + .. py:method:: set_permissions(warehouse_id: str [, access_control_list: Optional[List[WarehouseAccessControlRequest]]]) -> WarehousePermissions + + Set SQL warehouse permissions. + + Sets permissions on a SQL warehouse. SQL warehouses can inherit permissions from their root object. + + :param warehouse_id: str + The SQL warehouse for which to get or manage permissions. + :param access_control_list: List[:class:`WarehouseAccessControlRequest`] (optional) + + :returns: :class:`WarehousePermissions` + + + .. py:method:: set_workspace_warehouse_config( [, channel: Optional[Channel], config_param: Optional[RepeatedEndpointConfPairs], data_access_config: Optional[List[EndpointConfPair]], enabled_warehouse_types: Optional[List[WarehouseTypePair]], global_param: Optional[RepeatedEndpointConfPairs], google_service_account: Optional[str], instance_profile_arn: Optional[str], security_policy: Optional[SetWorkspaceWarehouseConfigRequestSecurityPolicy], sql_configuration_parameters: Optional[RepeatedEndpointConfPairs]]) + + Set the workspace configuration. + + Sets the workspace level configuration that is shared by all SQL warehouses in a workspace. + + :param channel: :class:`Channel` (optional) + Optional: Channel selection details + :param config_param: :class:`RepeatedEndpointConfPairs` (optional) + Deprecated: Use sql_configuration_parameters + :param data_access_config: List[:class:`EndpointConfPair`] (optional) + Spark confs for external hive metastore configuration JSON serialized size must be less than <= 512K + :param enabled_warehouse_types: List[:class:`WarehouseTypePair`] (optional) + List of Warehouse Types allowed in this workspace (limits allowed value of the type field in + CreateWarehouse and EditWarehouse). Note: Some types cannot be disabled, they don't need to be + specified in SetWorkspaceWarehouseConfig. Note: Disabling a type may cause existing warehouses to be + converted to another type. Used by frontend to save specific type availability in the warehouse + create and edit form UI. 
+ :param global_param: :class:`RepeatedEndpointConfPairs` (optional) + Deprecated: Use sql_configuration_parameters + :param google_service_account: str (optional) + GCP only: Google Service Account used to pass to cluster to access Google Cloud Storage + :param instance_profile_arn: str (optional) + AWS Only: Instance profile used to pass IAM role to the cluster + :param security_policy: :class:`SetWorkspaceWarehouseConfigRequestSecurityPolicy` (optional) + Security policy for warehouses + :param sql_configuration_parameters: :class:`RepeatedEndpointConfPairs` (optional) + SQL configuration parameters + + + + + .. py:method:: start(id: str) -> Wait[GetWarehouseResponse] + + Start a warehouse. + + Starts a SQL warehouse. + + :param id: str + Required. Id of the SQL warehouse. + + :returns: + Long-running operation waiter for :class:`GetWarehouseResponse`. + See :method:wait_get_warehouse_running for more details. + + + .. py:method:: start_and_wait(id: str, timeout: datetime.timedelta = 0:20:00) -> GetWarehouseResponse + + + .. py:method:: stop(id: str) -> Wait[GetWarehouseResponse] + + Stop a warehouse. + + Stops a SQL warehouse. + + :param id: str + Required. Id of the SQL warehouse. + + :returns: + Long-running operation waiter for :class:`GetWarehouseResponse`. + See :method:wait_get_warehouse_stopped for more details. + + + .. py:method:: stop_and_wait(id: str, timeout: datetime.timedelta = 0:20:00) -> GetWarehouseResponse + + + .. py:method:: update_permissions(warehouse_id: str [, access_control_list: Optional[List[WarehouseAccessControlRequest]]]) -> WarehousePermissions + + Update SQL warehouse permissions. + + Updates the permissions on a SQL warehouse. SQL warehouses can inherit permissions from their root + object. + + :param warehouse_id: str + The SQL warehouse for which to get or manage permissions. + :param access_control_list: List[:class:`WarehouseAccessControlRequest`] (optional) + + :returns: :class:`WarehousePermissions` + + + .. py:method:: wait_get_warehouse_running(id: str, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[GetWarehouseResponse], None]]) -> GetWarehouseResponse + + + .. py:method:: wait_get_warehouse_stopped(id: str, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[GetWarehouseResponse], None]]) -> GetWarehouseResponse diff --git a/docs/workspace/vectorsearch/index.rst b/docs/workspace/vectorsearch/index.rst new file mode 100644 index 000000000..b4af05509 --- /dev/null +++ b/docs/workspace/vectorsearch/index.rst @@ -0,0 +1,11 @@ + +Vector Search +============= + +Create and query Vector Search indexes + +.. toctree:: + :maxdepth: 1 + + vector_search_endpoints + vector_search_indexes \ No newline at end of file diff --git a/docs/workspace/vectorsearch/vector_search_endpoints.rst b/docs/workspace/vectorsearch/vector_search_endpoints.rst new file mode 100644 index 000000000..64d92cec2 --- /dev/null +++ b/docs/workspace/vectorsearch/vector_search_endpoints.rst @@ -0,0 +1,60 @@ +``w.vector_search_endpoints``: Endpoints +======================================== +.. currentmodule:: databricks.sdk.service.vectorsearch + +.. py:class:: VectorSearchEndpointsAPI + + **Endpoint**: Represents the compute resources to host vector search indexes. + + .. py:method:: create_endpoint(name: str, endpoint_type: EndpointType) -> Wait[EndpointInfo] + + Create an endpoint. + + Create a new endpoint. + + :param name: str + Name of endpoint + :param endpoint_type: :class:`EndpointType` + Type of endpoint. 
+ + :returns: + Long-running operation waiter for :class:`EndpointInfo`. + See :method:wait_get_endpoint_vector_search_endpoint_online for more details. + + + .. py:method:: create_endpoint_and_wait(name: str, endpoint_type: EndpointType, timeout: datetime.timedelta = 0:20:00) -> EndpointInfo + + + .. py:method:: delete_endpoint(endpoint_name: str, name: str) + + Delete an endpoint. + + :param endpoint_name: str + Name of the endpoint + :param name: str + Name of the endpoint to delete + + + + + .. py:method:: get_endpoint(endpoint_name: str) -> EndpointInfo + + Get an endpoint. + + :param endpoint_name: str + Name of the endpoint + + :returns: :class:`EndpointInfo` + + + .. py:method:: list_endpoints( [, page_token: Optional[str]]) -> Iterator[EndpointInfo] + + List all endpoints. + + :param page_token: str (optional) + Token for pagination + + :returns: Iterator over :class:`EndpointInfo` + + + .. py:method:: wait_get_endpoint_vector_search_endpoint_online(endpoint_name: str, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[EndpointInfo], None]]) -> EndpointInfo diff --git a/docs/workspace/vectorsearch/vector_search_indexes.rst b/docs/workspace/vectorsearch/vector_search_indexes.rst new file mode 100644 index 000000000..f79335e4f --- /dev/null +++ b/docs/workspace/vectorsearch/vector_search_indexes.rst @@ -0,0 +1,145 @@ +``w.vector_search_indexes``: Indexes +==================================== +.. currentmodule:: databricks.sdk.service.vectorsearch + +.. py:class:: VectorSearchIndexesAPI + + **Index**: An efficient representation of your embedding vectors that supports real-time and efficient + approximate nearest neighbor (ANN) search queries. + + There are 2 types of Vector Search indexes: * **Delta Sync Index**: An index that automatically syncs with + a source Delta Table, automatically and incrementally updating the index as the underlying data in the + Delta Table changes. * **Direct Vector Access Index**: An index that supports direct read and write of + vectors and metadata through our REST and SDK APIs. With this model, the user manages index updates. + + .. py:method:: create_index(name: str, primary_key: str, index_type: VectorIndexType [, delta_sync_vector_index_spec: Optional[DeltaSyncVectorIndexSpecRequest], direct_access_index_spec: Optional[DirectAccessVectorIndexSpec], endpoint_name: Optional[str]]) -> CreateVectorIndexResponse + + Create an index. + + Create a new index. + + :param name: str + Name of the index + :param primary_key: str + Primary key of the index + :param index_type: :class:`VectorIndexType` + There are 2 types of Vector Search indexes: + + - `DELTA_SYNC`: An index that automatically syncs with a source Delta Table, automatically and + incrementally updating the index as the underlying data in the Delta Table changes. - + `DIRECT_ACCESS`: An index that supports direct read and write of vectors and metadata through our + REST and SDK APIs. With this model, the user manages index updates. + :param delta_sync_vector_index_spec: :class:`DeltaSyncVectorIndexSpecRequest` (optional) + Specification for Delta Sync Index. Required if `index_type` is `DELTA_SYNC`. + :param direct_access_index_spec: :class:`DirectAccessVectorIndexSpec` (optional) + Specification for Direct Vector Access Index. Required if `index_type` is `DIRECT_ACCESS`. + :param endpoint_name: str (optional) + Name of the endpoint to be used for serving the index + + :returns: :class:`CreateVectorIndexResponse` + + + .. 
py:method:: delete_data_vector_index(name: str, primary_keys: List[str]) -> DeleteDataVectorIndexResponse + + Delete data from index. + + Handles the deletion of data from a specified vector index. + + :param name: str + Name of the vector index where data is to be deleted. Must be a Direct Vector Access Index. + :param primary_keys: List[str] + List of primary keys for the data to be deleted. + + :returns: :class:`DeleteDataVectorIndexResponse` + + + .. py:method:: delete_index(index_name: str) + + Delete an index. + + Delete an index. + + :param index_name: str + Name of the index + + + + + .. py:method:: get_index(index_name: str) -> VectorIndex + + Get an index. + + Get an index. + + :param index_name: str + Name of the index + + :returns: :class:`VectorIndex` + + + .. py:method:: list_indexes(endpoint_name: str [, page_token: Optional[str]]) -> Iterator[MiniVectorIndex] + + List indexes. + + List all indexes in the given endpoint. + + :param endpoint_name: str + Name of the endpoint + :param page_token: str (optional) + Token for pagination + + :returns: Iterator over :class:`MiniVectorIndex` + + + .. py:method:: query_index(index_name: str, columns: List[str] [, filters_json: Optional[str], num_results: Optional[int], query_text: Optional[str], query_vector: Optional[List[float]]]) -> QueryVectorIndexResponse + + Query an index. + + Query the specified vector index. + + :param index_name: str + Name of the vector index to query. + :param columns: List[str] + List of column names to include in the response. + :param filters_json: str (optional) + JSON string representing query filters. + + Example filters: - `{"id <": 5}`: Filter for id less than 5. - `{"id >": 5}`: Filter for id greater + than 5. - `{"id <=": 5}`: Filter for id less than equal to 5. - `{"id >=": 5}`: Filter for id + greater than equal to 5. - `{"id": 5}`: Filter for id equal to 5. + :param num_results: int (optional) + Number of results to return. Defaults to 10. + :param query_text: str (optional) + Query text. Required for Delta Sync Index using model endpoint. + :param query_vector: List[float] (optional) + Query vector. Required for Direct Vector Access Index and Delta Sync Index using self-managed + vectors. + + :returns: :class:`QueryVectorIndexResponse` + + + .. py:method:: sync_index(index_name: str) + + Synchronize an index. + + Triggers a synchronization process for a specified vector index. + + :param index_name: str + Name of the vector index to synchronize. Must be a Delta Sync Index. + + + + + .. py:method:: upsert_data_vector_index(name: str, inputs_json: str) -> UpsertDataVectorIndexResponse + + Upsert data into an index. + + Handles the upserting of data into a specified vector index. + + :param name: str + Name of the vector index where data is to be upserted. Must be a Direct Vector Access Index. + :param inputs_json: str + JSON string representing the data to be upserted. + + :returns: :class:`UpsertDataVectorIndexResponse` + \ No newline at end of file diff --git a/docs/workspace/workspace/git_credentials.rst b/docs/workspace/workspace/git_credentials.rst new file mode 100644 index 000000000..f43b25b07 --- /dev/null +++ b/docs/workspace/workspace/git_credentials.rst @@ -0,0 +1,146 @@ +``w.git_credentials``: Git Credentials +====================================== +.. currentmodule:: databricks.sdk.service.workspace + +.. py:class:: GitCredentialsAPI + + Registers personal access token for Databricks to do operations on behalf of the user. + + See [more info]. 
+ + [more info]: https://docs.databricks.com/repos/get-access-tokens-from-git-provider.html + + .. py:method:: create(git_provider: str [, git_username: Optional[str], personal_access_token: Optional[str]]) -> CreateCredentialsResponse + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + cr = w.git_credentials.create(git_provider="gitHub", git_username="test", personal_access_token="test") + + # cleanup + w.git_credentials.delete(credential_id=cr.credential_id) + + Create a credential entry. + + Creates a Git credential entry for the user. Only one Git credential per user is supported, so any + attempts to create credentials if an entry already exists will fail. Use the PATCH endpoint to update + existing credentials, or the DELETE endpoint to delete existing credentials. + + :param git_provider: str + Git provider. This field is case-insensitive. The available Git providers are gitHub, + bitbucketCloud, gitLab, azureDevOpsServices, gitHubEnterprise, bitbucketServer, + gitLabEnterpriseEdition and awsCodeCommit. + :param git_username: str (optional) + Git username. + :param personal_access_token: str (optional) + The personal access token used to authenticate to the corresponding Git provider. + + :returns: :class:`CreateCredentialsResponse` + + + .. py:method:: delete(credential_id: int) + + Delete a credential. + + Deletes the specified Git credential. + + :param credential_id: int + The ID for the corresponding credential to access. + + + + + .. py:method:: get(credential_id: int) -> CredentialInfo + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + cr = w.git_credentials.create(git_provider="gitHub", git_username="test", personal_access_token="test") + + by_id = w.git_credentials.get(credential_id=cr.credential_id) + + # cleanup + w.git_credentials.delete(credential_id=cr.credential_id) + + Get a credential entry. + + Gets the Git credential with the specified credential ID. + + :param credential_id: int + The ID for the corresponding credential to access. + + :returns: :class:`CredentialInfo` + + + .. py:method:: list() -> Iterator[CredentialInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + list = w.git_credentials.list() + + Get Git credentials. + + Lists the calling user's Git credentials. One credential per user is supported. + + :returns: Iterator over :class:`CredentialInfo` + + + .. py:method:: update(credential_id: int [, git_provider: Optional[str], git_username: Optional[str], personal_access_token: Optional[str]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + cr = w.git_credentials.create(git_provider="gitHub", git_username="test", personal_access_token="test") + + w.git_credentials.update(credential_id=cr.credential_id, + git_provider="gitHub", + git_username=f'sdk-{time.time_ns()}@example.com', + personal_access_token=f'sdk-{time.time_ns()}') + + # cleanup + w.git_credentials.delete(credential_id=cr.credential_id) + + Update a credential. + + Updates the specified Git credential. + + :param credential_id: int + The ID for the corresponding credential to access. + :param git_provider: str (optional) + Git provider. This field is case-insensitive. The available Git providers are gitHub, + bitbucketCloud, gitLab, azureDevOpsServices, gitHubEnterprise, bitbucketServer, + gitLabEnterpriseEdition and awsCodeCommit. 
+
+        :param git_username: str (optional)
+          Git username.
+        :param personal_access_token: str (optional)
+          The personal access token used to authenticate to the corresponding Git provider.
+
+
+
\ No newline at end of file
diff --git a/docs/workspace/workspace/index.rst b/docs/workspace/workspace/index.rst
new file mode 100644
index 000000000..c52ba1acb
--- /dev/null
+++ b/docs/workspace/workspace/index.rst
@@ -0,0 +1,13 @@
+
+Workspace
+=========
+
+Manage workspace-level entities that include notebooks, Git checkouts, and secrets
+
+.. toctree::
+   :maxdepth: 1
+
+   git_credentials
+   repos
+   secrets
+   workspace
\ No newline at end of file
diff --git a/docs/workspace/workspace/repos.rst b/docs/workspace/workspace/repos.rst
new file mode 100644
index 000000000..584ad70b3
--- /dev/null
+++ b/docs/workspace/workspace/repos.rst
@@ -0,0 +1,219 @@
+``w.repos``: Repos
+==================
+.. currentmodule:: databricks.sdk.service.workspace
+
+.. py:class:: ReposAPI
+
+    The Repos API allows users to manage their git repos. Users can use the API to access all repos that they
+    have manage permissions on.
+
+    Databricks Repos is a visual Git client in Databricks. It supports common Git operations such as cloning a
+    repository, committing and pushing, pulling, branch management, and visual comparison of diffs when
+    committing.
+
+    Within Repos you can develop code in notebooks or other files and follow data science and engineering code
+    development best practices using Git for version control, collaboration, and CI/CD.
+
+    .. py:method:: create(url: str, provider: str [, path: Optional[str], sparse_checkout: Optional[SparseCheckout]]) -> RepoInfo
+
+
+        Usage:
+
+        .. code-block::
+
+            import time
+
+            from databricks.sdk import WorkspaceClient
+
+            w = WorkspaceClient()
+
+            root = f'sdk-{time.time_ns()}'
+
+            ri = w.repos.create(path=root, url="https://github.com/shreyas-goenka/empty-repo.git", provider="github")
+
+            # cleanup
+            w.repos.delete(repo_id=ri.id)
+
+        Create a repo.
+
+        Creates a repo in the workspace and links it to the remote Git repo specified. Note that repos created
+        programmatically must be linked to a remote Git repo, unlike repos created in the browser.
+
+        :param url: str
+          URL of the Git repository to be linked.
+        :param provider: str
+          Git provider. This field is case-insensitive. The available Git providers are gitHub,
+          bitbucketCloud, gitLab, azureDevOpsServices, gitHubEnterprise, bitbucketServer,
+          gitLabEnterpriseEdition and awsCodeCommit.
+        :param path: str (optional)
+          Desired path for the repo in the workspace. Must be in the format /Repos/{folder}/{repo-name}.
+        :param sparse_checkout: :class:`SparseCheckout` (optional)
+          If specified, the repo will be created with sparse checkout enabled. You cannot enable/disable
+          sparse checkout after the repo is created.
+
+        :returns: :class:`RepoInfo`
+
+
+    .. py:method:: delete(repo_id: int)
+
+        Delete a repo.
+
+        Deletes the specified repo.
+
+        :param repo_id: int
+          The ID for the corresponding repo to access.
+
+
+
+
+    .. py:method:: get(repo_id: int) -> RepoInfo
+
+
+        Usage:
+
+        .. code-block::
+
+            import time
+
+            from databricks.sdk import WorkspaceClient
+
+            w = WorkspaceClient()
+
+            root = f'sdk-{time.time_ns()}'
+
+            ri = w.repos.create(path=root, url="https://github.com/shreyas-goenka/empty-repo.git", provider="github")
+
+            by_id = w.repos.get(repo_id=ri.id)
+
+            # cleanup
+            w.repos.delete(repo_id=ri.id)
+
+        Get a repo.
+
+        Returns the repo with the given repo ID.
+ + :param repo_id: int + The ID for the corresponding repo to access. + + :returns: :class:`RepoInfo` + + + .. py:method:: get_permission_levels(repo_id: str) -> GetRepoPermissionLevelsResponse + + Get repo permission levels. + + Gets the permission levels that a user can have on an object. + + :param repo_id: str + The repo for which to get or manage permissions. + + :returns: :class:`GetRepoPermissionLevelsResponse` + + + .. py:method:: get_permissions(repo_id: str) -> RepoPermissions + + Get repo permissions. + + Gets the permissions of a repo. Repos can inherit permissions from their root object. + + :param repo_id: str + The repo for which to get or manage permissions. + + :returns: :class:`RepoPermissions` + + + .. py:method:: list( [, next_page_token: Optional[str], path_prefix: Optional[str]]) -> Iterator[RepoInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import workspace + + w = WorkspaceClient() + + all = w.repos.list(workspace.ListReposRequest()) + + Get repos. + + Returns repos that the calling user has Manage permissions on. Results are paginated with each page + containing twenty repos. + + :param next_page_token: str (optional) + Token used to get the next page of results. If not specified, returns the first page of results as + well as a next page token if there are more results. + :param path_prefix: str (optional) + Filters repos that have paths starting with the given path prefix. + + :returns: Iterator over :class:`RepoInfo` + + + .. py:method:: set_permissions(repo_id: str [, access_control_list: Optional[List[RepoAccessControlRequest]]]) -> RepoPermissions + + Set repo permissions. + + Sets permissions on a repo. Repos can inherit permissions from their root object. + + :param repo_id: str + The repo for which to get or manage permissions. + :param access_control_list: List[:class:`RepoAccessControlRequest`] (optional) + + :returns: :class:`RepoPermissions` + + + .. py:method:: update(repo_id: int [, branch: Optional[str], sparse_checkout: Optional[SparseCheckoutUpdate], tag: Optional[str]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + root = f'sdk-{time.time_ns()}' + + ri = w.repos.create(path=root, url="https://github.com/shreyas-goenka/empty-repo.git", provider="github") + + w.repos.update(repo_id=ri.id, branch="foo") + + # cleanup + w.repos.delete(repo_id=ri.id) + + Update a repo. + + Updates the repo to a different branch or tag, or updates the repo to the latest commit on the same + branch. + + :param repo_id: int + The ID for the corresponding repo to access. + :param branch: str (optional) + Branch that the local version of the repo is checked out to. + :param sparse_checkout: :class:`SparseCheckoutUpdate` (optional) + If specified, update the sparse checkout settings. The update will fail if sparse checkout is not + enabled for the repo. + :param tag: str (optional) + Tag that the local version of the repo is checked out to. Updating the repo to a tag puts the repo + in a detached HEAD state. Before committing new changes, you must update the repo to a branch + instead of the detached HEAD. + + + + + .. py:method:: update_permissions(repo_id: str [, access_control_list: Optional[List[RepoAccessControlRequest]]]) -> RepoPermissions + + Update repo permissions. + + Updates the permissions on a repo. Repos can inherit permissions from their root object. 
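+
+        For illustration only (the repo ID, user name, and permission level below are
+        placeholders, not part of the generated examples), a minimal sketch of granting one
+        user read access might look like:
+
+        .. code-block::
+
+            from databricks.sdk import WorkspaceClient
+            from databricks.sdk.service import workspace
+
+            w = WorkspaceClient()
+
+            w.repos.update_permissions(repo_id="<repo-id>",
+                                       access_control_list=[
+                                           workspace.RepoAccessControlRequest(
+                                               user_name="someone@example.com",
+                                               permission_level=workspace.RepoPermissionLevel.CAN_READ)
+                                       ])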
+ + :param repo_id: str + The repo for which to get or manage permissions. + :param access_control_list: List[:class:`RepoAccessControlRequest`] (optional) + + :returns: :class:`RepoPermissions` + \ No newline at end of file diff --git a/docs/workspace/workspace/secrets.rst b/docs/workspace/workspace/secrets.rst new file mode 100644 index 000000000..add03d224 --- /dev/null +++ b/docs/workspace/workspace/secrets.rst @@ -0,0 +1,367 @@ +``w.secrets``: Secret +===================== +.. currentmodule:: databricks.sdk.service.workspace + +.. py:class:: SecretsAPI + + The Secrets API allows you to manage secrets, secret scopes, and access permissions. + + Sometimes accessing data requires that you authenticate to external data sources through JDBC. Instead of + directly entering your credentials into a notebook, use Databricks secrets to store your credentials and + reference them in notebooks and jobs. + + Administrators, secret creators, and users granted permission can read Databricks secrets. While + Databricks makes an effort to redact secret values that might be displayed in notebooks, it is not + possible to prevent such users from reading secrets. + + .. py:method:: create_scope(scope: str [, backend_azure_keyvault: Optional[AzureKeyVaultSecretScopeMetadata], initial_manage_principal: Optional[str], scope_backend_type: Optional[ScopeBackendType]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + key_name = f'sdk-{time.time_ns()}' + + scope_name = f'sdk-{time.time_ns()}' + + w.secrets.create_scope(scope=scope_name) + + # cleanup + w.secrets.delete_secret(scope=scope_name, key=key_name) + w.secrets.delete_scope(scope=scope_name) + + Create a new secret scope. + + The scope name must consist of alphanumeric characters, dashes, underscores, and periods, and may not + exceed 128 characters. The maximum number of scopes in a workspace is 100. + + :param scope: str + Scope name requested by the user. Scope names are unique. + :param backend_azure_keyvault: :class:`AzureKeyVaultSecretScopeMetadata` (optional) + The metadata for the secret scope if the type is `AZURE_KEYVAULT` + :param initial_manage_principal: str (optional) + The principal that is initially granted `MANAGE` permission to the created scope. + :param scope_backend_type: :class:`ScopeBackendType` (optional) + The backend type the scope will be created with. If not specified, will default to `DATABRICKS` + + + + + .. py:method:: delete_acl(scope: str, principal: str) + + Delete an ACL. + + Deletes the given ACL on the given scope. + + Users must have the `MANAGE` permission to invoke this API. Throws `RESOURCE_DOES_NOT_EXIST` if no + such secret scope, principal, or ACL exists. Throws `PERMISSION_DENIED` if the user does not have + permission to make this API call. + + :param scope: str + The name of the scope to remove permissions from. + :param principal: str + The principal to remove an existing ACL from. + + + + + .. py:method:: delete_scope(scope: str) + + Delete a secret scope. + + Deletes a secret scope. + + Throws `RESOURCE_DOES_NOT_EXIST` if the scope does not exist. Throws `PERMISSION_DENIED` if the user + does not have permission to make this API call. + + :param scope: str + Name of the scope to delete. + + + + + .. py:method:: delete_secret(scope: str, key: str) + + Delete a secret. + + Deletes the secret stored in this secret scope. You must have `WRITE` or `MANAGE` permission on the + secret scope. 
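+
+        A minimal sketch, assuming a secret scope named `my-scope` already exists:
+
+        .. code-block::
+
+            from databricks.sdk import WorkspaceClient
+
+            w = WorkspaceClient()
+
+            w.secrets.put_secret(scope='my-scope', key='my-key', string_value='s3cr3t')
+            w.secrets.delete_secret(scope='my-scope', key='my-key')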
+ + Throws `RESOURCE_DOES_NOT_EXIST` if no such secret scope or secret exists. Throws `PERMISSION_DENIED` + if the user does not have permission to make this API call. + + :param scope: str + The name of the scope that contains the secret to delete. + :param key: str + Name of the secret to delete. + + + + + .. py:method:: get_acl(scope: str, principal: str) -> AclItem + + Get secret ACL details. + + Gets the details about the given ACL, such as the group and permission. Users must have the `MANAGE` + permission to invoke this API. + + Throws `RESOURCE_DOES_NOT_EXIST` if no such secret scope exists. Throws `PERMISSION_DENIED` if the + user does not have permission to make this API call. + + :param scope: str + The name of the scope to fetch ACL information from. + :param principal: str + The principal to fetch ACL information for. + + :returns: :class:`AclItem` + + + .. py:method:: get_secret(scope: str, key: str) -> GetSecretResponse + + Get a secret. + + Gets the bytes representation of a secret value for the specified scope and key. + + Users need the READ permission to make this call. + + Note that the secret value returned is in bytes. The interpretation of the bytes is determined by the + caller in DBUtils and the type the data is decoded into. + + Throws ``PERMISSION_DENIED`` if the user does not have permission to make this API call. Throws + ``RESOURCE_DOES_NOT_EXIST`` if no such secret or secret scope exists. + + :param scope: str + The name of the scope to fetch secret information from. + :param key: str + The key to fetch secret for. + + :returns: :class:`GetSecretResponse` + + + .. py:method:: list_acls(scope: str) -> Iterator[AclItem] + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + key_name = f'sdk-{time.time_ns()}' + + scope_name = f'sdk-{time.time_ns()}' + + w.secrets.create_scope(scope=scope_name) + + acls = w.secrets.list_acls(scope=scope_name) + + # cleanup + w.secrets.delete_secret(scope=scope_name, key=key_name) + w.secrets.delete_scope(scope=scope_name) + + Lists ACLs. + + List the ACLs for a given secret scope. Users must have the `MANAGE` permission to invoke this API. + + Throws `RESOURCE_DOES_NOT_EXIST` if no such secret scope exists. Throws `PERMISSION_DENIED` if the + user does not have permission to make this API call. + + :param scope: str + The name of the scope to fetch ACL information from. + + :returns: Iterator over :class:`AclItem` + + + .. py:method:: list_scopes() -> Iterator[SecretScope] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + scopes = w.secrets.list_scopes() + + List all scopes. + + Lists all secret scopes available in the workspace. + + Throws `PERMISSION_DENIED` if the user does not have permission to make this API call. + + :returns: Iterator over :class:`SecretScope` + + + .. py:method:: list_secrets(scope: str) -> Iterator[SecretMetadata] + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + key_name = f'sdk-{time.time_ns()}' + + scope_name = f'sdk-{time.time_ns()}' + + w.secrets.create_scope(scope=scope_name) + + scrts = w.secrets.list_secrets(scope=scope_name) + + # cleanup + w.secrets.delete_secret(scope=scope_name, key=key_name) + w.secrets.delete_scope(scope=scope_name) + + List secret keys. + + Lists the secret keys that are stored at this scope. 
This is a metadata-only operation; secret data + cannot be retrieved using this API. Users need the READ permission to make this call. + + The lastUpdatedTimestamp returned is in milliseconds since epoch. Throws `RESOURCE_DOES_NOT_EXIST` if + no such secret scope exists. Throws `PERMISSION_DENIED` if the user does not have permission to make + this API call. + + :param scope: str + The name of the scope to list secrets within. + + :returns: Iterator over :class:`SecretMetadata` + + + .. py:method:: put_acl(scope: str, principal: str, permission: AclPermission) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import workspace + + w = WorkspaceClient() + + key_name = f'sdk-{time.time_ns()}' + + group = w.groups.create(display_name=f'sdk-{time.time_ns()}') + + scope_name = f'sdk-{time.time_ns()}' + + w.secrets.create_scope(scope=scope_name) + + w.secrets.put_acl(scope=scope_name, permission=workspace.AclPermission.MANAGE, principal=group.display_name) + + # cleanup + w.groups.delete(id=group.id) + w.secrets.delete_secret(scope=scope_name, key=key_name) + w.secrets.delete_scope(scope=scope_name) + + Create/update an ACL. + + Creates or overwrites the Access Control List (ACL) associated with the given principal (user or + group) on the specified scope point. + + In general, a user or group will use the most powerful permission available to them, and permissions + are ordered as follows: + + * `MANAGE` - Allowed to change ACLs, and read and write to this secret scope. * `WRITE` - Allowed to + read and write to this secret scope. * `READ` - Allowed to read this secret scope and list what + secrets are available. + + Note that in general, secret values can only be read from within a command on a cluster (for example, + through a notebook). There is no API to read the actual secret value material outside of a cluster. + However, the user's permission will be applied based on who is executing the command, and they must + have at least READ permission. + + Users must have the `MANAGE` permission to invoke this API. + + The principal is a user or group name corresponding to an existing Databricks principal to be granted + or revoked access. + + Throws `RESOURCE_DOES_NOT_EXIST` if no such secret scope exists. Throws `RESOURCE_ALREADY_EXISTS` if a + permission for the principal already exists. Throws `INVALID_PARAMETER_VALUE` if the permission or + principal is invalid. Throws `PERMISSION_DENIED` if the user does not have permission to make this API + call. + + :param scope: str + The name of the scope to apply permissions to. + :param principal: str + The principal in which the permission is applied. + :param permission: :class:`AclPermission` + The permission level applied to the principal. + + + + + .. py:method:: put_secret(scope: str, key: str [, bytes_value: Optional[str], string_value: Optional[str]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + key_name = f'sdk-{time.time_ns()}' + + scope_name = f'sdk-{time.time_ns()}' + + w.secrets.create_scope(scope=scope_name) + + w.secrets.put_secret(scope=scope_name, key=key_name, string_value=f'sdk-{time.time_ns()}') + + # cleanup + w.secrets.delete_secret(scope=scope_name, key=key_name) + w.secrets.delete_scope(scope=scope_name) + + Add a secret. + + Inserts a secret under the provided scope with the given name. 
If a secret already exists with the
+        same name, this command overwrites the existing secret's value. The server encrypts the secret using
+        the secret scope's encryption settings before storing it.
+
+        You must have `WRITE` or `MANAGE` permission on the secret scope. The secret key must consist of
+        alphanumeric characters, dashes, underscores, and periods, and cannot exceed 128 characters. The
+        maximum allowed secret value size is 128 KB. The maximum number of secrets in a given scope is 1000.
+
+        The input fields "string_value" or "bytes_value" specify the type of the secret, which will determine
+        the value returned when the secret value is requested. Exactly one must be specified.
+
+        Throws `RESOURCE_DOES_NOT_EXIST` if no such secret scope exists. Throws `RESOURCE_LIMIT_EXCEEDED` if
+        maximum number of secrets in scope is exceeded. Throws `INVALID_PARAMETER_VALUE` if the key name or
+        value length is invalid. Throws `PERMISSION_DENIED` if the user does not have permission to make this
+        API call.
+
+        :param scope: str
+          The name of the scope with which the secret will be associated.
+        :param key: str
+          A unique name to identify the secret.
+        :param bytes_value: str (optional)
+          If specified, value will be stored as bytes.
+        :param string_value: str (optional)
+          If specified, note that the value will be stored in UTF-8 (MB4) form.
+
+
+
\ No newline at end of file
diff --git a/docs/workspace/workspace/workspace.rst b/docs/workspace/workspace/workspace.rst
new file mode 100644
index 000000000..53b875b0c
--- /dev/null
+++ b/docs/workspace/workspace/workspace.rst
@@ -0,0 +1,342 @@
+``w.workspace``: Workspace
+==========================
+.. currentmodule:: databricks.sdk.service.workspace
+
+.. py:class:: WorkspaceExt
+
+    The Workspace API allows you to list, import, export, and delete notebooks and folders.
+
+    A notebook is a web-based interface to a document that contains runnable code, visualizations, and
+    explanatory text.
+
+    .. py:method:: delete(path: str [, recursive: Optional[bool]])
+
+        Delete a workspace object.
+
+        Deletes an object or a directory (and optionally recursively deletes all objects in the directory). *
+        If `path` does not exist, this call returns an error `RESOURCE_DOES_NOT_EXIST`. * If `path` is a
+        non-empty directory and `recursive` is set to `false`, this call returns an error
+        `DIRECTORY_NOT_EMPTY`.
+
+        Object deletion cannot be undone and deleting a directory recursively is not atomic.
+
+        :param path: str
+          The absolute path of the notebook or directory.
+        :param recursive: bool (optional)
+          The flag that specifies whether to delete the object recursively. It is `false` by default. Please
+          note that deleting a directory is not atomic: if the operation fails partway through, some of the
+          objects under this directory may already have been deleted and cannot be undone.
+
+
+
+
+    .. py:method:: download(path: str [, format: ExportFormat]) -> BinaryIO
+
+
+        Usage:
+
+        .. code-block::
+
+            import io
+            import time
+
+            from databricks.sdk import WorkspaceClient
+            from databricks.sdk.service.workspace import ImportFormat
+
+            w = WorkspaceClient()
+
+            py_file = f'/Users/{w.current_user.me().user_name}/file-{time.time_ns()}.py'
+
+            w.workspace.upload(py_file, io.BytesIO(b'print(1)'), format=ImportFormat.AUTO)
+            with w.workspace.download(py_file) as f:
+                content = f.read()
+                assert content == b'print(1)'
+
+            w.workspace.delete(py_file)
+
+
+        Downloads a notebook or file from the workspace.
+
+        :param path: location of the file or notebook on workspace.
+ :param format: By default, `ExportFormat.SOURCE`. If using `ExportFormat.AUTO` the `path` + is imported or exported as either a workspace file or a notebook, depending + on an analysis of the `item`’s extension and the header content provided in + the request. + :return: file-like `io.BinaryIO` of the `path` contents. + + + .. py:method:: export(path: str [, format: Optional[ExportFormat]]) -> ExportResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import workspace + + w = WorkspaceClient() + + notebook = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + export_response = w.workspace.export(format=workspace.ExportFormat.SOURCE, path=notebook) + + Export a workspace object. + + Exports an object or the contents of an entire directory. + + If `path` does not exist, this call returns an error `RESOURCE_DOES_NOT_EXIST`. + + If the exported data would exceed size limit, this call returns `MAX_NOTEBOOK_SIZE_EXCEEDED`. + Currently, this API does not support exporting a library. + + :param path: str + The absolute path of the object or directory. Exporting a directory is only supported for the `DBC`, + `SOURCE`, and `AUTO` format. + :param format: :class:`ExportFormat` (optional) + This specifies the format of the exported file. By default, this is `SOURCE`. + + The value is case sensitive. + + - `SOURCE`: The notebook is exported as source code. Directory exports will not include non-notebook + entries. - `HTML`: The notebook is exported as an HTML file. - `JUPYTER`: The notebook is exported + as a Jupyter/IPython Notebook file. - `DBC`: The notebook is exported in Databricks archive format. + Directory exports will not include non-notebook entries. - `R_MARKDOWN`: The notebook is exported to + R Markdown format. - `AUTO`: The object or directory is exported depending on the objects type. + Directory exports will include notebooks and workspace files. + + :returns: :class:`ExportResponse` + + + .. py:method:: get_permission_levels(workspace_object_type: str, workspace_object_id: str) -> GetWorkspaceObjectPermissionLevelsResponse + + Get workspace object permission levels. + + Gets the permission levels that a user can have on an object. + + :param workspace_object_type: str + The workspace object type for which to get or manage permissions. + :param workspace_object_id: str + The workspace object for which to get or manage permissions. + + :returns: :class:`GetWorkspaceObjectPermissionLevelsResponse` + + + .. py:method:: get_permissions(workspace_object_type: str, workspace_object_id: str) -> WorkspaceObjectPermissions + + Get workspace object permissions. + + Gets the permissions of a workspace object. Workspace objects can inherit permissions from their + parent objects or root object. + + :param workspace_object_type: str + The workspace object type for which to get or manage permissions. + :param workspace_object_id: str + The workspace object for which to get or manage permissions. + + :returns: :class:`WorkspaceObjectPermissions` + + + .. py:method:: get_status(path: str) -> ObjectInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + obj = w.workspace.get_status(path=notebook_path) + + Get status. + + Gets the status of an object or a directory. If `path` does not exist, this call returns an error + `RESOURCE_DOES_NOT_EXIST`. 
+ + :param path: str + The absolute path of the notebook or directory. + + :returns: :class:`ObjectInfo` + + + .. py:method:: import_(path: str [, content: Optional[str], format: Optional[ImportFormat], language: Optional[Language], overwrite: Optional[bool]]) + + + Usage: + + .. code-block:: + + import base64 + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import workspace + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + w.workspace.import_(content=base64.b64encode(("CREATE LIVE TABLE dlt_sample AS SELECT 1").encode()).decode(), + format=workspace.ImportFormat.SOURCE, + language=workspace.Language.SQL, + overwrite=True, + path=notebook_path) + + Import a workspace object. + + Imports a workspace object (for example, a notebook or file) or the contents of an entire directory. + If `path` already exists and `overwrite` is set to `false`, this call returns an error + `RESOURCE_ALREADY_EXISTS`. To import a directory, you can use either the `DBC` format or the `SOURCE` + format with the `language` field unset. To import a single file as `SOURCE`, you must set the + `language` field. + + :param path: str + The absolute path of the object or directory. Importing a directory is only supported for the `DBC` + and `SOURCE` formats. + :param content: str (optional) + The base64-encoded content. This has a limit of 10 MB. + + If the limit (10MB) is exceeded, exception with error code **MAX_NOTEBOOK_SIZE_EXCEEDED** is thrown. + This parameter might be absent, and instead a posted file is used. + :param format: :class:`ImportFormat` (optional) + This specifies the format of the file to be imported. + + The value is case sensitive. + + - `AUTO`: The item is imported depending on an analysis of the item's extension and the header + content provided in the request. If the item is imported as a notebook, then the item's extension is + automatically removed. - `SOURCE`: The notebook or directory is imported as source code. - `HTML`: + The notebook is imported as an HTML file. - `JUPYTER`: The notebook is imported as a Jupyter/IPython + Notebook file. - `DBC`: The notebook is imported in Databricks archive format. Required for + directories. - `R_MARKDOWN`: The notebook is imported from R Markdown format. + :param language: :class:`Language` (optional) + The language of the object. This value is set only if the object type is `NOTEBOOK`. + :param overwrite: bool (optional) + The flag that specifies whether to overwrite existing object. It is `false` by default. For `DBC` + format, `overwrite` is not supported since it may contain a directory. + + + + + .. py:method:: list(path: str [, notebooks_modified_after: int, recursive: bool = False]) -> ObjectInfo + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + names = [] + for i in w.workspace.list(f'/Users/{w.current_user.me().user_name}', recursive=True): + names.append(i.path) + assert len(names) > 0 + + List workspace objects + + :param recursive: bool + Optionally invoke recursive traversal + + :returns: Iterator of workspaceObjectInfo + + + .. py:method:: mkdirs(path: str) + + Create a directory. + + Creates the specified directory (and necessary parent directories if they do not exist). If there is + an object (not a directory) at any prefix of the input path, this call returns an error + `RESOURCE_ALREADY_EXISTS`. 
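+
+        A minimal usage sketch (the directory name below is arbitrary):
+
+        .. code-block::
+
+            import time
+
+            from databricks.sdk import WorkspaceClient
+
+            w = WorkspaceClient()
+
+            folder = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}'
+
+            w.workspace.mkdirs(folder)
+
+            # cleanup
+            w.workspace.delete(folder)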
+
+ Note that if this operation fails, it may have succeeded in creating some of the necessary parent
+ directories.
+
+ :param path: str
+ The absolute path of the directory. If the parent directories do not exist, they will also be
+ created. If the directory already exists, this command will do nothing and succeed.
+
+
+
+
+ .. py:method:: set_permissions(workspace_object_type: str, workspace_object_id: str [, access_control_list: Optional[List[WorkspaceObjectAccessControlRequest]]]) -> WorkspaceObjectPermissions
+
+ Set workspace object permissions.
+
+ Sets permissions on a workspace object. Workspace objects can inherit permissions from their parent
+ objects or root object.
+
+ :param workspace_object_type: str
+ The workspace object type for which to get or manage permissions.
+ :param workspace_object_id: str
+ The workspace object for which to get or manage permissions.
+ :param access_control_list: List[:class:`WorkspaceObjectAccessControlRequest`] (optional)
+
+ :returns: :class:`WorkspaceObjectPermissions`
+
+
+ .. py:method:: update_permissions(workspace_object_type: str, workspace_object_id: str [, access_control_list: Optional[List[WorkspaceObjectAccessControlRequest]]]) -> WorkspaceObjectPermissions
+
+ Update workspace object permissions.
+
+ Updates the permissions on a workspace object. Workspace objects can inherit permissions from their
+ parent objects or root object. A combined usage sketch for the permission methods is shown at the
+ end of this page.
+
+ :param workspace_object_type: str
+ The workspace object type for which to get or manage permissions.
+ :param workspace_object_id: str
+ The workspace object for which to get or manage permissions.
+ :param access_control_list: List[:class:`WorkspaceObjectAccessControlRequest`] (optional)
+
+ :returns: :class:`WorkspaceObjectPermissions`
+
+
+ .. py:method:: upload(path: str, content: BinaryIO [, format: ImportFormat, language: Language, overwrite: bool = False])
+
+
+ Usage:
+
+ .. code-block::
+
+ import io
+ import time
+
+ from databricks.sdk import WorkspaceClient
+
+ w = WorkspaceClient()
+
+ notebook = f'/Users/{w.current_user.me().user_name}/notebook-{time.time_ns()}.py'
+
+ w.workspace.upload(notebook, io.BytesIO(b'print(1)'))
+ with w.workspace.download(notebook) as f:
+ content = f.read()
+ assert content == b'# Databricks notebook source\nprint(1)'
+
+ w.workspace.delete(notebook)
+
+
+ Uploads a workspace object (for example, a notebook or file) or the contents of an entire
+ directory (`DBC` format).
+
+ Errors:
+ * `RESOURCE_ALREADY_EXISTS`: if `path` already exists and `overwrite=True` is not set.
+ * `INVALID_PARAMETER_VALUE`: if `format` and `content` values are not compatible.
+
+ :param path: target location of the file on workspace.
+ :param content: file-like `io.BinaryIO` of the `path` contents.
+ :param format: By default, `ImportFormat.SOURCE`. If using `ImportFormat.AUTO` the `path`
+ is imported as either a workspace file or a notebook, depending
+ on an analysis of the `item`’s extension and the header content provided in
+ the request. In addition, if the `path` is imported as a notebook, then
+ the `item`’s extension is automatically removed.
+ :param language: Only required if using `ImportFormat.SOURCE`.
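+
+
+ The permission helpers above (`get_permission_levels`, `get_permissions`, `set_permissions` and
+ `update_permissions`) take the workspace object type (for example `notebooks` or `directories`)
+ together with the numeric object id reported by :class:`ObjectInfo`. The block below is a minimal,
+ illustrative sketch rather than a generated example: the directory path, the `data-scientists`
+ group name and the chosen permission level are placeholder assumptions.
+
+ .. code-block::
+
+     from databricks.sdk import WorkspaceClient
+     from databricks.sdk.service import workspace
+
+     w = WorkspaceClient()
+
+     # Create a scratch directory (placeholder path) and look up its object id.
+     folder = f'/Users/{w.current_user.me().user_name}/sdk-permissions-demo'
+     w.workspace.mkdirs(folder)
+     obj = w.workspace.get_status(path=folder)
+
+     # Permission calls address the object by type and numeric id.
+     levels = w.workspace.get_permission_levels(workspace_object_type='directories',
+                                                workspace_object_id=str(obj.object_id))
+     current = w.workspace.get_permissions(workspace_object_type='directories',
+                                           workspace_object_id=str(obj.object_id))
+
+     # update_permissions merges the given entries into the existing ACL,
+     # while set_permissions replaces the direct permissions on the object.
+     w.workspace.update_permissions(
+         workspace_object_type='directories',
+         workspace_object_id=str(obj.object_id),
+         access_control_list=[
+             workspace.WorkspaceObjectAccessControlRequest(
+                 group_name='data-scientists',
+                 permission_level=workspace.WorkspaceObjectPermissionLevel.CAN_READ)
+         ])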
+ \ No newline at end of file diff --git a/examples/external_locations/list_external_locations_on_aws.py b/examples/external_locations/list_external_locations_on_aws.py index d847088b9..2fb3b4b01 100755 --- a/examples/external_locations/list_external_locations_on_aws.py +++ b/examples/external_locations/list_external_locations_on_aws.py @@ -1,5 +1,6 @@ from databricks.sdk import WorkspaceClient +from databricks.sdk.service import catalog w = WorkspaceClient() -all = w.external_locations.list() +all = w.external_locations.list(catalog.ListExternalLocationsRequest()) diff --git a/examples/storage_credentials/list_storage_credentials_on_aws.py b/examples/storage_credentials/list_storage_credentials_on_aws.py index fad05f4af..d12a0aa4e 100755 --- a/examples/storage_credentials/list_storage_credentials_on_aws.py +++ b/examples/storage_credentials/list_storage_credentials_on_aws.py @@ -1,5 +1,6 @@ from databricks.sdk import WorkspaceClient +from databricks.sdk.service import catalog w = WorkspaceClient() -all = w.storage_credentials.list() +all = w.storage_credentials.list(catalog.ListStorageCredentialsRequest()) diff --git a/tests/conftest.py b/tests/conftest.py index 748bd6794..80753ae95 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,7 +4,8 @@ import pytest as pytest from pyfakefs.fake_filesystem_unittest import Patcher -from databricks.sdk.core import Config, credentials_provider +from databricks.sdk.core import Config +from databricks.sdk.credentials_provider import credentials_provider @credentials_provider('noop', []) diff --git a/tests/test_auth.py b/tests/test_auth.py index f52c66390..504e14439 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -204,7 +204,7 @@ def test_config_azure_cli_host(monkeypatch): @raises( - "default auth: azure-cli: cannot get access token: This is just a failing script. Config: azure_workspace_resource_id=/sub/rg/ws" + "default auth: cannot configure default credentials, please check https://docs.databricks.com/en/dev-tools/auth.html#databricks-client-unified-authentication to configure credentials for your preferred authentication method. 
Config: azure_workspace_resource_id=/sub/rg/ws" ) def test_config_azure_cli_host_fail(monkeypatch): monkeypatch.setenv('FAIL', 'yes') diff --git a/tests/test_core.py b/tests/test_core.py index d7e2c8f41..ca2eaac31 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -15,10 +15,13 @@ from databricks.sdk import WorkspaceClient from databricks.sdk.azure import ENVIRONMENTS, AzureEnvironment -from databricks.sdk.core import (ApiClient, CliTokenSource, Config, - CredentialsProvider, DatabricksCliTokenSource, - DatabricksError, HeaderFactory, - StreamingResponse, databricks_cli) +from databricks.sdk.core import (ApiClient, Config, DatabricksError, + StreamingResponse) +from databricks.sdk.credentials_provider import (CliTokenSource, + CredentialsProvider, + DatabricksCliTokenSource, + HeaderFactory, databricks_cli) +from databricks.sdk.environments import Cloud, DatabricksEnvironment from databricks.sdk.service.catalog import PermissionsChange from databricks.sdk.service.iam import AccessControlRequest from databricks.sdk.version import __version__ @@ -282,9 +285,10 @@ class DummyResponse(requests.Response): _closed: bool = False def __init__(self, content: List[bytes]) -> None: + super().__init__() self._content = iter(content) - def iter_content(self, chunk_size: int = 1) -> Iterator[bytes]: + def iter_content(self, chunk_size: int = 1, decode_unicode=False) -> Iterator[bytes]: return self._content def close(self): @@ -546,14 +550,33 @@ def inner(h: BaseHTTPRequestHandler): with http_fixture_server(inner) as host: monkeypatch.setenv('ACTIONS_ID_TOKEN_REQUEST_URL', f'{host}/oidc') monkeypatch.setenv('ACTIONS_ID_TOKEN_REQUEST_TOKEN', 'gh-actions-token') - ENVIRONMENTS[host] = AzureEnvironment(name=host, - service_management_endpoint=host + '/', - resource_manager_endpoint=host + '/', - active_directory_endpoint=host + '/') + azure_environment = AzureEnvironment(name=host, + service_management_endpoint=host + '/', + resource_manager_endpoint=host + '/', + active_directory_endpoint=host + '/') + databricks_environment = DatabricksEnvironment(Cloud.AZURE, + '...', + azure_environment=azure_environment) cfg = Config(host=host, azure_workspace_resource_id=..., azure_client_id='test', - azure_environment=host) + azure_environment=host, + databricks_environment=databricks_environment) headers = cfg.authenticate() assert {'Authorization': 'Taker this-is-it'} == headers + + +@pytest.mark.parametrize(['azure_environment', 'expected'], + [('PUBLIC', ENVIRONMENTS['PUBLIC']), ('USGOVERNMENT', ENVIRONMENTS['USGOVERNMENT']), + ('CHINA', ENVIRONMENTS['CHINA']), ('public', ENVIRONMENTS['PUBLIC']), + ('usgovernment', ENVIRONMENTS['USGOVERNMENT']), ('china', ENVIRONMENTS['CHINA']), + # Kept for historical compatibility + ('AzurePublicCloud', ENVIRONMENTS['PUBLIC']), + ('AzureUSGovernment', ENVIRONMENTS['USGOVERNMENT']), + ('AzureChinaCloud', ENVIRONMENTS['CHINA']), ]) +def test_azure_environment(azure_environment, expected): + c = Config(credentials_provider=noop_credentials, + azure_workspace_resource_id='...', + azure_environment=azure_environment) + assert c.arm_environment == expected diff --git a/tests/test_environments.py b/tests/test_environments.py new file mode 100644 index 000000000..c14426f0d --- /dev/null +++ b/tests/test_environments.py @@ -0,0 +1,19 @@ +from databricks.sdk.core import Config +from databricks.sdk.environments import ALL_ENVS, Cloud + + +def test_environment_aws(): + c = Config(host="https://test.cloud.databricks.com", token="token") + assert c.environment.cloud == Cloud.AWS + 
assert c.environment.dns_zone == ".cloud.databricks.com" + + +def test_environment_azure(): + c = Config(host="https://test.dev.azuredatabricks.net", token="token") + assert c.environment.cloud == Cloud.AZURE + assert c.environment.dns_zone == ".dev.azuredatabricks.net" + + +def test_default_environment_can_be_overridden(): + c = Config(host="https://test.cloud.databricks.com", token="token", databricks_environment=ALL_ENVS[1]) + assert c.environment == ALL_ENVS[1] diff --git a/tests/test_metadata_service_auth.py b/tests/test_metadata_service_auth.py index 753d96f0a..f2c052006 100644 --- a/tests/test_metadata_service_auth.py +++ b/tests/test_metadata_service_auth.py @@ -3,7 +3,8 @@ import requests -from databricks.sdk.core import Config, MetadataServiceTokenSource +from databricks.sdk.core import Config +from databricks.sdk.credentials_provider import MetadataServiceTokenSource def get_test_server(host: str, token: str, expires_after: int): diff --git a/tests/test_oauth.py b/tests/test_oauth.py index 49b194384..ce2d514ff 100644 --- a/tests/test_oauth.py +++ b/tests/test_oauth.py @@ -1,5 +1,5 @@ -from databricks.sdk.core import Config, OidcEndpoints -from databricks.sdk.oauth import OAuthClient, TokenCache +from databricks.sdk.core import Config +from databricks.sdk.oauth import OAuthClient, OidcEndpoints, TokenCache def test_token_cache_unique_filename_by_host(mocker):