diff --git a/docs/account/billing/billable_usage.rst b/docs/account/billing/billable_usage.rst new file mode 100644 index 000000000..51c2eb2fa --- /dev/null +++ b/docs/account/billing/billable_usage.rst @@ -0,0 +1,44 @@ +``a.billable_usage``: Billable usage download +============================================= +.. currentmodule:: databricks.sdk.service.billing + +.. py:class:: BillableUsageAPI + + This API allows you to download billable usage logs for the specified account and date range. This feature + works with all account types. + + .. py:method:: download(start_month: str, end_month: str [, personal_data: Optional[bool]]) -> DownloadResponse + + + Usage: + + .. code-block:: + + from databricks.sdk import AccountClient + + a = AccountClient() + + resp = a.billable_usage.download(start_month="2023-01", end_month="2023-02") + + Return billable usage logs. + + Returns billable usage logs in CSV format for the specified account and date range. For the data + schema, see [CSV file schema]. Note that this method might take multiple minutes to complete. + + **Warning**: Depending on the queried date range, the number of workspaces in the account, the size of + the response and the internet speed of the caller, this API may hit a timeout after a few minutes. If + you experience this, try to mitigate by calling the API with narrower date ranges. + + [CSV file schema]: https://docs.databricks.com/administration-guide/account-settings/usage-analysis.html#schema + + :param start_month: str + Format: `YYYY-MM`. First month to return billable usage logs for. This field is required. + :param end_month: str + Format: `YYYY-MM`. Last month to return billable usage logs for. This field is required. + :param personal_data: bool (optional) + Specify whether to include personally identifiable information in the billable usage logs, for + example the email addresses of cluster creators. Handle this information with care. Defaults to + false. 
+ + :returns: :class:`DownloadResponse` + \ No newline at end of file diff --git a/docs/account/billing/budgets.rst b/docs/account/billing/budgets.rst new file mode 100644 index 000000000..85f7ee133 --- /dev/null +++ b/docs/account/billing/budgets.rst @@ -0,0 +1,162 @@ +``a.budgets``: Budgets +====================== +.. currentmodule:: databricks.sdk.service.billing + +.. py:class:: BudgetsAPI + + These APIs manage budget configuration including notifications for exceeding a budget for a period. They + can also retrieve the status of each budget. + + .. py:method:: create(budget: Budget) -> WrappedBudgetWithStatus + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import billing + + a = AccountClient() + + created = a.budgets.create(budget=billing.Budget( + name=f'sdk-{time.time_ns()}', + filter="tag.tagName = 'all'", + period="1 month", + start_date="2022-01-01", + target_amount="100", + alerts=[billing.BudgetAlert(email_notifications=["admin@example.com"], min_percentage=50)])) + + # cleanup + a.budgets.delete(budget_id=created.budget.budget_id) + + Create a new budget. + + Creates a new budget in the specified account. + + :param budget: :class:`Budget` + Budget configuration to be created. + + :returns: :class:`WrappedBudgetWithStatus` + + + .. py:method:: delete(budget_id: str) + + Delete budget. + + Deletes the budget specified by its UUID. + + :param budget_id: str + Budget ID + + + + + .. py:method:: get(budget_id: str) -> WrappedBudgetWithStatus + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import billing + + a = AccountClient() + + created = a.budgets.create(budget=billing.Budget( + name=f'sdk-{time.time_ns()}', + filter="tag.tagName = 'all'", + period="1 month", + start_date="2022-01-01", + target_amount="100", + alerts=[billing.BudgetAlert(email_notifications=["admin@example.com"], min_percentage=50)])) + + by_id = a.budgets.get(budget_id=created.budget.budget_id) + + # cleanup + a.budgets.delete(budget_id=created.budget.budget_id) + + Get budget and its status. + + Gets the budget specified by its UUID, including noncumulative status for each day that the budget is + configured to include. + + :param budget_id: str + Budget ID + + :returns: :class:`WrappedBudgetWithStatus` + + + .. py:method:: list() -> Iterator[BudgetWithStatus] + + + Usage: + + .. code-block:: + + from databricks.sdk import AccountClient + + a = AccountClient() + + all = a.budgets.list() + + Get all budgets. + + Gets all budgets associated with this account, including noncumulative status for each day that the + budget is configured to include. + + :returns: Iterator over :class:`BudgetWithStatus` + + + .. py:method:: update(budget_id: str, budget: Budget) + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import billing + + a = AccountClient() + + created = a.budgets.create(budget=billing.Budget( + name=f'sdk-{time.time_ns()}', + filter="tag.tagName = 'all'", + period="1 month", + start_date="2022-01-01", + target_amount="100", + alerts=[billing.BudgetAlert(email_notifications=["admin@example.com"], min_percentage=50)])) + + a.budgets.update(budget_id=created.budget.budget_id, + budget=billing.Budget(name=f'sdk-{time.time_ns()}', + filter="tag.tagName = 'all'", + period="1 month", + start_date="2022-01-01", + target_amount="100", + alerts=[ + billing.BudgetAlert(email_notifications=["admin@example.com"], + min_percentage=70) + ])) + + # cleanup + a.budgets.delete(budget_id=created.budget.budget_id) + + Modify budget. + + Modifies a budget in this account. Budget properties are completely overwritten. + + :param budget_id: str + Budget ID + :param budget: :class:`Budget` + Budget configuration to be created. + + + \ No newline at end of file diff --git a/docs/account/billing/index.rst b/docs/account/billing/index.rst new file mode 100644 index 000000000..522f6f5fd --- /dev/null +++ b/docs/account/billing/index.rst @@ -0,0 +1,12 @@ + +Billing +======= + +Configure different aspects of Databricks billing and usage. + +.. toctree:: + :maxdepth: 1 + + billable_usage + budgets + log_delivery \ No newline at end of file diff --git a/docs/account/billing/log_delivery.rst b/docs/account/billing/log_delivery.rst new file mode 100644 index 000000000..04ef4e349 --- /dev/null +++ b/docs/account/billing/log_delivery.rst @@ -0,0 +1,212 @@ +``a.log_delivery``: Log delivery configurations +=============================================== +.. currentmodule:: databricks.sdk.service.billing + +.. py:class:: LogDeliveryAPI + + These APIs manage log delivery configurations for this account. The two supported log types for this API + are _billable usage logs_ and _audit logs_. 
This feature is in Public Preview. This feature works with all + account ID types. + + Log delivery works with all account types. However, if your account is on the E2 version of the platform + or on a select custom plan that allows multiple workspaces per account, you can optionally configure + different storage destinations for each workspace. Log delivery status is also provided to know the latest + status of log delivery attempts. The high-level flow of billable usage delivery: + + 1. **Create storage**: In AWS, [create a new AWS S3 bucket] with a specific bucket policy. Using + Databricks APIs, call the Account API to create a [storage configuration object](:method:Storage/Create) + that uses the bucket name. 2. **Create credentials**: In AWS, create the appropriate AWS IAM role. For + full details, including the required IAM role policies and trust relationship, see [Billable usage log + delivery]. Using Databricks APIs, call the Account API to create a [credential configuration + object](:method:Credentials/Create) that uses the IAM role's ARN. 3. **Create log delivery + configuration**: Using Databricks APIs, call the Account API to [create a log delivery + configuration](:method:LogDelivery/Create) that uses the credential and storage configuration objects from + previous steps. You can specify if the logs should include all events of that log type in your account + (_Account level_ delivery) or only events for a specific set of workspaces (_workspace level_ delivery). + Account level log delivery applies to all current and future workspaces plus account level logs, while + workspace level log delivery solely delivers logs related to the specified workspaces. You can create + multiple types of delivery configurations per account. + + For billable usage delivery: * For more information about billable usage logs, see [Billable usage log + delivery]. For the CSV schema, see the [Usage page].
* The delivery location is + `<bucket-name>/<prefix>/billable-usage/csv/`, where `<prefix>` is the name of the optional delivery path + prefix you set up during log delivery configuration. Files are named + `workspaceId=<workspace-id>-usageMonth=<month>.csv`. * All billable usage logs apply to specific + workspaces (_workspace level_ logs). You can aggregate usage for your entire account by creating an + _account level_ delivery configuration that delivers logs for all current and future workspaces in your + account. * The files are delivered daily by overwriting the month's CSV file for each workspace. + + For audit log delivery: * For more information about audit log delivery, see [Audit log delivery], + which includes information about the JSON schema used. * The delivery location is + `<bucket-name>/<delivery-path-prefix>/workspaceId=<workspaceId>/date=<yyyy-mm-dd>/auditlogs_<internal-id>.json`. + Files may get overwritten with the same content multiple times to achieve exactly-once delivery. * If the + audit log delivery configuration included specific workspace IDs, only _workspace-level_ audit logs for + those workspaces are delivered. If the log delivery configuration applies to the entire account (_account + level_ delivery configuration), the audit log delivery includes workspace-level audit logs for all + workspaces in the account as well as account-level audit logs. See [Audit log delivery] for details. * + Auditable events are typically available in logs within 15 minutes. + + [Audit log delivery]: https://docs.databricks.com/administration-guide/account-settings/audit-logs.html + [Billable usage log delivery]: https://docs.databricks.com/administration-guide/account-settings/billable-usage-delivery.html + [Usage page]: https://docs.databricks.com/administration-guide/account-settings/usage.html + [create a new AWS S3 bucket]: https://docs.databricks.com/administration-guide/account-api/aws-storage.html + + .. py:method:: create( [, log_delivery_configuration: Optional[CreateLogDeliveryConfigurationParams]]) -> WrappedLogDeliveryConfiguration + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import billing, provisioning + + a = AccountClient() + + bucket = a.storage.create(storage_configuration_name=f'sdk-{time.time_ns()}', + root_bucket_info=provisioning.RootBucketInfo(bucket_name=f'sdk-{time.time_ns()}')) + + creds = a.credentials.create( + credentials_name=f'sdk-{time.time_ns()}', + aws_credentials=provisioning.CreateCredentialAwsCredentials(sts_role=provisioning.CreateCredentialStsRole( + role_arn=os.environ["TEST_LOGDELIVERY_ARN"]))) + + created = a.log_delivery.create(log_delivery_configuration=billing.CreateLogDeliveryConfigurationParams( + config_name=f'sdk-{time.time_ns()}', + credentials_id=creds.credentials_id, + storage_configuration_id=bucket.storage_configuration_id, + log_type=billing.LogType.AUDIT_LOGS, + output_format=billing.OutputFormat.JSON)) + + # cleanup + a.storage.delete(storage_configuration_id=bucket.storage_configuration_id) + a.credentials.delete(credentials_id=creds.credentials_id) + a.log_delivery.patch_status(log_delivery_configuration_id=created.log_delivery_configuration.config_id, + status=billing.LogDeliveryConfigStatus.DISABLED) + + Create a new log delivery configuration. + + Creates a new Databricks log delivery configuration to enable delivery of the specified type of logs + to your storage location. This requires that you already created a [credential + object](:method:Credentials/Create) (which encapsulates a cross-account service IAM role) and a + [storage configuration object](:method:Storage/Create) (which encapsulates an S3 bucket). + + For full details, including the required IAM role policies and bucket policies, see [Deliver and + access billable usage logs] or [Configure audit logging]. + + **Note**: There is a limit on the number of log delivery configurations available per account (each + limit applies separately to each log type including billable usage and audit logs). 
You can create a + maximum of two enabled account-level delivery configurations (configurations without a workspace + filter) per type. Additionally, you can create two enabled workspace-level delivery configurations per + workspace for each log type, which means that the same workspace ID can occur in the workspace filter + for no more than two delivery configurations per log type. + + You cannot delete a log delivery configuration, but you can disable it (see [Enable or disable log + delivery configuration](:method:LogDelivery/PatchStatus)). + + [Configure audit logging]: https://docs.databricks.com/administration-guide/account-settings/audit-logs.html + [Deliver and access billable usage logs]: https://docs.databricks.com/administration-guide/account-settings/billable-usage-delivery.html + + :param log_delivery_configuration: :class:`CreateLogDeliveryConfigurationParams` (optional) + + :returns: :class:`WrappedLogDeliveryConfiguration` + + + .. py:method:: get(log_delivery_configuration_id: str) -> WrappedLogDeliveryConfiguration + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import billing, provisioning + + a = AccountClient() + + bucket = a.storage.create(storage_configuration_name=f'sdk-{time.time_ns()}', + root_bucket_info=provisioning.RootBucketInfo(bucket_name=f'sdk-{time.time_ns()}')) + + creds = a.credentials.create( + credentials_name=f'sdk-{time.time_ns()}', + aws_credentials=provisioning.CreateCredentialAwsCredentials(sts_role=provisioning.CreateCredentialStsRole( + role_arn=os.environ["TEST_LOGDELIVERY_ARN"]))) + + created = a.log_delivery.create(log_delivery_configuration=billing.CreateLogDeliveryConfigurationParams( + config_name=f'sdk-{time.time_ns()}', + credentials_id=creds.credentials_id, + storage_configuration_id=bucket.storage_configuration_id, + log_type=billing.LogType.AUDIT_LOGS, + output_format=billing.OutputFormat.JSON)) + + by_id = a.log_delivery.get(log_delivery_configuration_id=created.log_delivery_configuration.config_id) + + # cleanup + a.storage.delete(storage_configuration_id=bucket.storage_configuration_id) + a.credentials.delete(credentials_id=creds.credentials_id) + a.log_delivery.patch_status(log_delivery_configuration_id=created.log_delivery_configuration.config_id, + status=billing.LogDeliveryConfigStatus.DISABLED) + + Get log delivery configuration. + + Gets a Databricks log delivery configuration object for an account, both specified by ID. + + :param log_delivery_configuration_id: str + Databricks log delivery configuration ID + + :returns: :class:`WrappedLogDeliveryConfiguration` + + + .. py:method:: list( [, credentials_id: Optional[str], status: Optional[LogDeliveryConfigStatus], storage_configuration_id: Optional[str]]) -> Iterator[LogDeliveryConfiguration] + + + Usage: + + .. 
code-block:: + + from databricks.sdk import AccountClient + from databricks.sdk.service import billing + + a = AccountClient() + + all = a.log_delivery.list(billing.ListLogDeliveryRequest()) + + Get all log delivery configurations. + + Gets all Databricks log delivery configurations associated with an account specified by ID. + + :param credentials_id: str (optional) + Filter by credential configuration ID. + :param status: :class:`LogDeliveryConfigStatus` (optional) + Filter by status `ENABLED` or `DISABLED`. + :param storage_configuration_id: str (optional) + Filter by storage configuration ID. + + :returns: Iterator over :class:`LogDeliveryConfiguration` + + + .. py:method:: patch_status(log_delivery_configuration_id: str, status: LogDeliveryConfigStatus) + + Enable or disable log delivery configuration. + + Enables or disables a log delivery configuration. Deletion of delivery configurations is not + supported, so disable log delivery configurations that are no longer needed. Note that you can't + re-enable a delivery configuration if this would violate the delivery configuration limits described + under [Create log delivery](:method:LogDelivery/Create). + + :param log_delivery_configuration_id: str + Databricks log delivery configuration ID + :param status: :class:`LogDeliveryConfigStatus` + Status of log delivery configuration. Set to `ENABLED` (enabled) or `DISABLED` (disabled). Defaults + to `ENABLED`. You can [enable or disable the + configuration](#operation/patch-log-delivery-config-status) later. Deletion of a configuration is + not supported, so disable a log delivery configuration that is no longer needed. 
+ + + \ No newline at end of file diff --git a/docs/account/catalog/index.rst b/docs/account/catalog/index.rst new file mode 100644 index 000000000..c774d529d --- /dev/null +++ b/docs/account/catalog/index.rst @@ -0,0 +1,12 @@ + +Unity Catalog +============= + +Configure data governance with Unity Catalog for metastores, catalogs, schemas, tables, external locations, and storage credentials + +.. toctree:: + :maxdepth: 1 + + metastore_assignments + metastores + storage_credentials \ No newline at end of file diff --git a/docs/account/catalog/metastore_assignments.rst b/docs/account/catalog/metastore_assignments.rst new file mode 100644 index 000000000..f5b00c6b3 --- /dev/null +++ b/docs/account/catalog/metastore_assignments.rst @@ -0,0 +1,91 @@ +``a.metastore_assignments``: Account Metastore Assignments +========================================================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: AccountMetastoreAssignmentsAPI + + These APIs manage metastore assignments to a workspace. + + .. py:method:: create(workspace_id: int, metastore_id: str [, metastore_assignment: Optional[CreateMetastoreAssignment]]) + + Assigns a workspace to a metastore. + + Creates an assignment to a metastore for a workspace + + :param workspace_id: int + Workspace ID. + :param metastore_id: str + Unity Catalog metastore ID + :param metastore_assignment: :class:`CreateMetastoreAssignment` (optional) + + + + + .. py:method:: delete(workspace_id: int, metastore_id: str) + + Delete a metastore assignment. + + Deletes a metastore assignment to a workspace, leaving the workspace with no metastore. + + :param workspace_id: int + Workspace ID. + :param metastore_id: str + Unity Catalog metastore ID + + + + + .. py:method:: get(workspace_id: int) -> AccountsMetastoreAssignment + + Gets the metastore assignment for a workspace. + + Gets the metastore assignment, if any, for the workspace specified by ID. 
If the workspace is assigned + a metastore, the mapping will be returned. If no metastore is assigned to the workspace, the assignment + will not be found and a 404 is returned. + + :param workspace_id: int + Workspace ID. + + :returns: :class:`AccountsMetastoreAssignment` + + + .. py:method:: list(metastore_id: str) -> Iterator[int] + + + Usage: + + .. code-block:: + + import os + + from databricks.sdk import AccountClient + + a = AccountClient() + + ws = a.metastore_assignments.list(metastore_id=os.environ["TEST_METASTORE_ID"]) + + Get all workspaces assigned to a metastore. + + Gets a list of all Databricks workspace IDs that have been assigned to the given metastore. + + :param metastore_id: str + Unity Catalog metastore ID + + :returns: Iterator over int + + + .. py:method:: update(workspace_id: int, metastore_id: str [, metastore_assignment: Optional[UpdateMetastoreAssignment]]) + + Updates a metastore assignment to a workspace. + + Updates an assignment to a metastore for a workspace. Currently, only the default catalog may be + updated. + + :param workspace_id: int + Workspace ID. + :param metastore_id: str + Unity Catalog metastore ID + :param metastore_assignment: :class:`UpdateMetastoreAssignment` (optional) + + + \ No newline at end of file diff --git a/docs/account/catalog/metastores.rst b/docs/account/catalog/metastores.rst new file mode 100644 index 000000000..d0c2752ed --- /dev/null +++ b/docs/account/catalog/metastores.rst @@ -0,0 +1,139 @@ +``a.metastores``: Account Metastores +==================================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: AccountMetastoresAPI + + These APIs manage Unity Catalog metastores for an account. A metastore contains catalogs that can be + associated with workspaces. + + .. py:method:: create( [, metastore_info: Optional[CreateMetastore]]) -> AccountsMetastoreInfo + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.metastores.create(name=f'sdk-{time.time_ns()}', + storage_root="s3://%s/%s" % + (os.environ["TEST_BUCKET"], f'sdk-{time.time_ns()}')) + + # cleanup + w.metastores.delete(id=created.metastore_id, force=True) + + Create metastore. + + Creates a Unity Catalog metastore. + + :param metastore_info: :class:`CreateMetastore` (optional) + + :returns: :class:`AccountsMetastoreInfo` + + + .. py:method:: delete(metastore_id: str [, force: Optional[bool]]) + + Delete a metastore. + + Deletes a Unity Catalog metastore for an account, both specified by ID. + + :param metastore_id: str + Unity Catalog metastore ID + :param force: bool (optional) + Force deletion even if the metastore is not empty. Default is false. + + + + + .. py:method:: get(metastore_id: str) -> AccountsMetastoreInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.metastores.create(name=f'sdk-{time.time_ns()}', + storage_root="s3://%s/%s" % + (os.environ["TEST_BUCKET"], f'sdk-{time.time_ns()}')) + + _ = w.metastores.get(id=created.metastore_id) + + # cleanup + w.metastores.delete(id=created.metastore_id, force=True) + + Get a metastore. + + Gets a Unity Catalog metastore from an account, both specified by ID. + + :param metastore_id: str + Unity Catalog metastore ID + + :returns: :class:`AccountsMetastoreInfo` + + + .. py:method:: list() -> Iterator[MetastoreInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.metastores.list() + + Get all metastores associated with an account. + + Gets all Unity Catalog metastores associated with an account specified by ID. + + :returns: Iterator over :class:`MetastoreInfo` + + + .. 
py:method:: update(metastore_id: str [, metastore_info: Optional[UpdateMetastore]]) -> AccountsMetastoreInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.metastores.create(name=f'sdk-{time.time_ns()}', + storage_root="s3://%s/%s" % + (os.environ["TEST_BUCKET"], f'sdk-{time.time_ns()}')) + + _ = w.metastores.update(id=created.metastore_id, name=f'sdk-{time.time_ns()}') + + # cleanup + w.metastores.delete(id=created.metastore_id, force=True) + + Update a metastore. + + Updates an existing Unity Catalog metastore. + + :param metastore_id: str + Unity Catalog metastore ID + :param metastore_info: :class:`UpdateMetastore` (optional) + + :returns: :class:`AccountsMetastoreInfo` + \ No newline at end of file diff --git a/docs/account/catalog/storage_credentials.rst b/docs/account/catalog/storage_credentials.rst new file mode 100644 index 000000000..9972ffbf8 --- /dev/null +++ b/docs/account/catalog/storage_credentials.rst @@ -0,0 +1,164 @@ +``a.storage_credentials``: Account Storage Credentials +====================================================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: AccountStorageCredentialsAPI + + These APIs manage storage credentials for a particular metastore. + + .. py:method:: create(metastore_id: str [, credential_info: Optional[CreateStorageCredential]]) -> AccountsStorageCredentialInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + created = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"])) + + # cleanup + w.storage_credentials.delete(delete=created.name) + + Create a storage credential. + + Creates a new storage credential. 
The request object is specific to the cloud: + + * **AwsIamRole** for AWS credentials * **AzureServicePrincipal** for Azure credentials * + **GcpServiceAccountKey** for GCP credentials. + + The caller must be a metastore admin and have the **CREATE_STORAGE_CREDENTIAL** privilege on the + metastore. + + :param metastore_id: str + Unity Catalog metastore ID + :param credential_info: :class:`CreateStorageCredential` (optional) + + :returns: :class:`AccountsStorageCredentialInfo` + + + .. py:method:: delete(metastore_id: str, storage_credential_name: str [, force: Optional[bool]]) + + Delete a storage credential. + + Deletes a storage credential from the metastore. The caller must be an owner of the storage + credential. + + :param metastore_id: str + Unity Catalog metastore ID + :param storage_credential_name: str + Name of the storage credential. + :param force: bool (optional) + Force deletion even if the Storage Credential is not empty. Default is false. + + + + + .. py:method:: get(metastore_id: str, storage_credential_name: str) -> AccountsStorageCredentialInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + created = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"])) + + by_name = w.storage_credentials.get(name=created.name) + + # cleanup + w.storage_credentials.delete(name=created.name) + + Gets the named storage credential. + + Gets a storage credential from the metastore. The caller must be a metastore admin, the owner of the + storage credential, or have a level of privilege on the storage credential. + + :param metastore_id: str + Unity Catalog metastore ID + :param storage_credential_name: str + Name of the storage credential. + + :returns: :class:`AccountsStorageCredentialInfo` + + + .. 
py:method:: list(metastore_id: str) -> Iterator[StorageCredentialInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.storage_credentials.list() + + Get all storage credentials assigned to a metastore. + + Gets a list of all storage credentials that have been assigned to given metastore. + + :param metastore_id: str + Unity Catalog metastore ID + + :returns: Iterator over :class:`StorageCredentialInfo` + + + .. py:method:: update(metastore_id: str, storage_credential_name: str [, credential_info: Optional[UpdateStorageCredential]]) -> AccountsStorageCredentialInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + created = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"])) + + _ = w.storage_credentials.update( + name=created.name, + comment=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"])) + + # cleanup + w.storage_credentials.delete(delete=created.name) + + Updates a storage credential. + + Updates a storage credential on the metastore. The caller must be the owner of the storage credential. + If the caller is a metastore admin, only the __owner__ credential can be changed. + + :param metastore_id: str + Unity Catalog metastore ID + :param storage_credential_name: str + Name of the storage credential. 
+ :param credential_info: :class:`UpdateStorageCredential` (optional) + + :returns: :class:`AccountsStorageCredentialInfo` + \ No newline at end of file diff --git a/docs/account/iam/access_control.rst b/docs/account/iam/access_control.rst new file mode 100644 index 000000000..2537e262c --- /dev/null +++ b/docs/account/iam/access_control.rst @@ -0,0 +1,56 @@ +``a.access_control``: Account Access Control +============================================ +.. currentmodule:: databricks.sdk.service.iam + +.. py:class:: AccountAccessControlAPI + + These APIs manage access rules on resources in an account. Currently, only grant rules are supported. A + grant rule specifies a role assigned to a set of principals. A list of rules attached to a resource is + called a rule set. + + .. py:method:: get_assignable_roles_for_resource(resource: str) -> GetAssignableRolesForResourceResponse + + Get assignable roles for a resource. + + Gets all the roles that can be granted on an account level resource. A role is grantable if the rule + set on the resource can contain an access rule of the role. + + :param resource: str + The resource name for which assignable roles will be listed. + + :returns: :class:`GetAssignableRolesForResourceResponse` + + + .. py:method:: get_rule_set(name: str, etag: str) -> RuleSetResponse + + Get a rule set. + + Get a rule set by its name. A rule set is always attached to a resource and contains a list of access + rules on the said resource. Currently only a default rule set for each resource is supported. + + :param name: str + The ruleset name associated with the request. + :param etag: str + Etag used for versioning. The response is at least as fresh as the eTag provided. Etag is used for + optimistic concurrency control as a way to help prevent simultaneous updates of a rule set from + overwriting each other. 
It is strongly suggested that systems make use of the etag in the read -> + modify -> write pattern to perform rule set updates in order to avoid race conditions: that is, get an + etag from a GET rule set request, and pass it with the PUT update request to identify the rule set + version you are updating. + + :returns: :class:`RuleSetResponse` + + + .. py:method:: update_rule_set(name: str, rule_set: RuleSetUpdateRequest) -> RuleSetResponse + + Update a rule set. + + Replace the rules of a rule set. First, use get to read the current version of the rule set before + modifying it. This pattern helps prevent conflicts between concurrent updates. + + :param name: str + Name of the rule set. + :param rule_set: :class:`RuleSetUpdateRequest` + + :returns: :class:`RuleSetResponse` + \ No newline at end of file diff --git a/docs/account/iam/groups.rst b/docs/account/iam/groups.rst new file mode 100644 index 000000000..be1af3c86 --- /dev/null +++ b/docs/account/iam/groups.rst @@ -0,0 +1,185 @@ +``a.groups``: Account Groups +============================ +.. currentmodule:: databricks.sdk.service.iam + +.. py:class:: AccountGroupsAPI + + Groups simplify identity management, making it easier to assign access to Databricks account, data, and + other securable objects. + + It is best practice to assign access to workspaces and access-control policies in Unity Catalog to groups, + instead of to users individually. All Databricks account identities can be assigned as members of groups, + and members inherit permissions that are assigned to their group. + + .. py:method:: create( [, display_name: Optional[str], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], id: Optional[str], members: Optional[List[ComplexValue]], meta: Optional[ResourceMeta], roles: Optional[List[ComplexValue]], schemas: Optional[List[GroupSchema]]]) -> Group + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + group = w.groups.create(display_name=f'sdk-{time.time_ns()}') + + # cleanup + w.groups.delete(id=group.id) + + Create a new group. + + Creates a group in the Databricks account with a unique name, using the supplied group details. + + :param display_name: str (optional) + String that represents a human-readable group name + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the group. See [assigning entitlements] for a full list of supported + values. + + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + :param groups: List[:class:`ComplexValue`] (optional) + :param id: str (optional) + Databricks group ID + :param members: List[:class:`ComplexValue`] (optional) + :param meta: :class:`ResourceMeta` (optional) + Container for the group identifier. Workspace local versus account. + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`GroupSchema`] (optional) + The schema of the group. + + :returns: :class:`Group` + + + .. py:method:: delete(id: str) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + group = w.groups.create(display_name=f'sdk-{time.time_ns()}') + + w.groups.delete(id=group.id) + + Delete a group. + + Deletes a group from the Databricks account. + + :param id: str + Unique ID for a group in the Databricks account. + + + + + .. py:method:: get(id: str) -> Group + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + group = w.groups.create(display_name=f'sdk-{time.time_ns()}') + + fetch = w.groups.get(id=group.id) + + # cleanup + w.groups.delete(id=group.id) + + Get group details. 
+ + Gets the information for a specific group in the Databricks account. + + :param id: str + Unique ID for a group in the Databricks account. + + :returns: :class:`Group` + + + .. py:method:: list( [, attributes: Optional[str], count: Optional[int], excluded_attributes: Optional[str], filter: Optional[str], sort_by: Optional[str], sort_order: Optional[ListSortOrder], start_index: Optional[int]]) -> Iterator[Group] + + List group details. + + Gets all details of the groups associated with the Databricks account. + + :param attributes: str (optional) + Comma-separated list of attributes to return in response. + :param count: int (optional) + Desired number of results per page. Default is 10000. + :param excluded_attributes: str (optional) + Comma-separated list of attributes to exclude in response. + :param filter: str (optional) + Query by which the results have to be filtered. Supported operators are equals(`eq`), + contains(`co`), starts with(`sw`) and not equals(`ne`). Additionally, simple expressions can be + formed using logical operators - `and` and `or`. The [SCIM RFC] has more details but we currently + only support simple expressions. + + [SCIM RFC]: https://tools.ietf.org/html/rfc7644#section-3.4.2.2 + :param sort_by: str (optional) + Attribute to sort the results. + :param sort_order: :class:`ListSortOrder` (optional) + The order to sort the results. + :param start_index: int (optional) + Specifies the index of the first result. First item is number 1. + + :returns: Iterator over :class:`Group` + + + .. py:method:: patch(id: str [, operations: Optional[List[Patch]], schemas: Optional[List[PatchSchema]]]) + + Update group details. + + Partially updates the details of a group. + + :param id: str + Unique ID for a group in the Databricks account. + :param operations: List[:class:`Patch`] (optional) + :param schemas: List[:class:`PatchSchema`] (optional) + The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. 
+ + + + + .. py:method:: update(id: str [, display_name: Optional[str], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], members: Optional[List[ComplexValue]], meta: Optional[ResourceMeta], roles: Optional[List[ComplexValue]], schemas: Optional[List[GroupSchema]]]) + + Replace a group. + + Updates the details of a group by replacing the entire group entity. + + :param id: str + Databricks group ID + :param display_name: str (optional) + String that represents a human-readable group name + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the group. See [assigning entitlements] for a full list of supported + values. + + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + :param groups: List[:class:`ComplexValue`] (optional) + :param members: List[:class:`ComplexValue`] (optional) + :param meta: :class:`ResourceMeta` (optional) + Container for the group identifier. Workspace local versus account. + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`GroupSchema`] (optional) + The schema of the group. + + + \ No newline at end of file diff --git a/docs/account/iam/index.rst b/docs/account/iam/index.rst new file mode 100644 index 000000000..1939a1a1e --- /dev/null +++ b/docs/account/iam/index.rst @@ -0,0 +1,14 @@ + +Identity and Access Management +============================== + +Manage users, service principals, groups and their permissions in Accounts and Workspaces + +.. 
toctree:: + :maxdepth: 1 + + access_control + groups + service_principals + users + workspace_assignment \ No newline at end of file diff --git a/docs/account/iam/service_principals.rst b/docs/account/iam/service_principals.rst new file mode 100644 index 000000000..baef75be9 --- /dev/null +++ b/docs/account/iam/service_principals.rst @@ -0,0 +1,241 @@ +``a.service_principals``: Account Service Principals +==================================================== +.. currentmodule:: databricks.sdk.service.iam + +.. py:class:: AccountServicePrincipalsAPI + + Identities for use with jobs, automated tools, and systems such as scripts, apps, and CI/CD platforms. + Databricks recommends creating service principals to run production jobs or modify production data. If all + processes that act on production data run with service principals, interactive users do not need any + write, delete, or modify privileges in production. This eliminates the risk of a user overwriting + production data by accident. + + .. py:method:: create( [, active: Optional[bool], application_id: Optional[str], display_name: Optional[str], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], id: Optional[str], roles: Optional[List[ComplexValue]], schemas: Optional[List[ServicePrincipalSchema]]]) -> ServicePrincipal + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import iam + + w = WorkspaceClient() + + groups = w.groups.group_display_name_to_id_map(iam.ListGroupsRequest()) + + spn = w.service_principals.create(display_name=f'sdk-{time.time_ns()}', + groups=[iam.ComplexValue(value=groups["admins"])]) + + # cleanup + w.service_principals.delete(id=spn.id) + + Create a service principal. + + Creates a new service principal in the Databricks account. 
+ + :param active: bool (optional) + If this user is active + :param application_id: str (optional) + UUID relating to the service principal + :param display_name: str (optional) + String that represents a concatenation of given and family names. + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the service principal. See [assigning entitlements] for a full list of + supported values. + + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + :param groups: List[:class:`ComplexValue`] (optional) + :param id: str (optional) + Databricks service principal ID. + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`ServicePrincipalSchema`] (optional) + The schema of the List response. + + :returns: :class:`ServicePrincipal` + + + .. py:method:: delete(id: str) + + Delete a service principal. + + Delete a single service principal in the Databricks account. + + :param id: str + Unique ID for a service principal in the Databricks account. + + + + + .. py:method:: get(id: str) -> ServicePrincipal + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.service_principals.create(display_name=f'sdk-{time.time_ns()}') + + by_id = w.service_principals.get(id=created.id) + + # cleanup + w.service_principals.delete(id=created.id) + + Get service principal details. + + Gets the details for a single service principal defined in the Databricks account. + + :param id: str + Unique ID for a service principal in the Databricks account. + + :returns: :class:`ServicePrincipal` + + + .. 
py:method:: list( [, attributes: Optional[str], count: Optional[int], excluded_attributes: Optional[str], filter: Optional[str], sort_by: Optional[str], sort_order: Optional[ListSortOrder], start_index: Optional[int]]) -> Iterator[ServicePrincipal] + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + sp_create = a.service_principals.create(active=True, display_name=f'sdk-{time.time_ns()}') + + sp = a.service_principals.get(id=sp_create.id) + + sp_list = a.service_principals.list(filter="displayName eq %s" % (sp.display_name)) + + # cleanup + a.service_principals.delete(id=sp_create.id) + + List service principals. + + Gets the set of service principals associated with a Databricks account. + + :param attributes: str (optional) + Comma-separated list of attributes to return in response. + :param count: int (optional) + Desired number of results per page. Default is 10000. + :param excluded_attributes: str (optional) + Comma-separated list of attributes to exclude in response. + :param filter: str (optional) + Query by which the results have to be filtered. Supported operators are equals(`eq`), + contains(`co`), starts with(`sw`) and not equals(`ne`). Additionally, simple expressions can be + formed using logical operators - `and` and `or`. The [SCIM RFC] has more details but we currently + only support simple expressions. + + [SCIM RFC]: https://tools.ietf.org/html/rfc7644#section-3.4.2.2 + :param sort_by: str (optional) + Attribute to sort the results. + :param sort_order: :class:`ListSortOrder` (optional) + The order to sort the results. + :param start_index: int (optional) + Specifies the index of the first result. First item is number 1. + + :returns: Iterator over :class:`ServicePrincipal` + + + .. py:method:: patch(id: str [, operations: Optional[List[Patch]], schemas: Optional[List[PatchSchema]]]) + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import iam + + a = AccountClient() + + sp_create = a.service_principals.create(active=True, display_name=f'sdk-{time.time_ns()}') + + sp = a.service_principals.get(id=sp_create.id) + + a.service_principals.patch(id=sp.id, + operations=[iam.Patch(op=iam.PatchOp.REPLACE, path="active", value="false")], + schemas=[iam.PatchSchema.URN_IETF_PARAMS_SCIM_API_MESSAGES_2_0_PATCH_OP]) + + # cleanup + a.service_principals.delete(id=sp_create.id) + + Update service principal details. + + Partially updates the details of a single service principal in the Databricks account. + + :param id: str + Unique ID for a service principal in the Databricks account. + :param operations: List[:class:`Patch`] (optional) + :param schemas: List[:class:`PatchSchema`] (optional) + The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. + + + + + .. py:method:: update(id: str [, active: Optional[bool], application_id: Optional[str], display_name: Optional[str], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], roles: Optional[List[ComplexValue]], schemas: Optional[List[ServicePrincipalSchema]]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import iam + + w = WorkspaceClient() + + created = w.service_principals.create(display_name=f'sdk-{time.time_ns()}') + + w.service_principals.update(id=created.id, + display_name=f'sdk-{time.time_ns()}', + roles=[iam.ComplexValue(value="xyz")]) + + # cleanup + w.service_principals.delete(id=created.id) + + Replace service principal. + + Updates the details of a single service principal. + + This action replaces the existing service principal with the same name. + + :param id: str + Databricks service principal ID. 
+ :param active: bool (optional) + If this user is active + :param application_id: str (optional) + UUID relating to the service principal + :param display_name: str (optional) + String that represents a concatenation of given and family names. + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the service principal. See [assigning entitlements] for a full list of + supported values. + + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + :param groups: List[:class:`ComplexValue`] (optional) + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`ServicePrincipalSchema`] (optional) + The schema of the List response. + + + \ No newline at end of file diff --git a/docs/account/iam/users.rst b/docs/account/iam/users.rst new file mode 100644 index 000000000..54a113542 --- /dev/null +++ b/docs/account/iam/users.rst @@ -0,0 +1,276 @@ +``a.users``: Account Users +========================== +.. currentmodule:: databricks.sdk.service.iam + +.. py:class:: AccountUsersAPI + + User identities recognized by Databricks and represented by email addresses. + + Databricks recommends using SCIM provisioning to sync users and groups automatically from your identity + provider to your Databricks account. SCIM streamlines onboarding a new employee or team by using your + identity provider to create users and groups in Databricks account and give them the proper level of + access. When a user leaves your organization or no longer needs access to Databricks account, admins can + terminate the user in your identity provider and that user’s account will also be removed from + Databricks account. This ensures a consistent offboarding process and prevents unauthorized users from + accessing sensitive data. + + .. 
py:method:: create( [, active: Optional[bool], display_name: Optional[str], emails: Optional[List[ComplexValue]], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], id: Optional[str], name: Optional[Name], roles: Optional[List[ComplexValue]], schemas: Optional[List[UserSchema]], user_name: Optional[str]]) -> User + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + user = a.users.create(display_name=f'sdk-{time.time_ns()}', user_name=f'sdk-{time.time_ns()}@example.com') + + # cleanup + a.users.delete(id=user.id) + + Create a new user. + + Creates a new user in the Databricks account. This new user will also be added to the Databricks + account. + + :param active: bool (optional) + If this user is active + :param display_name: str (optional) + String that represents a concatenation of given and family names. For example `John Smith`. This + field cannot be updated through the Workspace SCIM APIs when [identity federation is enabled]. Use + Account SCIM APIs to update `displayName`. + + [identity federation is enabled]: https://docs.databricks.com/administration-guide/users-groups/best-practices.html#enable-identity-federation + :param emails: List[:class:`ComplexValue`] (optional) + All the emails associated with the Databricks user. + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the user. See [assigning entitlements] for a full list of supported values. + + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + External ID is not currently supported. It is reserved for future use. + :param groups: List[:class:`ComplexValue`] (optional) + :param id: str (optional) + Databricks user ID. This is automatically set by Databricks. Any value provided by the client will + be ignored. 
+ :param name: :class:`Name` (optional) + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`UserSchema`] (optional) + The schema of the user. + :param user_name: str (optional) + Email address of the Databricks user. + + :returns: :class:`User` + + + .. py:method:: delete(id: str) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + other_owner = w.users.create(user_name=f'sdk-{time.time_ns()}@example.com') + + w.users.delete(id=other_owner.id) + + Delete a user. + + Deletes a user. Deleting a user from a Databricks account also removes objects associated with the + user. + + :param id: str + Unique ID for a user in the Databricks account. + + + + + .. py:method:: get(id: str [, attributes: Optional[str], count: Optional[int], excluded_attributes: Optional[str], filter: Optional[str], sort_by: Optional[str], sort_order: Optional[GetSortOrder], start_index: Optional[int]]) -> User + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + user = a.users.create(display_name=f'sdk-{time.time_ns()}', user_name=f'sdk-{time.time_ns()}@example.com') + + by_id = a.users.get(id=user.id) + + # cleanup + a.users.delete(id=user.id) + + Get user details. + + Gets information for a specific user in Databricks account. + + :param id: str + Unique ID for a user in the Databricks account. + :param attributes: str (optional) + Comma-separated list of attributes to return in response. + :param count: int (optional) + Desired number of results per page. Default is 10000. + :param excluded_attributes: str (optional) + Comma-separated list of attributes to exclude in response. + :param filter: str (optional) + Query by which the results have to be filtered. Supported operators are equals(`eq`), + contains(`co`), starts with(`sw`) and not equals(`ne`). 
Additionally, simple expressions can be + formed using logical operators - `and` and `or`. The [SCIM RFC] has more details but we currently + only support simple expressions. + + [SCIM RFC]: https://tools.ietf.org/html/rfc7644#section-3.4.2.2 + :param sort_by: str (optional) + Attribute to sort the results. Multi-part paths are supported. For example, `userName`, + `name.givenName`, and `emails`. + :param sort_order: :class:`GetSortOrder` (optional) + The order to sort the results. + :param start_index: int (optional) + Specifies the index of the first result. First item is number 1. + + :returns: :class:`User` + + + .. py:method:: list( [, attributes: Optional[str], count: Optional[int], excluded_attributes: Optional[str], filter: Optional[str], sort_by: Optional[str], sort_order: Optional[ListSortOrder], start_index: Optional[int]]) -> Iterator[User] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import iam + + w = WorkspaceClient() + + all_users = w.users.list(attributes="id,userName", + sort_by="userName", + sort_order=iam.ListSortOrder.DESCENDING) + + List users. + + Gets details for all the users associated with a Databricks account. + + :param attributes: str (optional) + Comma-separated list of attributes to return in response. + :param count: int (optional) + Desired number of results per page. Default is 10000. + :param excluded_attributes: str (optional) + Comma-separated list of attributes to exclude in response. + :param filter: str (optional) + Query by which the results have to be filtered. Supported operators are equals(`eq`), + contains(`co`), starts with(`sw`) and not equals(`ne`). Additionally, simple expressions can be + formed using logical operators - `and` and `or`. The [SCIM RFC] has more details but we currently + only support simple expressions. + + [SCIM RFC]: https://tools.ietf.org/html/rfc7644#section-3.4.2.2 + :param sort_by: str (optional) + Attribute to sort the results. 
Multi-part paths are supported. For example, `userName`, + `name.givenName`, and `emails`. + :param sort_order: :class:`ListSortOrder` (optional) + The order to sort the results. + :param start_index: int (optional) + Specifies the index of the first result. First item is number 1. + + :returns: Iterator over :class:`User` + + + .. py:method:: patch(id: str [, operations: Optional[List[Patch]], schemas: Optional[List[PatchSchema]]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import iam + + w = WorkspaceClient() + + user = w.users.create(display_name=f'sdk-{time.time_ns()}', user_name=f'sdk-{time.time_ns()}@example.com') + + w.users.patch(id=user.id, + operations=[iam.Patch(op=iam.PatchOp.REPLACE, path="active", value="false")], + schemas=[iam.PatchSchema.URN_IETF_PARAMS_SCIM_API_MESSAGES_2_0_PATCH_OP]) + + Update user details. + + Partially updates a user resource by applying the supplied operations on specific user attributes. + + :param id: str + Unique ID for a user in the Databricks account. + :param operations: List[:class:`Patch`] (optional) + :param schemas: List[:class:`PatchSchema`] (optional) + The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. + + + + + .. py:method:: update(id: str [, active: Optional[bool], display_name: Optional[str], emails: Optional[List[ComplexValue]], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], name: Optional[Name], roles: Optional[List[ComplexValue]], schemas: Optional[List[UserSchema]], user_name: Optional[str]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + user = w.users.create(display_name=f'sdk-{time.time_ns()}', user_name=f'sdk-{time.time_ns()}@example.com') + + w.users.update(id=user.id, user_name=user.user_name, active=True) + + Replace a user. 
+ + Replaces a user's information with the data supplied in request. + + :param id: str + Databricks user ID. This is automatically set by Databricks. Any value provided by the client will + be ignored. + :param active: bool (optional) + If this user is active + :param display_name: str (optional) + String that represents a concatenation of given and family names. For example `John Smith`. This + field cannot be updated through the Workspace SCIM APIs when [identity federation is enabled]. Use + Account SCIM APIs to update `displayName`. + + [identity federation is enabled]: https://docs.databricks.com/administration-guide/users-groups/best-practices.html#enable-identity-federation + :param emails: List[:class:`ComplexValue`] (optional) + All the emails associated with the Databricks user. + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the user. See [assigning entitlements] for a full list of supported values. + + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + External ID is not currently supported. It is reserved for future use. + :param groups: List[:class:`ComplexValue`] (optional) + :param name: :class:`Name` (optional) + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`UserSchema`] (optional) + The schema of the user. + :param user_name: str (optional) + Email address of the Databricks user. + + + \ No newline at end of file diff --git a/docs/account/iam/workspace_assignment.rst b/docs/account/iam/workspace_assignment.rst new file mode 100644 index 000000000..a09af197c --- /dev/null +++ b/docs/account/iam/workspace_assignment.rst @@ -0,0 +1,102 @@ +``a.workspace_assignment``: Workspace Assignment +================================================ +.. currentmodule:: databricks.sdk.service.iam + +.. 
py:class:: WorkspaceAssignmentAPI + + The Workspace Permission Assignment API allows you to manage workspace permissions for principals in your + account. + + .. py:method:: delete(workspace_id: int, principal_id: int) + + Delete permissions assignment. + + Deletes the workspace permissions assignment in a given account and workspace for the specified + principal. + + :param workspace_id: int + The workspace ID. + :param principal_id: int + The ID of the user, service principal, or group. + + + + + .. py:method:: get(workspace_id: int) -> WorkspacePermissions + + List workspace permissions. + + Get an array of workspace permissions for the specified account and workspace. + + :param workspace_id: int + The workspace ID. + + :returns: :class:`WorkspacePermissions` + + + .. py:method:: list(workspace_id: int) -> Iterator[PermissionAssignment] + + + Usage: + + .. code-block:: + + import os + + from databricks.sdk import AccountClient + + a = AccountClient() + + workspace_id = os.environ["TEST_WORKSPACE_ID"] + + all = a.workspace_assignment.list(workspace_id=workspace_id) + + Get permission assignments. + + Get the permission assignments for the specified Databricks account and Databricks workspace. + + :param workspace_id: int + The workspace ID for the account. + + :returns: Iterator over :class:`PermissionAssignment` + + + .. py:method:: update(workspace_id: int, principal_id: int, permissions: List[WorkspacePermission]) + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import iam + + a = AccountClient() + + spn = a.service_principals.create(display_name=f'sdk-{time.time_ns()}') + + spn_id = spn.id + + workspace_id = os.environ["DUMMY_WORKSPACE_ID"] + + a.workspace_assignment.update(workspace_id=workspace_id, + principal_id=spn_id, + permissions=[iam.WorkspacePermission.USER]) + + Create or update permissions assignment. 
+ + Creates or updates the workspace permissions assignment in a given account and workspace for the + specified principal. + + :param workspace_id: int + The workspace ID. + :param principal_id: int + The ID of the user, service principal, or group. + :param permissions: List[:class:`WorkspacePermission`] + Array of permissions assignments to update on the workspace. + + + \ No newline at end of file diff --git a/docs/account/index.rst b/docs/account/index.rst new file mode 100644 index 000000000..8179cb395 --- /dev/null +++ b/docs/account/index.rst @@ -0,0 +1,15 @@ + +Account APIs +============ + +These APIs are available from AccountClient + +.. toctree:: + :maxdepth: 1 + + iam/index + catalog/index + settings/index + provisioning/index + billing/index + oauth2/index \ No newline at end of file diff --git a/docs/account/oauth2/custom_app_integration.rst b/docs/account/oauth2/custom_app_integration.rst new file mode 100644 index 000000000..382ce0bd0 --- /dev/null +++ b/docs/account/oauth2/custom_app_integration.rst @@ -0,0 +1,82 @@ +``a.custom_app_integration``: OAuth Custom App Integration +========================================================== +.. currentmodule:: databricks.sdk.service.oauth2 + +.. py:class:: CustomAppIntegrationAPI + + These APIs enable administrators to manage custom oauth app integrations, which is required for + adding/using Custom OAuth App Integration like Tableau Cloud for Databricks in AWS cloud. + + .. py:method:: create(name: str, redirect_urls: List[str] [, confidential: Optional[bool], scopes: Optional[List[str]], token_access_policy: Optional[TokenAccessPolicy]]) -> CreateCustomAppIntegrationOutput + + Create Custom OAuth App Integration. + + Create Custom OAuth App Integration. + + You can retrieve the custom oauth app integration via :method:CustomAppIntegration/get. 
+ + :param name: str + name of the custom oauth app + :param redirect_urls: List[str] + List of oauth redirect urls + :param confidential: bool (optional) + indicates if an oauth client-secret should be generated + :param scopes: List[str] (optional) + OAuth scopes granted to the application. Supported scopes: all-apis, sql, offline_access, openid, + profile, email. + :param token_access_policy: :class:`TokenAccessPolicy` (optional) + Token access policy + + :returns: :class:`CreateCustomAppIntegrationOutput` + + + .. py:method:: delete(integration_id: str) + + Delete Custom OAuth App Integration. + + Delete an existing Custom OAuth App Integration. You can retrieve the custom oauth app integration via + :method:CustomAppIntegration/get. + + :param integration_id: str + The oauth app integration ID. + + + + + .. py:method:: get(integration_id: str) -> GetCustomAppIntegrationOutput + + Get OAuth Custom App Integration. + + Gets the Custom OAuth App Integration for the given integration id. + + :param integration_id: str + The oauth app integration ID. + + :returns: :class:`GetCustomAppIntegrationOutput` + + + .. py:method:: list() -> Iterator[GetCustomAppIntegrationOutput] + + Get custom oauth app integrations. + + Get the list of custom oauth app integrations for the specified Databricks account + + :returns: Iterator over :class:`GetCustomAppIntegrationOutput` + + + .. py:method:: update(integration_id: str [, redirect_urls: Optional[List[str]], token_access_policy: Optional[TokenAccessPolicy]]) + + Updates Custom OAuth App Integration. + + Updates an existing custom OAuth App Integration. You can retrieve the custom oauth app integration + via :method:CustomAppIntegration/get. + + :param integration_id: str + The oauth app integration ID. 
+ :param redirect_urls: List[str] (optional) + List of oauth redirect urls to be updated in the custom oauth app integration + :param token_access_policy: :class:`TokenAccessPolicy` (optional) + Token access policy to be updated in the custom oauth app integration + + + \ No newline at end of file diff --git a/docs/account/oauth2/index.rst b/docs/account/oauth2/index.rst new file mode 100644 index 000000000..a4663ef6b --- /dev/null +++ b/docs/account/oauth2/index.rst @@ -0,0 +1,13 @@ + +OAuth +===== + +Configure OAuth 2.0 application registrations for Databricks + +.. toctree:: + :maxdepth: 1 + + custom_app_integration + o_auth_published_apps + published_app_integration + service_principal_secrets \ No newline at end of file diff --git a/docs/account/oauth2/o_auth_published_apps.rst b/docs/account/oauth2/o_auth_published_apps.rst new file mode 100644 index 000000000..69aecb8ad --- /dev/null +++ b/docs/account/oauth2/o_auth_published_apps.rst @@ -0,0 +1,23 @@ +``a.o_auth_published_apps``: OAuth Published App +================================================ +.. currentmodule:: databricks.sdk.service.oauth2 + +.. py:class:: OAuthPublishedAppsAPI + + These APIs enable administrators to view all the available published OAuth applications in Databricks. + Administrators can add the published OAuth applications to their account through the OAuth Published App + Integration APIs. + + .. py:method:: list( [, page_size: Optional[int], page_token: Optional[str]]) -> Iterator[PublishedAppOutput] + + Get all the published OAuth apps. + + Get all the available published OAuth apps in Databricks. + + :param page_size: int (optional) + The max number of OAuth published apps to return. + :param page_token: str (optional) + A token that can be used to get the next page of results. 
+ + :returns: Iterator over :class:`PublishedAppOutput` + \ No newline at end of file diff --git a/docs/account/oauth2/published_app_integration.rst b/docs/account/oauth2/published_app_integration.rst new file mode 100644 index 000000000..0488415cd --- /dev/null +++ b/docs/account/oauth2/published_app_integration.rst @@ -0,0 +1,73 @@ +``a.published_app_integration``: OAuth Published App Integration +================================================================ +.. currentmodule:: databricks.sdk.service.oauth2 + +.. py:class:: PublishedAppIntegrationAPI + + These APIs enable administrators to manage published oauth app integrations, which is required for + adding/using Published OAuth App Integration like Tableau Desktop for Databricks in AWS cloud. + + .. py:method:: create( [, app_id: Optional[str], token_access_policy: Optional[TokenAccessPolicy]]) -> CreatePublishedAppIntegrationOutput + + Create Published OAuth App Integration. + + Create Published OAuth App Integration. + + You can retrieve the published oauth app integration via :method:PublishedAppIntegration/get. + + :param app_id: str (optional) + app_id of the oauth published app integration. For example power-bi, tableau-desktop + :param token_access_policy: :class:`TokenAccessPolicy` (optional) + Token access policy + + :returns: :class:`CreatePublishedAppIntegrationOutput` + + + .. py:method:: delete(integration_id: str) + + Delete Published OAuth App Integration. + + Delete an existing Published OAuth App Integration. You can retrieve the published oauth app + integration via :method:PublishedAppIntegration/get. + + :param integration_id: str + The oauth app integration ID. + + + + + .. py:method:: get(integration_id: str) -> GetPublishedAppIntegrationOutput + + Get OAuth Published App Integration. + + Gets the Published OAuth App Integration for the given integration id. + + :param integration_id: str + The oauth app integration ID. + + :returns: :class:`GetPublishedAppIntegrationOutput` + + + ..
py:method:: list() -> Iterator[GetPublishedAppIntegrationOutput] + + Get published oauth app integrations. + + Get the list of published oauth app integrations for the specified Databricks account + + :returns: Iterator over :class:`GetPublishedAppIntegrationOutput` + + + .. py:method:: update(integration_id: str [, token_access_policy: Optional[TokenAccessPolicy]]) + + Updates Published OAuth App Integration. + + Updates an existing published OAuth App Integration. You can retrieve the published oauth app + integration via :method:PublishedAppIntegration/get. + + :param integration_id: str + The oauth app integration ID. + :param token_access_policy: :class:`TokenAccessPolicy` (optional) + Token access policy to be updated in the published oauth app integration + + + \ No newline at end of file diff --git a/docs/account/oauth2/service_principal_secrets.rst b/docs/account/oauth2/service_principal_secrets.rst new file mode 100644 index 000000000..4249b9dea --- /dev/null +++ b/docs/account/oauth2/service_principal_secrets.rst @@ -0,0 +1,56 @@ +``a.service_principal_secrets``: Service Principal Secrets +========================================================== +.. currentmodule:: databricks.sdk.service.oauth2 + +.. py:class:: ServicePrincipalSecretsAPI + + These APIs enable administrators to manage service principal secrets. + + You can use the generated secrets to obtain OAuth access tokens for a service principal, which can then be + used to access Databricks Accounts and Workspace APIs. For more information, see [Authentication using + OAuth tokens for service principals]. + + In addition, the generated secrets can be used to configure the Databricks Terraform Provider to + authenticate with the service principal. For more information, see [Databricks Terraform Provider].
+ + [Authentication using OAuth tokens for service principals]: https://docs.databricks.com/dev-tools/authentication-oauth.html + [Databricks Terraform Provider]: https://github.com/databricks/terraform-provider-databricks/blob/master/docs/index.md#authenticating-with-service-principal + + .. py:method:: create(service_principal_id: int) -> CreateServicePrincipalSecretResponse + + Create service principal secret. + + Create a secret for the given service principal. + + :param service_principal_id: int + The service principal ID. + + :returns: :class:`CreateServicePrincipalSecretResponse` + + + .. py:method:: delete(service_principal_id: int, secret_id: str) + + Delete service principal secret. + + Delete a secret from the given service principal. + + :param service_principal_id: int + The service principal ID. + :param secret_id: str + The secret ID. + + + + + .. py:method:: list(service_principal_id: int) -> Iterator[SecretInfo] + + List service principal secrets. + + List all secrets associated with the given service principal. This operation only returns information + about the secrets themselves and does not include the secret values. + + :param service_principal_id: int + The service principal ID. + + :returns: Iterator over :class:`SecretInfo` + \ No newline at end of file diff --git a/docs/account/provisioning/credentials.rst b/docs/account/provisioning/credentials.rst new file mode 100644 index 000000000..5255a6a29 --- /dev/null +++ b/docs/account/provisioning/credentials.rst @@ -0,0 +1,123 @@ +``a.credentials``: Credential configurations +============================================ +.. currentmodule:: databricks.sdk.service.provisioning + +.. py:class:: CredentialsAPI + + These APIs manage credential configurations for this workspace. Databricks needs access to a cross-account + service IAM role in your AWS account so that Databricks can deploy clusters in the appropriate VPC for the + new workspace. 
A credential configuration encapsulates this role information, and its ID is used when + creating a new workspace. + + .. py:method:: create(credentials_name: str, aws_credentials: CreateCredentialAwsCredentials) -> Credential + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import provisioning + + a = AccountClient() + + role = a.credentials.create( + credentials_name=f'sdk-{time.time_ns()}', + aws_credentials=provisioning.CreateCredentialAwsCredentials(sts_role=provisioning.CreateCredentialStsRole( + role_arn=os.environ["TEST_CROSSACCOUNT_ARN"]))) + + # cleanup + a.credentials.delete(credentials_id=role.credentials_id) + + Create credential configuration. + + Creates a Databricks credential configuration that represents cloud cross-account credentials for a + specified account. Databricks uses this to set up network infrastructure properly to host Databricks + clusters. For your AWS IAM role, you need to trust the External ID (the Databricks Account API account + ID) in the returned credential object, and configure the required access policy. + + Save the response's `credentials_id` field, which is the ID for your new credential configuration + object. + + For information about how to create a new workspace with this API, see [Create a new workspace using + the Account API] + + [Create a new workspace using the Account API]: http://docs.databricks.com/administration-guide/account-api/new-workspace.html + + :param credentials_name: str + The human-readable name of the credential configuration object. + :param aws_credentials: :class:`CreateCredentialAwsCredentials` + + :returns: :class:`Credential` + + + .. py:method:: delete(credentials_id: str) + + Delete credential configuration. + + Deletes a Databricks credential configuration object for an account, both specified by ID. You cannot + delete a credential that is associated with any workspace. 
+ + :param credentials_id: str + Databricks Account API credential configuration ID + + + + + .. py:method:: get(credentials_id: str) -> Credential + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import provisioning + + a = AccountClient() + + role = a.credentials.create( + credentials_name=f'sdk-{time.time_ns()}', + aws_credentials=provisioning.CreateCredentialAwsCredentials(sts_role=provisioning.CreateCredentialStsRole( + role_arn=os.environ["TEST_CROSSACCOUNT_ARN"]))) + + by_id = a.credentials.get(credentials_id=role.credentials_id) + + # cleanup + a.credentials.delete(credentials_id=role.credentials_id) + + Get credential configuration. + + Gets a Databricks credential configuration object for an account, both specified by ID. + + :param credentials_id: str + Databricks Account API credential configuration ID + + :returns: :class:`Credential` + + + .. py:method:: list() -> Iterator[Credential] + + + Usage: + + .. code-block:: + + from databricks.sdk import AccountClient + + a = AccountClient() + + configs = a.credentials.list() + + Get all credential configurations. + + Gets all Databricks credential configurations associated with an account specified by ID. + + :returns: Iterator over :class:`Credential` + \ No newline at end of file diff --git a/docs/account/provisioning/encryption_keys.rst b/docs/account/provisioning/encryption_keys.rst new file mode 100644 index 000000000..c711727c5 --- /dev/null +++ b/docs/account/provisioning/encryption_keys.rst @@ -0,0 +1,150 @@ +``a.encryption_keys``: Key configurations +========================================= +.. currentmodule:: databricks.sdk.service.provisioning + +.. py:class:: EncryptionKeysAPI + + These APIs manage encryption key configurations for this workspace (optional). A key configuration + encapsulates the AWS KMS key information and some information about how the key configuration can be used. 
+ There are two possible uses for key configurations: + + * Managed services: A key configuration can be used to encrypt a workspace's notebook and secret data in + the control plane, as well as Databricks SQL queries and query history. * Storage: A key configuration can + be used to encrypt a workspace's DBFS and EBS data in the data plane. + + In both of these cases, the key configuration's ID is used when creating a new workspace. This Preview + feature is available if your account is on the E2 version of the platform. Updating a running workspace + with workspace storage encryption requires that the workspace is on the E2 version of the platform. If you + have an older workspace, it might not be on the E2 version of the platform. If you are not sure, contact + your Databricks representative. + + .. py:method:: create(use_cases: List[KeyUseCase] [, aws_key_info: Optional[CreateAwsKeyInfo], gcp_key_info: Optional[CreateGcpKeyInfo]]) -> CustomerManagedKey + + + Usage: + + .. code-block:: + + import os + + from databricks.sdk import AccountClient + from databricks.sdk.service import provisioning + + a = AccountClient() + + created = a.encryption_keys.create(aws_key_info=provisioning.CreateAwsKeyInfo( + key_arn=os.environ["TEST_MANAGED_KMS_KEY_ARN"], key_alias=os.environ["TEST_STORAGE_KMS_KEY_ALIAS"]), + use_cases=[provisioning.KeyUseCase.MANAGED_SERVICES]) + + # cleanup + a.encryption_keys.delete(customer_managed_key_id=created.customer_managed_key_id) + + Create encryption key configuration. + + Creates a customer-managed key configuration object for an account, specified by ID. This operation + uploads a reference to a customer-managed key to Databricks. If the key is assigned as a workspace's + customer-managed key for managed services, Databricks uses the key to encrypt the workspace's notebooks + and secrets in the control plane, in addition to Databricks SQL queries and query history.
If it is + specified as a workspace's customer-managed key for workspace storage, the key encrypts the + workspace's root S3 bucket (which contains the workspace's root DBFS and system data) and, optionally, + cluster EBS volume data. + + **Important**: Customer-managed keys are supported only for some deployment types, subscription types, + and AWS regions that currently support creation of Databricks workspaces. + + This operation is available only if your account is on the E2 version of the platform or on a select + custom plan that allows multiple workspaces per account. + + :param use_cases: List[:class:`KeyUseCase`] + The cases that the key can be used for. + :param aws_key_info: :class:`CreateAwsKeyInfo` (optional) + :param gcp_key_info: :class:`CreateGcpKeyInfo` (optional) + + :returns: :class:`CustomerManagedKey` + + + .. py:method:: delete(customer_managed_key_id: str) + + Delete encryption key configuration. + + Deletes a customer-managed key configuration object for an account. You cannot delete a configuration + that is associated with a running workspace. + + :param customer_managed_key_id: str + Databricks encryption key configuration ID. + + + + + .. py:method:: get(customer_managed_key_id: str) -> CustomerManagedKey + + + Usage: + + .. code-block:: + + import os + + from databricks.sdk import AccountClient + from databricks.sdk.service import provisioning + + a = AccountClient() + + created = a.encryption_keys.create(aws_key_info=provisioning.CreateAwsKeyInfo( + key_arn=os.environ["TEST_MANAGED_KMS_KEY_ARN"], key_alias=os.environ["TEST_STORAGE_KMS_KEY_ALIAS"]), + use_cases=[provisioning.KeyUseCase.MANAGED_SERVICES]) + + by_id = a.encryption_keys.get(customer_managed_key_id=created.customer_managed_key_id) + + # cleanup + a.encryption_keys.delete(customer_managed_key_id=created.customer_managed_key_id) + + Get encryption key configuration. + + Gets a customer-managed key configuration object for an account, specified by ID. 
This operation + uploads a reference to a customer-managed key to Databricks. If assigned as a workspace's + customer-managed key for managed services, Databricks uses the key to encrypt the workspace's notebooks + and secrets in the control plane, in addition to Databricks SQL queries and query history. If it is + specified as a workspace's customer-managed key for storage, the key encrypts the workspace's root S3 + bucket (which contains the workspace's root DBFS and system data) and, optionally, cluster EBS volume + data. + + **Important**: Customer-managed keys are supported only for some deployment types, subscription types, + and AWS regions. + + This operation is available only if your account is on the E2 version of the platform. + + :param customer_managed_key_id: str + Databricks encryption key configuration ID. + + :returns: :class:`CustomerManagedKey` + + + .. py:method:: list() -> Iterator[CustomerManagedKey] + + + Usage: + + .. code-block:: + + from databricks.sdk import AccountClient + + a = AccountClient() + + all = a.encryption_keys.list() + + Get all encryption key configurations. + + Gets all customer-managed key configuration objects for an account. If the key is specified as a + workspace's managed services customer-managed key, Databricks uses the key to encrypt the workspace's + notebooks and secrets in the control plane, in addition to Databricks SQL queries and query history. + If the key is specified as a workspace's storage customer-managed key, the key is used to encrypt the + workspace's root S3 bucket and optionally can encrypt cluster EBS volumes data in the data plane. + + **Important**: Customer-managed keys are supported only for some deployment types, subscription types, + and AWS regions. + + This operation is available only if your account is on the E2 version of the platform.
+ + :returns: Iterator over :class:`CustomerManagedKey` + \ No newline at end of file diff --git a/docs/account/provisioning/index.rst b/docs/account/provisioning/index.rst new file mode 100644 index 000000000..46a328b68 --- /dev/null +++ b/docs/account/provisioning/index.rst @@ -0,0 +1,16 @@ + +Provisioning +============ + +Resource management for secure Databricks Workspace deployment, cross-account IAM roles, storage, encryption, networking and private access. + +.. toctree:: + :maxdepth: 1 + + credentials + encryption_keys + networks + private_access + storage + vpc_endpoints + workspaces \ No newline at end of file diff --git a/docs/account/provisioning/networks.rst b/docs/account/provisioning/networks.rst new file mode 100644 index 000000000..e7491f202 --- /dev/null +++ b/docs/account/provisioning/networks.rst @@ -0,0 +1,123 @@ +``a.networks``: Network configurations +====================================== +.. currentmodule:: databricks.sdk.service.provisioning + +.. py:class:: NetworksAPI + + These APIs manage network configurations for customer-managed VPCs (optional). Its ID is used when + creating a new workspace if you use customer-managed VPCs. + + .. py:method:: create(network_name: str [, gcp_network_info: Optional[GcpNetworkInfo], security_group_ids: Optional[List[str]], subnet_ids: Optional[List[str]], vpc_endpoints: Optional[NetworkVpcEndpoints], vpc_id: Optional[str]]) -> Network + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + netw = a.networks.create(network_name=f'sdk-{time.time_ns()}', + vpc_id=hex(time.time_ns())[2:], + subnet_ids=[hex(time.time_ns())[2:], + hex(time.time_ns())[2:]], + security_group_ids=[hex(time.time_ns())[2:]]) + + Create network configuration. + + Creates a Databricks network configuration that represents a VPC and its resources. The VPC will be + used for new Databricks clusters. This requires a pre-existing VPC and subnets.
+ + :param network_name: str + The human-readable name of the network configuration. + :param gcp_network_info: :class:`GcpNetworkInfo` (optional) + The Google Cloud specific information for this network (for example, the VPC ID, subnet ID, and + secondary IP ranges). + :param security_group_ids: List[str] (optional) + IDs of one to five security groups associated with this network. Security group IDs **cannot** be + used in multiple network configurations. + :param subnet_ids: List[str] (optional) + IDs of at least two subnets associated with this network. Subnet IDs **cannot** be used in multiple + network configurations. + :param vpc_endpoints: :class:`NetworkVpcEndpoints` (optional) + If specified, contains the VPC endpoints used to allow cluster communication from this VPC over [AWS + PrivateLink]. + + [AWS PrivateLink]: https://aws.amazon.com/privatelink/ + :param vpc_id: str (optional) + The ID of the VPC associated with this network. VPC IDs can be used in multiple network + configurations. + + :returns: :class:`Network` + + + .. py:method:: delete(network_id: str) + + Delete a network configuration. + + Deletes a Databricks network configuration, which represents a cloud VPC and its resources. You cannot + delete a network that is associated with a workspace. + + This operation is available only if your account is on the E2 version of the platform. + + :param network_id: str + Databricks Account API network configuration ID. + + + + + .. py:method:: get(network_id: str) -> Network + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + netw = a.networks.create(network_name=f'sdk-{time.time_ns()}', + vpc_id=hex(time.time_ns())[2:], + subnet_ids=[hex(time.time_ns())[2:], + hex(time.time_ns())[2:]], + security_group_ids=[hex(time.time_ns())[2:]]) + + by_id = a.networks.get(network_id=netw.network_id) + + Get a network configuration. 
+ + Gets a Databricks network configuration, which represents a cloud VPC and its resources. + + :param network_id: str + Databricks Account API network configuration ID. + + :returns: :class:`Network` + + + .. py:method:: list() -> Iterator[Network] + + + Usage: + + .. code-block:: + + from databricks.sdk import AccountClient + + a = AccountClient() + + configs = a.networks.list() + + Get all network configurations. + + Gets a list of all Databricks network configurations for an account, specified by ID. + + This operation is available only if your account is on the E2 version of the platform. + + :returns: Iterator over :class:`Network` + \ No newline at end of file diff --git a/docs/account/provisioning/private_access.rst b/docs/account/provisioning/private_access.rst new file mode 100644 index 000000000..10022068e --- /dev/null +++ b/docs/account/provisioning/private_access.rst @@ -0,0 +1,226 @@ +``a.private_access``: Private Access Settings +============================================= +.. currentmodule:: databricks.sdk.service.provisioning + +.. py:class:: PrivateAccessAPI + + These APIs manage private access settings for this account. + + .. py:method:: create(private_access_settings_name: str, region: str [, allowed_vpc_endpoint_ids: Optional[List[str]], private_access_level: Optional[PrivateAccessLevel], public_access_enabled: Optional[bool]]) -> PrivateAccessSettings + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + created = a.private_access.create(private_access_settings_name=f'sdk-{time.time_ns()}', + region=os.environ["AWS_REGION"]) + + # cleanup + a.private_access.delete(private_access_settings_id=created.private_access_settings_id) + + Create private access settings. + + Creates a private access settings object, which specifies how your workspace is accessed over [AWS + PrivateLink]. 
To use AWS PrivateLink, a workspace must have a private access settings object + referenced by ID in the workspace's `private_access_settings_id` property. + + You can share one private access settings object with multiple workspaces in a single account. However, + private access settings are specific to AWS regions, so only workspaces in the same AWS region can use + a given private access settings object. + + Before configuring PrivateLink, read the [Databricks article about PrivateLink]. + + [AWS PrivateLink]: https://aws.amazon.com/privatelink + [Databricks article about PrivateLink]: https://docs.databricks.com/administration-guide/cloud-configurations/aws/privatelink.html + + :param private_access_settings_name: str + The human-readable name of the private access settings object. + :param region: str + The cloud region for workspaces associated with this private access settings object. + :param allowed_vpc_endpoint_ids: List[str] (optional) + An array of Databricks VPC endpoint IDs. This is the Databricks ID that is returned when registering + the VPC endpoint configuration in your Databricks account. This is not the ID of the VPC endpoint in + AWS. + + Only used when `private_access_level` is set to `ENDPOINT`. This is an allow list of VPC endpoints + in your account that can connect to your workspace over AWS PrivateLink. + + If hybrid access to your workspace is enabled by setting `public_access_enabled` to `true`, this + control only works for PrivateLink connections. To control how your workspace is accessed via public + internet, see [IP access lists]. + + [IP access lists]: https://docs.databricks.com/security/network/ip-access-list.html + :param private_access_level: :class:`PrivateAccessLevel` (optional) + The private access level controls which VPC endpoints can connect to the UI or API of any workspace + that attaches this private access settings object.
* `ACCOUNT` level access (the default) allows + only VPC endpoints that are registered in your Databricks account to connect to your workspace. * + `ENDPOINT` level access allows only specified VPC endpoints to connect to your workspace. For details, + see `allowed_vpc_endpoint_ids`. + :param public_access_enabled: bool (optional) + Determines if the workspace can be accessed over public internet. For fully private workspaces, you + can optionally specify `false`, but only if you implement both the front-end and the back-end + PrivateLink connections. Otherwise, specify `true`, which means that public access is enabled. + + :returns: :class:`PrivateAccessSettings` + + + .. py:method:: delete(private_access_settings_id: str) + + Delete a private access settings object. + + Deletes a private access settings object, which determines how your workspace is accessed over [AWS + PrivateLink]. + + Before configuring PrivateLink, read the [Databricks article about PrivateLink]. + + [AWS PrivateLink]: https://aws.amazon.com/privatelink + [Databricks article about PrivateLink]: https://docs.databricks.com/administration-guide/cloud-configurations/aws/privatelink.html + + :param private_access_settings_id: str + Databricks Account API private access settings ID. + + + + + .. py:method:: get(private_access_settings_id: str) -> PrivateAccessSettings + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + created = a.private_access.create(private_access_settings_name=f'sdk-{time.time_ns()}', + region=os.environ["AWS_REGION"]) + + by_id = a.private_access.get(private_access_settings_id=created.private_access_settings_id) + + # cleanup + a.private_access.delete(private_access_settings_id=created.private_access_settings_id) + + Get a private access settings object. + + Gets a private access settings object, which specifies how your workspace is accessed over [AWS + PrivateLink].
+ + Before configuring PrivateLink, read the [Databricks article about PrivateLink]. + + [AWS PrivateLink]: https://aws.amazon.com/privatelink + [Databricks article about PrivateLink]: https://docs.databricks.com/administration-guide/cloud-configurations/aws/privatelink.html + + :param private_access_settings_id: str + Databricks Account API private access settings ID. + + :returns: :class:`PrivateAccessSettings` + + + .. py:method:: list() -> Iterator[PrivateAccessSettings] + + + Usage: + + .. code-block:: + + from databricks.sdk import AccountClient + + a = AccountClient() + + all = a.private_access.list() + + Get all private access settings objects. + + Gets a list of all private access settings objects for an account, specified by ID. + + :returns: Iterator over :class:`PrivateAccessSettings` + + + .. py:method:: replace(private_access_settings_id: str, private_access_settings_name: str, region: str [, allowed_vpc_endpoint_ids: Optional[List[str]], private_access_level: Optional[PrivateAccessLevel], public_access_enabled: Optional[bool]]) + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + created = a.private_access.create(private_access_settings_name=f'sdk-{time.time_ns()}', + region=os.environ["AWS_REGION"]) + + a.private_access.replace(private_access_settings_id=created.private_access_settings_id, + private_access_settings_name=f'sdk-{time.time_ns()}', + region=os.environ["AWS_REGION"]) + + # cleanup + a.private_access.delete(private_access_settings_id=created.private_access_settings_id) + + Replace private access settings. + + Updates an existing private access settings object, which specifies how your workspace is accessed + over [AWS PrivateLink]. To use AWS PrivateLink, a workspace must have a private access settings object + referenced by ID in the workspace's `private_access_settings_id` property.
+ + This operation completely overwrites your existing private access settings object attached to your + workspaces. All workspaces attached to the private access settings are affected by any change. If + `public_access_enabled`, `private_access_level`, or `allowed_vpc_endpoint_ids` are updated, effects of + these changes might take several minutes to propagate to the workspace API. + + You can share one private access settings object with multiple workspaces in a single account. + However, private access settings are specific to AWS regions, so only workspaces in the same AWS + region can use a given private access settings object. + + Before configuring PrivateLink, read the [Databricks article about PrivateLink]. + + [AWS PrivateLink]: https://aws.amazon.com/privatelink + [Databricks article about PrivateLink]: https://docs.databricks.com/administration-guide/cloud-configurations/aws/privatelink.html + + :param private_access_settings_id: str + Databricks Account API private access settings ID. + :param private_access_settings_name: str + The human-readable name of the private access settings object. + :param region: str + The cloud region for workspaces associated with this private access settings object. + :param allowed_vpc_endpoint_ids: List[str] (optional) + An array of Databricks VPC endpoint IDs. This is the Databricks ID that is returned when registering + the VPC endpoint configuration in your Databricks account. This is not the ID of the VPC endpoint in + AWS. + + Only used when `private_access_level` is set to `ENDPOINT`. This is an allow list of VPC endpoints + in your account that can connect to your workspace over AWS PrivateLink. + + If hybrid access to your workspace is enabled by setting `public_access_enabled` to `true`, this + control only works for PrivateLink connections. To control how your workspace is accessed via public + internet, see [IP access lists].
+ + [IP access lists]: https://docs.databricks.com/security/network/ip-access-list.html + :param private_access_level: :class:`PrivateAccessLevel` (optional) + The private access level controls which VPC endpoints can connect to the UI or API of any workspace + that attaches this private access settings object. * `ACCOUNT` level access (the default) allows + only VPC endpoints that are registered in your Databricks account to connect to your workspace. * + `ENDPOINT` level access allows only specified VPC endpoints to connect to your workspace. For details, + see `allowed_vpc_endpoint_ids`. + :param public_access_enabled: bool (optional) + Determines if the workspace can be accessed over public internet. For fully private workspaces, you + can optionally specify `false`, but only if you implement both the front-end and the back-end + PrivateLink connections. Otherwise, specify `true`, which means that public access is enabled. + + + \ No newline at end of file diff --git a/docs/account/provisioning/storage.rst b/docs/account/provisioning/storage.rst new file mode 100644 index 000000000..611a8cdc6 --- /dev/null +++ b/docs/account/provisioning/storage.rst @@ -0,0 +1,114 @@ +``a.storage``: Storage configurations +===================================== +.. currentmodule:: databricks.sdk.service.provisioning + +.. py:class:: StorageAPI + + These APIs manage storage configurations for this workspace. A root storage S3 bucket in your account is + required to store objects like cluster logs, notebook revisions, and job results. You can also use the + root storage S3 bucket for storage of non-production DBFS data. A storage configuration encapsulates this + bucket information, and its ID is used when creating a new workspace. + + .. py:method:: create(storage_configuration_name: str, root_bucket_info: RootBucketInfo) -> StorageConfiguration + + + Usage: + + ..
code-block:: + + import os + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import provisioning + + a = AccountClient() + + storage = a.storage.create( + storage_configuration_name=f'sdk-{time.time_ns()}', + root_bucket_info=provisioning.RootBucketInfo(bucket_name=os.environ["TEST_ROOT_BUCKET"])) + + # cleanup + a.storage.delete(storage_configuration_id=storage.storage_configuration_id) + + Create new storage configuration. + + Creates new storage configuration for an account, specified by ID. Uploads a storage configuration + object that represents the root AWS S3 bucket in your account. Databricks stores related workspace + assets including DBFS, cluster logs, and job results. For the AWS S3 bucket, you need to configure the + required bucket policy. + + For information about how to create a new workspace with this API, see [Create a new workspace using + the Account API] + + [Create a new workspace using the Account API]: http://docs.databricks.com/administration-guide/account-api/new-workspace.html + + :param storage_configuration_name: str + The human-readable name of the storage configuration. + :param root_bucket_info: :class:`RootBucketInfo` + Root S3 bucket information. + + :returns: :class:`StorageConfiguration` + + + .. py:method:: delete(storage_configuration_id: str) + + Delete storage configuration. + + Deletes a Databricks storage configuration. You cannot delete a storage configuration that is + associated with any workspace. + + :param storage_configuration_id: str + Databricks Account API storage configuration ID. + + + + + .. py:method:: get(storage_configuration_id: str) -> StorageConfiguration + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import provisioning + + a = AccountClient() + + storage = a.storage.create(storage_configuration_name=f'sdk-{time.time_ns()}', + root_bucket_info=provisioning.RootBucketInfo(bucket_name=f'sdk-{time.time_ns()}')) + + by_id = a.storage.get(storage_configuration_id=storage.storage_configuration_id) + + Get storage configuration. + + Gets a Databricks storage configuration for an account, both specified by ID. + + :param storage_configuration_id: str + Databricks Account API storage configuration ID. + + :returns: :class:`StorageConfiguration` + + + .. py:method:: list() -> Iterator[StorageConfiguration] + + + Usage: + + .. code-block:: + + from databricks.sdk import AccountClient + + a = AccountClient() + + configs = a.storage.list() + + Get all storage configurations. + + Gets a list of all Databricks storage configurations for your account, specified by ID. + + :returns: Iterator over :class:`StorageConfiguration` + \ No newline at end of file diff --git a/docs/account/provisioning/vpc_endpoints.rst b/docs/account/provisioning/vpc_endpoints.rst new file mode 100644 index 000000000..d2622dc0f --- /dev/null +++ b/docs/account/provisioning/vpc_endpoints.rst @@ -0,0 +1,135 @@ +``a.vpc_endpoints``: VPC Endpoint Configurations +================================================ +.. currentmodule:: databricks.sdk.service.provisioning + +.. py:class:: VpcEndpointsAPI + + These APIs manage VPC endpoint configurations for this account. + + .. py:method:: create(vpc_endpoint_name: str [, aws_vpc_endpoint_id: Optional[str], gcp_vpc_endpoint_info: Optional[GcpVpcEndpointInfo], region: Optional[str]]) -> VpcEndpoint + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + created = a.vpc_endpoints.create(aws_vpc_endpoint_id=os.environ["TEST_RELAY_VPC_ENDPOINT"], + region=os.environ["AWS_REGION"], + vpc_endpoint_name=f'sdk-{time.time_ns()}') + + # cleanup + a.vpc_endpoints.delete(vpc_endpoint_id=created.vpc_endpoint_id) + + Create VPC endpoint configuration. + + Creates a VPC endpoint configuration, which represents a [VPC endpoint] object in AWS used to + communicate privately with Databricks over [AWS PrivateLink]. + + After you create the VPC endpoint configuration, the Databricks [endpoint service] automatically + accepts the VPC endpoint. + + Before configuring PrivateLink, read the [Databricks article about PrivateLink]. + + [AWS PrivateLink]: https://aws.amazon.com/privatelink + [Databricks article about PrivateLink]: https://docs.databricks.com/administration-guide/cloud-configurations/aws/privatelink.html + [VPC endpoint]: https://docs.aws.amazon.com/vpc/latest/privatelink/vpc-endpoints.html + [endpoint service]: https://docs.aws.amazon.com/vpc/latest/privatelink/privatelink-share-your-services.html + + :param vpc_endpoint_name: str + The human-readable name of the VPC endpoint configuration. + :param aws_vpc_endpoint_id: str (optional) + The ID of the VPC endpoint object in AWS. + :param gcp_vpc_endpoint_info: :class:`GcpVpcEndpointInfo` (optional) + The Google Cloud specific information for this Private Service Connect endpoint. + :param region: str (optional) + The AWS region in which this VPC endpoint object exists. + + :returns: :class:`VpcEndpoint` + + + .. py:method:: delete(vpc_endpoint_id: str) + + Delete VPC endpoint configuration. + + Deletes a VPC endpoint configuration, which represents an [AWS VPC endpoint] that can communicate + privately with Databricks over [AWS PrivateLink]. + + Before configuring PrivateLink, read the [Databricks article about PrivateLink]. 
+ + [AWS PrivateLink]: https://aws.amazon.com/privatelink + [AWS VPC endpoint]: https://docs.aws.amazon.com/vpc/latest/privatelink/concepts.html + [Databricks article about PrivateLink]: https://docs.databricks.com/administration-guide/cloud-configurations/aws/privatelink.html + + :param vpc_endpoint_id: str + Databricks VPC endpoint ID. + + + + + .. py:method:: get(vpc_endpoint_id: str) -> VpcEndpoint + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + created = a.vpc_endpoints.create(aws_vpc_endpoint_id=os.environ["TEST_RELAY_VPC_ENDPOINT"], + region=os.environ["AWS_REGION"], + vpc_endpoint_name=f'sdk-{time.time_ns()}') + + by_id = a.vpc_endpoints.get(vpc_endpoint_id=created.vpc_endpoint_id) + + # cleanup + a.vpc_endpoints.delete(vpc_endpoint_id=created.vpc_endpoint_id) + + Get a VPC endpoint configuration. + + Gets a VPC endpoint configuration, which represents a [VPC endpoint] object in AWS used to communicate + privately with Databricks over [AWS PrivateLink]. + + [AWS PrivateLink]: https://aws.amazon.com/privatelink + [VPC endpoint]: https://docs.aws.amazon.com/vpc/latest/privatelink/concepts.html + + :param vpc_endpoint_id: str + Databricks VPC endpoint ID. + + :returns: :class:`VpcEndpoint` + + + .. py:method:: list() -> Iterator[VpcEndpoint] + + + Usage: + + .. code-block:: + + from databricks.sdk import AccountClient + + a = AccountClient() + + all = a.vpc_endpoints.list() + + Get all VPC endpoint configurations. + + Gets a list of all VPC endpoints for an account, specified by ID. + + Before configuring PrivateLink, read the [Databricks article about PrivateLink]. 
+ + [Databricks article about PrivateLink]: https://docs.databricks.com/administration-guide/cloud-configurations/aws/privatelink.html + + :returns: Iterator over :class:`VpcEndpoint` + \ No newline at end of file diff --git a/docs/account/provisioning/workspaces.rst b/docs/account/provisioning/workspaces.rst new file mode 100644 index 000000000..41f46f881 --- /dev/null +++ b/docs/account/provisioning/workspaces.rst @@ -0,0 +1,424 @@ +``a.workspaces``: Workspaces +============================ +.. currentmodule:: databricks.sdk.service.provisioning + +.. py:class:: WorkspacesAPI + + These APIs manage workspaces for this account. A Databricks workspace is an environment for accessing all + of your Databricks assets. The workspace organizes objects (notebooks, libraries, and experiments) into + folders, and provides access to data and computational resources such as clusters and jobs. + + These endpoints are available if your account is on the E2 version of the platform or on a select custom + plan that allows multiple workspaces per account. + + .. py:method:: create(workspace_name: str [, aws_region: Optional[str], cloud: Optional[str], cloud_resource_container: Optional[CloudResourceContainer], credentials_id: Optional[str], custom_tags: Optional[Dict[str, str]], deployment_name: Optional[str], gcp_managed_network_config: Optional[GcpManagedNetworkConfig], gke_config: Optional[GkeConfig], location: Optional[str], managed_services_customer_managed_key_id: Optional[str], network_id: Optional[str], pricing_tier: Optional[PricingTier], private_access_settings_id: Optional[str], storage_configuration_id: Optional[str], storage_customer_managed_key_id: Optional[str]]) -> Wait[Workspace] + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import provisioning + + a = AccountClient() + + storage = a.storage.create( + storage_configuration_name=f'sdk-{time.time_ns()}', + root_bucket_info=provisioning.RootBucketInfo(bucket_name=os.environ["TEST_ROOT_BUCKET"])) + + role = a.credentials.create( + credentials_name=f'sdk-{time.time_ns()}', + aws_credentials=provisioning.CreateCredentialAwsCredentials(sts_role=provisioning.CreateCredentialStsRole( + role_arn=os.environ["TEST_CROSSACCOUNT_ARN"]))) + + created = a.workspaces.create(workspace_name=f'sdk-{time.time_ns()}', + aws_region=os.environ["AWS_REGION"], + credentials_id=role.credentials_id, + storage_configuration_id=storage.storage_configuration_id).result() + + # cleanup + a.storage.delete(storage_configuration_id=storage.storage_configuration_id) + a.credentials.delete(credentials_id=role.credentials_id) + a.workspaces.delete(workspace_id=created.workspace_id) + + Create a new workspace. + + Creates a new workspace. + + **Important**: This operation is asynchronous. A response with HTTP status code 200 means the request + has been accepted and is in progress, but does not mean that the workspace deployed successfully and + is running. The initial workspace status is typically `PROVISIONING`. Use the workspace ID + (`workspace_id`) field in the response to identify the new workspace and make repeated `GET` requests + with the workspace ID and check its status. The workspace becomes available when the status changes to + `RUNNING`. + + :param workspace_name: str + The workspace's human-readable name. + :param aws_region: str (optional) + The AWS region of the workspace's data plane. + :param cloud: str (optional) + The cloud provider which the workspace uses. For Google Cloud workspaces, always set this field to + `gcp`. 
+ :param cloud_resource_container: :class:`CloudResourceContainer` (optional) + The general workspace configurations that are specific to cloud providers. + :param credentials_id: str (optional) + ID of the workspace's credential configuration object. + :param custom_tags: Dict[str,str] (optional) + The custom tags key-value pairing that is attached to this workspace. The key-value pair is a string + of utf-8 characters. The value can be an empty string, with maximum length of 255 characters. The + key can be of maximum length of 127 characters, and cannot be empty. + :param deployment_name: str (optional) + The deployment name defines part of the subdomain for the workspace. The workspace URL for the web + application and REST APIs is `<workspace-deployment-name>.cloud.databricks.com`. For example, if the + deployment name is `abcsales`, your workspace URL will be `https://abcsales.cloud.databricks.com`. + Hyphens are allowed. This property supports only the set of characters that are allowed in a + subdomain. + + To set this value, you must have a deployment name prefix. Contact your Databricks account team to + add an account deployment name prefix to your account. + + Workspace deployment names follow the account prefix and a hyphen. For example, if your account's + deployment prefix is `acme` and the workspace deployment name is `workspace-1`, the JSON response + for the `deployment_name` field becomes `acme-workspace-1`. The workspace URL would be + `acme-workspace-1.cloud.databricks.com`. + + You can also set the `deployment_name` to the reserved keyword `EMPTY` if you want the deployment + name to only include the deployment prefix. For example, if your account's deployment prefix is + `acme` and the workspace deployment name is `EMPTY`, the `deployment_name` becomes `acme` only and + the workspace URL is `acme.cloud.databricks.com`. + + This value must be unique across all non-deleted deployments across all AWS regions. 
+ + If a new workspace omits this property, the server generates a unique deployment name for you with + the pattern `dbc-xxxxxxxx-xxxx`. + :param gcp_managed_network_config: :class:`GcpManagedNetworkConfig` (optional) + The network settings for the workspace. The configurations are only for Databricks-managed VPCs. It + is ignored if you specify a customer-managed VPC in the `network_id` field. All the IP range + configurations must be mutually exclusive. An attempt to create a workspace fails if Databricks + detects an IP range overlap. + + Specify custom IP ranges in CIDR format. The IP ranges for these fields must not overlap, and all IP + addresses must be entirely within the following ranges: `10.0.0.0/8`, `100.64.0.0/10`, + `172.16.0.0/12`, `192.168.0.0/16`, and `240.0.0.0/4`. + + The sizes of these IP ranges affect the maximum number of nodes for the workspace. + + **Important**: Confirm the IP ranges used by your Databricks workspace before creating the + workspace. You cannot change them after your workspace is deployed. If the IP address ranges for + your Databricks workspace are too small, IP exhaustion can occur, causing your Databricks jobs to fail. To + determine the address range sizes that you need, Databricks provides a calculator as a Microsoft + Excel spreadsheet. See [calculate subnet sizes for a new workspace]. + + [calculate subnet sizes for a new workspace]: https://docs.gcp.databricks.com/administration-guide/cloud-configurations/gcp/network-sizing.html + :param gke_config: :class:`GkeConfig` (optional) + The configurations for the GKE cluster of a Databricks workspace. + :param location: str (optional) + The Google Cloud region of the workspace data plane in your Google account. For example, `us-east4`. + :param managed_services_customer_managed_key_id: str (optional) + The ID of the workspace's managed services encryption key configuration object. 
This is used to help + protect and control access to the workspace's notebooks, secrets, Databricks SQL queries, and query + history. The provided key configuration object property `use_cases` must contain `MANAGED_SERVICES`. + :param network_id: str (optional) + :param pricing_tier: :class:`PricingTier` (optional) + The pricing tier of the workspace. For pricing tier information, see [AWS Pricing]. + + [AWS Pricing]: https://databricks.com/product/aws-pricing + :param private_access_settings_id: str (optional) + ID of the workspace's private access settings object. Only used for PrivateLink. This ID must be + specified for customers using [AWS PrivateLink] for either front-end (user-to-workspace connection), + back-end (data plane to control plane connection), or both connection types. + + Before configuring PrivateLink, read the [Databricks article about PrivateLink]. + + [AWS PrivateLink]: https://aws.amazon.com/privatelink/ + [Databricks article about PrivateLink]: https://docs.databricks.com/administration-guide/cloud-configurations/aws/privatelink.html + :param storage_configuration_id: str (optional) + The ID of the workspace's storage configuration object. + :param storage_customer_managed_key_id: str (optional) + The ID of the workspace's storage encryption key configuration object. This is used to encrypt the + workspace's root S3 bucket (root DBFS and system data) and, optionally, cluster EBS volumes. The + provided key configuration object property `use_cases` must contain `STORAGE`. + + :returns: + Long-running operation waiter for :class:`Workspace`. + See :method:wait_get_workspace_running for more details. + + + .. 
py:method:: create_and_wait(workspace_name: str [, aws_region: Optional[str], cloud: Optional[str], cloud_resource_container: Optional[CloudResourceContainer], credentials_id: Optional[str], custom_tags: Optional[Dict[str, str]], deployment_name: Optional[str], gcp_managed_network_config: Optional[GcpManagedNetworkConfig], gke_config: Optional[GkeConfig], location: Optional[str], managed_services_customer_managed_key_id: Optional[str], network_id: Optional[str], pricing_tier: Optional[PricingTier], private_access_settings_id: Optional[str], storage_configuration_id: Optional[str], storage_customer_managed_key_id: Optional[str], timeout: datetime.timedelta = 0:20:00]) -> Workspace + + + .. py:method:: delete(workspace_id: int) + + Delete a workspace. + + Terminates and deletes a Databricks workspace. From an API perspective, deletion is immediate. + However, it might take a few minutes for all workspaces resources to be deleted, depending on the size + and number of workspace resources. + + This operation is available only if your account is on the E2 version of the platform or on a select + custom plan that allows multiple workspaces per account. + + :param workspace_id: int + Workspace ID. + + + + + .. py:method:: get(workspace_id: int) -> Workspace + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import provisioning + + a = AccountClient() + + storage = a.storage.create( + storage_configuration_name=f'sdk-{time.time_ns()}', + root_bucket_info=provisioning.RootBucketInfo(bucket_name=os.environ["TEST_ROOT_BUCKET"])) + + role = a.credentials.create( + credentials_name=f'sdk-{time.time_ns()}', + aws_credentials=provisioning.CreateCredentialAwsCredentials(sts_role=provisioning.CreateCredentialStsRole( + role_arn=os.environ["TEST_CROSSACCOUNT_ARN"]))) + + created = a.workspaces.create(workspace_name=f'sdk-{time.time_ns()}', + aws_region=os.environ["AWS_REGION"], + credentials_id=role.credentials_id, + storage_configuration_id=storage.storage_configuration_id).result() + + by_id = a.workspaces.get(workspace_id=created.workspace_id) + + # cleanup + a.storage.delete(storage_configuration_id=storage.storage_configuration_id) + a.credentials.delete(credentials_id=role.credentials_id) + a.workspaces.delete(workspace_id=created.workspace_id) + + Get a workspace. + + Gets information including status for a Databricks workspace, specified by ID. In the response, the + `workspace_status` field indicates the current status. After initial workspace creation (which is + asynchronous), make repeated `GET` requests with the workspace ID and check its status. The workspace + becomes available when the status changes to `RUNNING`. + + For information about how to create a new workspace with this API **including error handling**, see + [Create a new workspace using the Account API]. + + This operation is available only if your account is on the E2 version of the platform or on a select + custom plan that allows multiple workspaces per account. + + [Create a new workspace using the Account API]: http://docs.databricks.com/administration-guide/account-api/new-workspace.html + + :param workspace_id: int + Workspace ID. + + :returns: :class:`Workspace` + + + .. 
py:method:: list() -> Iterator[Workspace] + + + Usage: + + .. code-block:: + + from databricks.sdk import AccountClient + + a = AccountClient() + + all = a.workspaces.list() + + Get all workspaces. + + Gets a list of all workspaces associated with an account, specified by ID. + + This operation is available only if your account is on the E2 version of the platform or on a select + custom plan that allows multiple workspaces per account. + + :returns: Iterator over :class:`Workspace` + + + .. py:method:: update(workspace_id: int [, aws_region: Optional[str], credentials_id: Optional[str], custom_tags: Optional[Dict[str, str]], managed_services_customer_managed_key_id: Optional[str], network_connectivity_config_id: Optional[str], network_id: Optional[str], storage_configuration_id: Optional[str], storage_customer_managed_key_id: Optional[str]]) -> Wait[Workspace] + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import provisioning + + a = AccountClient() + + storage = a.storage.create( + storage_configuration_name=f'sdk-{time.time_ns()}', + root_bucket_info=provisioning.RootBucketInfo(bucket_name=os.environ["TEST_ROOT_BUCKET"])) + + role = a.credentials.create( + credentials_name=f'sdk-{time.time_ns()}', + aws_credentials=provisioning.CreateCredentialAwsCredentials(sts_role=provisioning.CreateCredentialStsRole( + role_arn=os.environ["TEST_CROSSACCOUNT_ARN"]))) + + update_role = a.credentials.create( + credentials_name=f'sdk-{time.time_ns()}', + aws_credentials=provisioning.CreateCredentialAwsCredentials(sts_role=provisioning.CreateCredentialStsRole( + role_arn=os.environ["TEST_CROSSACCOUNT_ARN"]))) + + created = a.workspaces.create(workspace_name=f'sdk-{time.time_ns()}', + aws_region=os.environ["AWS_REGION"], + credentials_id=role.credentials_id, + storage_configuration_id=storage.storage_configuration_id).result() + + _ = a.workspaces.update(workspace_id=created.workspace_id, 
credentials_id=update_role.credentials_id).result() + + # cleanup + a.storage.delete(storage_configuration_id=storage.storage_configuration_id) + a.credentials.delete(credentials_id=role.credentials_id) + a.credentials.delete(credentials_id=update_role.credentials_id) + a.workspaces.delete(workspace_id=created.workspace_id) + + Update workspace configuration. + + Updates a workspace configuration for either a running workspace or a failed workspace. The elements + that can be updated vary between these two use cases. + + ### Update a failed workspace You can update a Databricks workspace configuration for a failed workspace + deployment for some fields, but not all fields. For a failed workspace, this request supports updates + to the following fields only: - Credential configuration ID - Storage configuration ID - Network + configuration ID. Used only to add or change a network configuration for a customer-managed VPC. For a + failed workspace only, you can convert a workspace with Databricks-managed VPC to use a + customer-managed VPC by adding this ID. You cannot downgrade a workspace with a customer-managed VPC + to be a Databricks-managed VPC. You can update the network configuration for a failed or running + workspace to add PrivateLink support, though you must also add a private access settings object. - Key + configuration ID for managed services (control plane storage, such as notebook source and Databricks + SQL queries). Used only if you use customer-managed keys for managed services. - Key configuration ID + for workspace storage (root S3 bucket and, optionally, EBS volumes). Used only if you use + customer-managed keys for workspace storage. **Important**: If the workspace was ever in the running + state, even if briefly before becoming a failed workspace, you cannot add a new key configuration ID + for workspace storage. - Private access settings ID to add PrivateLink support. 
You can add or update + the private access settings ID to upgrade a workspace to add support for front-end, back-end, or both + types of connectivity. You cannot remove (downgrade) any existing front-end or back-end PrivateLink + support on a workspace. - Custom tags. If you provide empty custom tags, the update is not + applied. + + After calling the `PATCH` operation to update the workspace configuration, make repeated `GET` + requests with the workspace ID and check the workspace status. The workspace is successful if the + status changes to `RUNNING`. + + For information about how to create a new workspace with this API **including error handling**, see + [Create a new workspace using the Account API]. + + ### Update a running workspace You can update a Databricks workspace configuration for running + workspaces for some fields, but not all fields. For a running workspace, this request supports + updating the following fields only: - Credential configuration ID + + - Network configuration ID. Used only if you already use a customer-managed VPC. You cannot convert a + running workspace from a Databricks-managed VPC to a customer-managed VPC. You can use a network + configuration update in this API for a failed or running workspace to add support for PrivateLink, + although you also need to add a private access settings object. + + - Key configuration ID for managed services (control plane storage, such as notebook source and + Databricks SQL queries). Databricks does not directly encrypt the data with the customer-managed key + (CMK). Databricks uses both the CMK and the Databricks managed key (DMK) that is unique to your + workspace to encrypt the Data Encryption Key (DEK). Databricks uses the DEK to encrypt your + workspace's managed services persisted data. If the workspace does not already have a CMK for managed + services, adding this ID enables managed services encryption for new or updated data. 
Existing managed + services data that existed before adding the key remains not encrypted with the DEK until it is + modified. If the workspace already has customer-managed keys for managed services, this request + rotates (changes) the CMK keys and the DEK is re-encrypted with the DMK and the new CMK. - Key + configuration ID for workspace storage (root S3 bucket and, optionally, EBS volumes). You can set this + only if the workspace does not already have a customer-managed key configuration for workspace + storage. - Private access settings ID to add PrivateLink support. You can add or update the private + access settings ID to upgrade a workspace to add support for front-end, back-end, or both types of + connectivity. You cannot remove (downgrade) any existing front-end or back-end PrivateLink support on + a workspace. - Custom tags. If you provide empty custom tags, the update is not applied. + + **Important**: To update a running workspace, your workspace must have no running compute resources + that run in your workspace's VPC in the Classic data plane. For example, stop all all-purpose + clusters, job clusters, pools with running clusters, and Classic SQL warehouses. If you do not + terminate all cluster instances in the workspace before calling this API, the request will fail. + + ### Wait until changes take effect. After calling the `PATCH` operation to update the workspace + configuration, make repeated `GET` requests with the workspace ID and check the workspace status and + the status of the fields. * For workspaces with a Databricks-managed VPC, the workspace status becomes + `PROVISIONING` temporarily (typically under 20 minutes). If the workspace update is successful, the + workspace status changes to `RUNNING`. Note that you can also check the workspace status in the + [Account Console]. However, you cannot use or create clusters for another 20 minutes after that status + change. 
This results in a total of up to 40 minutes in which you cannot create clusters. If you create + or use clusters before this time interval elapses, clusters do not launch successfully, fail, or could + cause other unexpected behavior. + + * For workspaces with a customer-managed VPC, the workspace status stays at status `RUNNING` and the + VPC change happens immediately. A change to the storage customer-managed key configuration ID might + take a few minutes to update, so continue to check the workspace until you observe that it has been + updated. If the update fails, the workspace might revert silently to its original configuration. After + the workspace has been updated, you cannot use or create clusters for another 20 minutes. If you + create or use clusters before this time interval elapses, clusters do not launch successfully, fail, + or could cause other unexpected behavior. + + If you update the _storage_ customer-managed key configurations, it takes 20 minutes for the changes + to fully take effect. During the 20 minute wait, it is important that you stop all REST API calls to + the DBFS API. If you are modifying _only the managed services key configuration_, you can omit the 20 + minute wait. + + **Important**: Customer-managed keys and customer-managed VPCs are supported by only some deployment + types and subscription types. If you have questions about availability, contact your Databricks + representative. + + This operation is available only if your account is on the E2 version of the platform or on a select + custom plan that allows multiple workspaces per account. + + [Account Console]: https://docs.databricks.com/administration-guide/account-settings-e2/account-console-e2.html + [Create a new workspace using the Account API]: http://docs.databricks.com/administration-guide/account-api/new-workspace.html + + :param workspace_id: int + Workspace ID. 
+ :param aws_region: str (optional) + The AWS region of the workspace's data plane (for example, `us-west-2`). This parameter is available + only for updating failed workspaces. + :param credentials_id: str (optional) + ID of the workspace's credential configuration object. This parameter is available for updating both + failed and running workspaces. + :param custom_tags: Dict[str,str] (optional) + The custom tags key-value pairing that is attached to this workspace. The key-value pair is a string + of utf-8 characters. The value can be an empty string, with maximum length of 255 characters. The + key can be of maximum length of 127 characters, and cannot be empty. + :param managed_services_customer_managed_key_id: str (optional) + The ID of the workspace's managed services encryption key configuration object. This parameter is + available only for updating failed workspaces. + :param network_connectivity_config_id: str (optional) + The ID of the network connectivity configuration object, which is the parent resource of this + private endpoint rule object. + :param network_id: str (optional) + The ID of the workspace's network configuration object. Used only if you already use a + customer-managed VPC. For failed workspaces only, you can switch from a Databricks-managed VPC to a + customer-managed VPC by updating the workspace to add a network configuration ID. + :param storage_configuration_id: str (optional) + The ID of the workspace's storage configuration object. This parameter is available only for + updating failed workspaces. + :param storage_customer_managed_key_id: str (optional) + The ID of the key configuration object for workspace storage. This parameter is available for + updating both failed and running workspaces. + + :returns: + Long-running operation waiter for :class:`Workspace`. + See :method:wait_get_workspace_running for more details. + + + .. 
py:method:: update_and_wait(workspace_id: int [, aws_region: Optional[str], credentials_id: Optional[str], custom_tags: Optional[Dict[str, str]], managed_services_customer_managed_key_id: Optional[str], network_connectivity_config_id: Optional[str], network_id: Optional[str], storage_configuration_id: Optional[str], storage_customer_managed_key_id: Optional[str], timeout: datetime.timedelta = 0:20:00]) -> Workspace + + + .. py:method:: wait_get_workspace_running(workspace_id: int, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[Workspace], None]]) -> Workspace diff --git a/docs/account/settings/index.rst b/docs/account/settings/index.rst new file mode 100644 index 000000000..ece6a3255 --- /dev/null +++ b/docs/account/settings/index.rst @@ -0,0 +1,12 @@ + +Settings +======== + +Manage security settings for Accounts and Workspaces + +.. toctree:: + :maxdepth: 1 + + ip_access_lists + network_connectivity + settings \ No newline at end of file diff --git a/docs/account/settings/ip_access_lists.rst b/docs/account/settings/ip_access_lists.rst new file mode 100644 index 000000000..135dd2b55 --- /dev/null +++ b/docs/account/settings/ip_access_lists.rst @@ -0,0 +1,227 @@ +``a.ip_access_lists``: Account IP Access Lists +============================================== +.. currentmodule:: databricks.sdk.service.settings + +.. py:class:: AccountIpAccessListsAPI + + The Accounts IP Access List API enables account admins to configure IP access lists for access to the + account console. + + Account IP Access Lists affect web application access and REST API access to the account console and + account APIs. If the feature is disabled for the account, all access is allowed for this account. There is + support for allow lists (inclusion) and block lists (exclusion). + + When a connection is attempted: 1. **First, all block lists are checked.** If the connection IP address + matches any block list, the connection is rejected. 2. 
**If the connection was not rejected by block + lists**, the IP address is compared with the allow lists. + + If there is at least one allow list for the account, the connection is allowed only if the IP address + matches an allow list. If there are no allow lists for the account, all IP addresses are allowed. + + For all allow lists and block lists combined, the account supports a maximum of 1000 IP/CIDR values, where + one CIDR counts as a single value. + + After changes to the account-level IP access lists, it can take a few minutes for changes to take effect. + + .. py:method:: create(label: str, list_type: ListType [, ip_addresses: Optional[List[str]]]) -> CreateIpAccessListResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import settings + + w = WorkspaceClient() + + created = w.ip_access_lists.create(label=f'sdk-{time.time_ns()}', + ip_addresses=["1.0.0.0/16"], + list_type=settings.ListType.BLOCK) + + # cleanup + w.ip_access_lists.delete(ip_access_list_id=created.ip_access_list.list_id) + + Create access list. + + Creates an IP access list for the account. + + A list can be an allow list or a block list. See the top of this file for a description of how the + server treats allow lists and block lists at runtime. + + When creating or updating an IP access list: + + * For all allow lists and block lists combined, the API supports a maximum of 1000 IP/CIDR values, + where one CIDR counts as a single value. Attempts to exceed that number return error 400 with + `error_code` value `QUOTA_EXCEEDED`. * If the new list would block the calling user's current IP, + error 400 is returned with `error_code` value `INVALID_STATE`. + + It can take a few minutes for the changes to take effect. + + :param label: str + Label for the IP access list. This **cannot** be empty. + :param list_type: :class:`ListType` + Type of IP access list. 
Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. IP addresses in the block list are excluded even if they are included in an allow list. + :param ip_addresses: List[str] (optional) + + :returns: :class:`CreateIpAccessListResponse` + + + .. py:method:: delete(ip_access_list_id: str) + + Delete access list. + + Deletes an IP access list, specified by its list ID. + + :param ip_access_list_id: str + The ID for the corresponding IP access list + + + + + .. py:method:: get(ip_access_list_id: str) -> GetIpAccessListResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import settings + + w = WorkspaceClient() + + created = w.ip_access_lists.create(label=f'sdk-{time.time_ns()}', + ip_addresses=["1.0.0.0/16"], + list_type=settings.ListType.BLOCK) + + by_id = w.ip_access_lists.get(ip_access_list_id=created.ip_access_list.list_id) + + # cleanup + w.ip_access_lists.delete(ip_access_list_id=created.ip_access_list.list_id) + + Get IP access list. + + Gets an IP access list, specified by its list ID. + + :param ip_access_list_id: str + The ID for the corresponding IP access list + + :returns: :class:`GetIpAccessListResponse` + + + .. py:method:: list() -> Iterator[IpAccessListInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.ip_access_lists.list() + + Get access lists. + + Gets all IP access lists for the specified account. + + :returns: Iterator over :class:`IpAccessListInfo` + + + .. py:method:: replace(ip_access_list_id: str, label: str, list_type: ListType, enabled: bool [, ip_addresses: Optional[List[str]]]) + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import settings + + w = WorkspaceClient() + + created = w.ip_access_lists.create(label=f'sdk-{time.time_ns()}', + ip_addresses=["1.0.0.0/16"], + list_type=settings.ListType.BLOCK) + + w.ip_access_lists.replace(ip_access_list_id=created.ip_access_list.list_id, + label=f'sdk-{time.time_ns()}', + ip_addresses=["1.0.0.0/24"], + list_type=settings.ListType.BLOCK, + enabled=False) + + # cleanup + w.ip_access_lists.delete(ip_access_list_id=created.ip_access_list.list_id) + + Replace access list. + + Replaces an IP access list, specified by its ID. + + A list can include allow lists and block lists. See the top of this file for a description of how the + server treats allow lists and block lists at run time. When replacing an IP access list: * For all + allow lists and block lists combined, the API supports a maximum of 1000 IP/CIDR values, where one + CIDR counts as a single value. Attempts to exceed that number return error 400 with `error_code` value + `QUOTA_EXCEEDED`. * If the resulting list would block the calling user's current IP, error 400 is + returned with `error_code` value `INVALID_STATE`. It can take a few minutes for the changes to take + effect. + + :param ip_access_list_id: str + The ID for the corresponding IP access list + :param label: str + Label for the IP access list. This **cannot** be empty. + :param list_type: :class:`ListType` + Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. IP addresses in the block list are excluded even if they are included in an allow list. + :param enabled: bool + Specifies whether this IP access list is enabled. + :param ip_addresses: List[str] (optional) + + + + + .. 
py:method:: update(ip_access_list_id: str [, enabled: Optional[bool], ip_addresses: Optional[List[str]], label: Optional[str], list_type: Optional[ListType]]) + + Update access list. + + Updates an existing IP access list, specified by its ID. + + A list can include allow lists and block lists. See the top of this file for a description of how the + server treats allow lists and block lists at run time. + + When updating an IP access list: + + * For all allow lists and block lists combined, the API supports a maximum of 1000 IP/CIDR values, + where one CIDR counts as a single value. Attempts to exceed that number return error 400 with + `error_code` value `QUOTA_EXCEEDED`. * If the updated list would block the calling user's current IP, + error 400 is returned with `error_code` value `INVALID_STATE`. + + It can take a few minutes for the changes to take effect. + + :param ip_access_list_id: str + The ID for the corresponding IP access list + :param enabled: bool (optional) + Specifies whether this IP access list is enabled. + :param ip_addresses: List[str] (optional) + :param label: str (optional) + Label for the IP access list. This **cannot** be empty. + :param list_type: :class:`ListType` (optional) + Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. IP addresses in the block list are excluded even if they are included in an allow list. + + + \ No newline at end of file diff --git a/docs/account/settings/network_connectivity.rst b/docs/account/settings/network_connectivity.rst new file mode 100644 index 000000000..979491e4f --- /dev/null +++ b/docs/account/settings/network_connectivity.rst @@ -0,0 +1,146 @@ +``a.network_connectivity``: Network Connectivity +================================================ +.. currentmodule:: databricks.sdk.service.settings + +.. 
py:class:: NetworkConnectivityAPI + + These APIs provide configurations for the network connectivity of your workspaces for serverless compute + resources. This API provides stable subnets for your workspace so that you can configure your firewalls on + your Azure Storage accounts to allow access from Databricks. You can also use the API to provision private + endpoints for Databricks to privately connect serverless compute resources to your Azure resources using + Azure Private Link. See [configure serverless secure connectivity]. + + [configure serverless secure connectivity]: https://learn.microsoft.com/azure/databricks/security/network/serverless-network-security + + .. py:method:: create_network_connectivity_configuration(name: str, region: str) -> NetworkConnectivityConfiguration + + Create a network connectivity configuration. + + Creates a network connectivity configuration (NCC), which provides stable Azure service subnets when + accessing your Azure Storage accounts. You can also use a network connectivity configuration to create + Databricks-managed private endpoints so that Databricks serverless compute resources privately access + your resources. + + **IMPORTANT**: After you create the network connectivity configuration, you must assign one or more + workspaces to the new network connectivity configuration. You can share one network connectivity + configuration with multiple workspaces from the same Azure region within the same Databricks account. + See [configure serverless secure connectivity]. + + [configure serverless secure connectivity]: https://learn.microsoft.com/azure/databricks/security/network/serverless-network-security + + :param name: str + The name of the network connectivity configuration. The name can contain alphanumeric characters, + hyphens, and underscores. The length must be between 3 and 30 characters. The name must match the + regular expression `^[0-9a-zA-Z-_]{3,30}$`. 
+ :param region: str + The Azure region for this network connectivity configuration. Only workspaces in the same Azure + region can be attached to this network connectivity configuration. + + :returns: :class:`NetworkConnectivityConfiguration` + + + .. py:method:: create_private_endpoint_rule(network_connectivity_config_id: str, resource_id: str, group_id: CreatePrivateEndpointRuleRequestGroupId) -> NccAzurePrivateEndpointRule + + Create a private endpoint rule. + + Create a private endpoint rule for the specified network connectivity config object. Once the object + is created, Databricks asynchronously provisions a new Azure private endpoint to your specified Azure + resource. + + **IMPORTANT**: You must use the Azure portal or other Azure tools to approve the private endpoint to + complete the connection. To get the information of the private endpoint created, make a `GET` request + on the new private endpoint rule. See [serverless private link]. + + [serverless private link]: https://learn.microsoft.com/azure/databricks/security/network/serverless-network-security/serverless-private-link + + :param network_connectivity_config_id: str + Your Network Connectivity Configuration ID. + :param resource_id: str + The Azure resource ID of the target resource. + :param group_id: :class:`CreatePrivateEndpointRuleRequestGroupId` + The sub-resource type (group ID) of the target resource. Note that to connect to workspace root + storage (root DBFS), you need two endpoints, one for `blob` and one for `dfs`. + + :returns: :class:`NccAzurePrivateEndpointRule` + + + .. py:method:: delete_network_connectivity_configuration(network_connectivity_config_id: str) + + Delete a network connectivity configuration. + + Deletes a network connectivity configuration. + + :param network_connectivity_config_id: str + Your Network Connectivity Configuration ID. + + + + + .. 
py:method:: delete_private_endpoint_rule(network_connectivity_config_id: str, private_endpoint_rule_id: str) -> NccAzurePrivateEndpointRule + + Delete a private endpoint rule. + + Initiates deleting a private endpoint rule. The private endpoint will be deactivated and will be + purged after seven days of deactivation. When a private endpoint is in the deactivated state, the + `deactivated` field is set to `true` and the private endpoint is not available to your serverless + compute resources. + + :param network_connectivity_config_id: str + Your Network Connectivity Configuration ID. + :param private_endpoint_rule_id: str + Your private endpoint rule ID. + + :returns: :class:`NccAzurePrivateEndpointRule` + + + .. py:method:: get_network_connectivity_configuration(network_connectivity_config_id: str) -> NetworkConnectivityConfiguration + + Get a network connectivity configuration. + + Gets a network connectivity configuration. + + :param network_connectivity_config_id: str + Your Network Connectivity Configuration ID. + + :returns: :class:`NetworkConnectivityConfiguration` + + + .. py:method:: get_private_endpoint_rule(network_connectivity_config_id: str, private_endpoint_rule_id: str) -> NccAzurePrivateEndpointRule + + Get a private endpoint rule. + + Gets the private endpoint rule. + + :param network_connectivity_config_id: str + Your Network Connectivity Configuration ID. + :param private_endpoint_rule_id: str + Your private endpoint rule ID. + + :returns: :class:`NccAzurePrivateEndpointRule` + + + .. py:method:: list_network_connectivity_configurations( [, page_token: Optional[str]]) -> Iterator[NetworkConnectivityConfiguration] + + List network connectivity configurations. + + Gets an array of network connectivity configurations. + + :param page_token: str (optional) + Pagination token to go to next page based on previous query. + + :returns: Iterator over :class:`NetworkConnectivityConfiguration` + + + .. 
py:method:: list_private_endpoint_rules(network_connectivity_config_id: str [, page_token: Optional[str]]) -> Iterator[NccAzurePrivateEndpointRule] + + List private endpoint rules. + + Gets an array of private endpoint rules. + + :param network_connectivity_config_id: str + Your Network Connectivity Configuration ID. + :param page_token: str (optional) + Pagination token to go to next page based on previous query. + + :returns: Iterator over :class:`NccAzurePrivateEndpointRule` + \ No newline at end of file diff --git a/docs/account/settings/settings.rst b/docs/account/settings/settings.rst new file mode 100644 index 000000000..7f9d44534 --- /dev/null +++ b/docs/account/settings/settings.rst @@ -0,0 +1,58 @@ +``a.settings``: Personal Compute Enablement +=========================================== +.. currentmodule:: databricks.sdk.service.settings + +.. py:class:: AccountSettingsAPI + + The Personal Compute enablement setting lets you control which users can use the Personal Compute default + policy to create compute resources. By default all users in all workspaces have access (ON), but you can + change the setting to instead let individual workspaces configure access control (DELEGATE). + + There is only one instance of this setting per account. Since this setting has a default value, this + setting is present on all accounts even though it's never set on a given account. Deletion reverts the + value of the setting back to the default value. + + .. py:method:: delete_personal_compute_setting(etag: str) -> DeletePersonalComputeSettingResponse + + Delete Personal Compute setting. + + Reverts the Personal Compute setting value to its default (ON). + + :param etag: str + etag used for versioning. The response is at least as fresh as the eTag provided. This is used for + optimistic concurrency control as a way to help prevent simultaneous writes of a setting overwriting + each other. 
It is strongly suggested that systems make use of the etag in the read -> delete pattern + to perform setting deletions in order to avoid race conditions. That is, get an etag from a GET + request, and pass it with the DELETE request to identify the rule set version you are deleting. + + :returns: :class:`DeletePersonalComputeSettingResponse` + + + .. py:method:: read_personal_compute_setting(etag: str) -> PersonalComputeSetting + + Get Personal Compute setting. + + Gets the value of the Personal Compute setting. + + :param etag: str + etag used for versioning. The response is at least as fresh as the eTag provided. This is used for + optimistic concurrency control as a way to help prevent simultaneous writes of a setting overwriting + each other. It is strongly suggested that systems make use of the etag in the read -> delete pattern + to perform setting deletions in order to avoid race conditions. That is, get an etag from a GET + request, and pass it with the DELETE request to identify the rule set version you are deleting. + + :returns: :class:`PersonalComputeSetting` + + + .. py:method:: update_personal_compute_setting( [, allow_missing: Optional[bool], setting: Optional[PersonalComputeSetting]]) -> PersonalComputeSetting + + Update Personal Compute setting. + + Updates the value of the Personal Compute setting. + + :param allow_missing: bool (optional) + This should always be set to true for Settings RPCs. Added for AIP compliance. + :param setting: :class:`PersonalComputeSetting` (optional) + + :returns: :class:`PersonalComputeSetting` + \ No newline at end of file diff --git a/docs/autogen/billing.rst b/docs/autogen/billing.rst deleted file mode 100644 index dbc17aa98..000000000 --- a/docs/autogen/billing.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``billing``: Billing -==================== - -Configure different aspects of Databricks billing and usage. - -.. 
automodule:: databricks.sdk.service.billing - :members: - :undoc-members: diff --git a/docs/autogen/catalog.rst b/docs/autogen/catalog.rst deleted file mode 100644 index f61dc413f..000000000 --- a/docs/autogen/catalog.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``catalog``: Unity Catalog -========================== - -Configure data governance with Unity Catalog for metastores, catalogs, schemas, tables, external locations, and storage credentials - -.. automodule:: databricks.sdk.service.catalog - :members: - :undoc-members: diff --git a/docs/autogen/compute.rst b/docs/autogen/compute.rst deleted file mode 100644 index 26ff959a5..000000000 --- a/docs/autogen/compute.rst +++ /dev/null @@ -1,14 +0,0 @@ - -``compute``: Compute -==================== - -Use and configure compute for Databricks - -.. automodule:: databricks.sdk.service.compute - :members: - :undoc-members: - -.. automodule:: databricks.sdk.mixins.compute - :members: - :inherited-members: - :undoc-members: diff --git a/docs/autogen/files.rst b/docs/autogen/files.rst deleted file mode 100644 index e0aa0dbb6..000000000 --- a/docs/autogen/files.rst +++ /dev/null @@ -1,14 +0,0 @@ - -``files``: File Management -========================== - -Manage files on Databricks in a filesystem-like interface - -.. automodule:: databricks.sdk.service.files - :members: - :undoc-members: - -.. automodule:: databricks.sdk.mixins.files - :members: - :inherited-members: - :undoc-members: diff --git a/docs/autogen/iam.rst b/docs/autogen/iam.rst deleted file mode 100644 index da047eccf..000000000 --- a/docs/autogen/iam.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``iam``: Identity and Access Management -======================================= - -Manage users, service principals, groups and their permissions in Accounts and Workspaces - -.. 
automodule:: databricks.sdk.service.iam - :members: - :undoc-members: diff --git a/docs/autogen/jobs.rst b/docs/autogen/jobs.rst deleted file mode 100644 index cb406749c..000000000 --- a/docs/autogen/jobs.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``jobs``: Jobs -============== - -Schedule automated jobs on Databricks Workspaces - -.. automodule:: databricks.sdk.service.jobs - :members: - :undoc-members: diff --git a/docs/autogen/ml.rst b/docs/autogen/ml.rst deleted file mode 100644 index 6e51ec501..000000000 --- a/docs/autogen/ml.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``ml``: Machine Learning -======================== - -Create and manage experiments, features, and other machine learning artifacts - -.. automodule:: databricks.sdk.service.ml - :members: - :undoc-members: diff --git a/docs/autogen/oauth2.rst b/docs/autogen/oauth2.rst deleted file mode 100644 index d210688ec..000000000 --- a/docs/autogen/oauth2.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``oauth2``: OAuth -================= - -Configure OAuth 2.0 application registrations for Databricks - -.. automodule:: databricks.sdk.service.oauth2 - :members: - :undoc-members: diff --git a/docs/autogen/pipelines.rst b/docs/autogen/pipelines.rst deleted file mode 100644 index 091042bd5..000000000 --- a/docs/autogen/pipelines.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``pipelines``: Delta Live Tables -================================ - -Manage pipelines, runs, and other Delta Live Table resources - -.. automodule:: databricks.sdk.service.pipelines - :members: - :undoc-members: diff --git a/docs/autogen/provisioning.rst b/docs/autogen/provisioning.rst deleted file mode 100644 index c17a200fa..000000000 --- a/docs/autogen/provisioning.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``provisioning``: Provisioning -============================== - -Resource management for secure Databricks Workspace deployment, cross-account IAM roles, storage, encryption, networking and private access. - -.. 
automodule:: databricks.sdk.service.provisioning - :members: - :undoc-members: diff --git a/docs/autogen/serving.rst b/docs/autogen/serving.rst deleted file mode 100644 index 73f3e5aef..000000000 --- a/docs/autogen/serving.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``serving``: Real-time Serving -============================== - -Use real-time inference for machine learning - -.. automodule:: databricks.sdk.service.serving - :members: - :undoc-members: diff --git a/docs/autogen/settings.rst b/docs/autogen/settings.rst deleted file mode 100644 index f0d41a1e6..000000000 --- a/docs/autogen/settings.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``settings``: Settings -====================== - -Manage security settings for Accounts and Workspaces - -.. automodule:: databricks.sdk.service.settings - :members: - :undoc-members: diff --git a/docs/autogen/sharing.rst b/docs/autogen/sharing.rst deleted file mode 100644 index cef34fcd8..000000000 --- a/docs/autogen/sharing.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``sharing``: Delta Sharing -========================== - -Configure data sharing with Unity Catalog for providers, recipients, and shares - -.. automodule:: databricks.sdk.service.sharing - :members: - :undoc-members: diff --git a/docs/autogen/sql.rst b/docs/autogen/sql.rst deleted file mode 100644 index 1f816cb6f..000000000 --- a/docs/autogen/sql.rst +++ /dev/null @@ -1,9 +0,0 @@ - -``sql``: Databricks SQL -======================= - -Manage Databricks SQL assets, including warehouses, dashboards, queries and query history, and alerts - -.. automodule:: databricks.sdk.service.sql - :members: - :undoc-members: diff --git a/docs/autogen/workspace.rst b/docs/autogen/workspace.rst deleted file mode 100644 index 36fcad4b4..000000000 --- a/docs/autogen/workspace.rst +++ /dev/null @@ -1,14 +0,0 @@ - -``workspace``: Databricks Workspace -=================================== - -Manage workspace-level entities that include notebooks, Git checkouts, and secrets - -.. 
automodule:: databricks.sdk.service.workspace - :members: - :undoc-members: - -.. automodule:: databricks.sdk.mixins.workspace - :members: - :inherited-members: - :undoc-members: diff --git a/docs/clients.rst b/docs/clients.rst deleted file mode 100644 index 359c3ee3b..000000000 --- a/docs/clients.rst +++ /dev/null @@ -1,11 +0,0 @@ - -SDK Clients -=========== - -.. autoclass:: databricks.sdk.WorkspaceClient - :members: - :undoc-members: - -.. autoclass:: databricks.sdk.AccountClient - :members: - :undoc-members: diff --git a/docs/clients/account.rst b/docs/clients/account.rst new file mode 100644 index 000000000..7862cd978 --- /dev/null +++ b/docs/clients/account.rst @@ -0,0 +1,6 @@ +Account Client +============== + +.. autoclass:: databricks.sdk.AccountClient + :members: + :undoc-members: diff --git a/docs/clients/workspace.rst b/docs/clients/workspace.rst new file mode 100644 index 000000000..31bad0ec9 --- /dev/null +++ b/docs/clients/workspace.rst @@ -0,0 +1,6 @@ +Workspace Client +================ + +.. autoclass:: databricks.sdk.WorkspaceClient + :members: + :undoc-members: diff --git a/docs/conf.py b/docs/conf.py index 5bab1f4ba..d184be022 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -18,7 +18,8 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'myst_parser', 'enum_tools.autoenum' + 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'myst_parser', + 'db_sphinx_ext' ] templates_path = ['_templates'] @@ -44,3 +45,5 @@ autodoc_default_options = { 'member-order': 'bysource', } + +toc_object_entries = False diff --git a/docs/db_sphinx_ext.py b/docs/db_sphinx_ext.py new file mode 100644 index 000000000..71fc0010a --- /dev/null +++ b/docs/db_sphinx_ext.py @@ -0,0 +1,9 @@ +def remove_class_signature(app, what, name, obj, options, signature, return_annotation): + if what == "class": + # Set the signature to None for classes. 
Otherwise, there is duplication of the dataclass parameters and + # documentation, and there is far too much visual noise. + return (None, return_annotation) + return (signature, return_annotation) + +def setup(app): + app.connect('autodoc-process-signature', remove_class_signature) diff --git a/docs/dbdataclasses/billing.rst b/docs/dbdataclasses/billing.rst new file mode 100644 index 000000000..ac99067b1 --- /dev/null +++ b/docs/dbdataclasses/billing.rst @@ -0,0 +1,36 @@ +Billing +======= + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.billing`` module. + +.. py:currentmodule:: databricks.sdk.service.billing +.. autoclass:: Budget + :members: +.. autoclass:: BudgetAlert + :members: +.. autoclass:: BudgetList + :members: +.. autoclass:: BudgetWithStatus + :members: +.. autoclass:: BudgetWithStatusStatusDailyItem + :members: +.. autoclass:: CreateLogDeliveryConfigurationParams + :members: +.. autoclass:: DownloadResponse + :members: +.. autoclass:: LogDeliveryConfiguration + :members: +.. autoclass:: LogDeliveryStatus + :members: +.. autoclass:: UpdateLogDeliveryConfigurationStatusRequest + :members: +.. autoclass:: WrappedBudget + :members: +.. autoclass:: WrappedBudgetWithStatus + :members: +.. autoclass:: WrappedCreateLogDeliveryConfiguration + :members: +.. autoclass:: WrappedLogDeliveryConfiguration + :members: +.. autoclass:: WrappedLogDeliveryConfigurations + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/catalog.rst b/docs/dbdataclasses/catalog.rst new file mode 100644 index 000000000..c4cc0a5b4 --- /dev/null +++ b/docs/dbdataclasses/catalog.rst @@ -0,0 +1,214 @@ +Unity Catalog +============= + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.catalog`` module. + +.. py:currentmodule:: databricks.sdk.service.catalog +.. autoclass:: AccountsCreateMetastore + :members: +.. 
autoclass:: AccountsCreateMetastoreAssignment + :members: +.. autoclass:: AccountsCreateStorageCredential + :members: +.. autoclass:: AccountsMetastoreAssignment + :members: +.. autoclass:: AccountsMetastoreInfo + :members: +.. autoclass:: AccountsStorageCredentialInfo + :members: +.. autoclass:: AccountsUpdateMetastore + :members: +.. autoclass:: AccountsUpdateMetastoreAssignment + :members: +.. autoclass:: AccountsUpdateStorageCredential + :members: +.. autoclass:: ArtifactAllowlistInfo + :members: +.. autoclass:: ArtifactMatcher + :members: +.. autoclass:: AwsIamRole + :members: +.. autoclass:: AzureManagedIdentity + :members: +.. autoclass:: AzureServicePrincipal + :members: +.. autoclass:: CatalogInfo + :members: +.. autoclass:: CloudflareApiToken + :members: +.. autoclass:: ColumnInfo + :members: +.. autoclass:: ColumnMask + :members: +.. autoclass:: ConnectionInfo + :members: +.. autoclass:: CreateCatalog + :members: +.. autoclass:: CreateConnection + :members: +.. autoclass:: CreateExternalLocation + :members: +.. autoclass:: CreateFunction + :members: +.. autoclass:: CreateFunctionRequest + :members: +.. autoclass:: CreateMetastore + :members: +.. autoclass:: CreateMetastoreAssignment + :members: +.. autoclass:: CreateRegisteredModelRequest + :members: +.. autoclass:: CreateSchema + :members: +.. autoclass:: CreateStorageCredential + :members: +.. autoclass:: CreateTableConstraint + :members: +.. autoclass:: CreateVolumeRequestContent + :members: +.. autoclass:: CurrentWorkspaceBindings + :members: +.. autoclass:: DatabricksGcpServiceAccountResponse + :members: +.. autoclass:: DeltaRuntimePropertiesKvPairs + :members: +.. autoclass:: Dependency + :members: +.. autoclass:: DependencyList + :members: +.. autoclass:: EffectivePermissionsList + :members: +.. autoclass:: EffectivePredictiveOptimizationFlag + :members: +.. autoclass:: EffectivePrivilege + :members: +.. autoclass:: EffectivePrivilegeAssignment + :members: +.. 
autoclass:: EncryptionDetails + :members: +.. autoclass:: ExternalLocationInfo + :members: +.. autoclass:: ForeignKeyConstraint + :members: +.. autoclass:: FunctionDependency + :members: +.. autoclass:: FunctionInfo + :members: +.. autoclass:: FunctionParameterInfo + :members: +.. autoclass:: FunctionParameterInfos + :members: +.. autoclass:: GetMetastoreSummaryResponse + :members: +.. autoclass:: ListAccountMetastoreAssignmentsResponse + :members: +.. autoclass:: ListCatalogsResponse + :members: +.. autoclass:: ListConnectionsResponse + :members: +.. autoclass:: ListExternalLocationsResponse + :members: +.. autoclass:: ListFunctionsResponse + :members: +.. autoclass:: ListMetastoresResponse + :members: +.. autoclass:: ListModelVersionsResponse + :members: +.. autoclass:: ListRegisteredModelsResponse + :members: +.. autoclass:: ListSchemasResponse + :members: +.. autoclass:: ListStorageCredentialsResponse + :members: +.. autoclass:: ListSystemSchemasResponse + :members: +.. autoclass:: ListTableSummariesResponse + :members: +.. autoclass:: ListTablesResponse + :members: +.. autoclass:: ListVolumesResponseContent + :members: +.. autoclass:: MetastoreAssignment + :members: +.. autoclass:: MetastoreInfo + :members: +.. autoclass:: ModelVersionInfo + :members: +.. autoclass:: NamedTableConstraint + :members: +.. autoclass:: PermissionsChange + :members: +.. autoclass:: PermissionsList + :members: +.. autoclass:: PrimaryKeyConstraint + :members: +.. autoclass:: PrivilegeAssignment + :members: +.. autoclass:: ProvisioningInfo + :members: +.. autoclass:: RegisteredModelAlias + :members: +.. autoclass:: RegisteredModelInfo + :members: +.. autoclass:: SchemaInfo + :members: +.. autoclass:: SetArtifactAllowlist + :members: +.. autoclass:: SetRegisteredModelAliasRequest + :members: +.. autoclass:: SseEncryptionDetails + :members: +.. autoclass:: StorageCredentialInfo + :members: +.. autoclass:: SystemSchemaInfo + :members: +.. autoclass:: TableConstraint + :members: +.. 
autoclass:: TableDependency + :members: +.. autoclass:: TableInfo + :members: +.. autoclass:: TableRowFilter + :members: +.. autoclass:: TableSummary + :members: +.. autoclass:: UpdateCatalog + :members: +.. autoclass:: UpdateConnection + :members: +.. autoclass:: UpdateExternalLocation + :members: +.. autoclass:: UpdateFunction + :members: +.. autoclass:: UpdateMetastore + :members: +.. autoclass:: UpdateMetastoreAssignment + :members: +.. autoclass:: UpdateModelVersionRequest + :members: +.. autoclass:: UpdatePermissions + :members: +.. autoclass:: UpdateRegisteredModelRequest + :members: +.. autoclass:: UpdateSchema + :members: +.. autoclass:: UpdateStorageCredential + :members: +.. autoclass:: UpdateVolumeRequestContent + :members: +.. autoclass:: UpdateWorkspaceBindings + :members: +.. autoclass:: UpdateWorkspaceBindingsParameters + :members: +.. autoclass:: ValidateStorageCredential + :members: +.. autoclass:: ValidateStorageCredentialResponse + :members: +.. autoclass:: ValidationResult + :members: +.. autoclass:: VolumeInfo + :members: +.. autoclass:: WorkspaceBinding + :members: +.. autoclass:: WorkspaceBindingsResponse + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/compute.rst b/docs/dbdataclasses/compute.rst new file mode 100644 index 000000000..f22e7fa83 --- /dev/null +++ b/docs/dbdataclasses/compute.rst @@ -0,0 +1,252 @@ +Compute +======= + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.compute`` module. + +.. py:currentmodule:: databricks.sdk.service.compute +.. autoclass:: AddInstanceProfile + :members: +.. autoclass:: AutoScale + :members: +.. autoclass:: AwsAttributes + :members: +.. autoclass:: AzureAttributes + :members: +.. autoclass:: CancelCommand + :members: +.. autoclass:: ChangeClusterOwner + :members: +.. autoclass:: ClientsTypes + :members: +.. autoclass:: CloudProviderNodeInfo + :members: +.. 
autoclass:: ClusterAccessControlRequest + :members: +.. autoclass:: ClusterAccessControlResponse + :members: +.. autoclass:: ClusterAttributes + :members: +.. autoclass:: ClusterDetails + :members: +.. autoclass:: ClusterEvent + :members: +.. autoclass:: ClusterLibraryStatuses + :members: +.. autoclass:: ClusterLogConf + :members: +.. autoclass:: ClusterPermission + :members: +.. autoclass:: ClusterPermissions + :members: +.. autoclass:: ClusterPermissionsDescription + :members: +.. autoclass:: ClusterPermissionsRequest + :members: +.. autoclass:: ClusterPolicyAccessControlRequest + :members: +.. autoclass:: ClusterPolicyAccessControlResponse + :members: +.. autoclass:: ClusterPolicyPermission + :members: +.. autoclass:: ClusterPolicyPermissions + :members: +.. autoclass:: ClusterPolicyPermissionsDescription + :members: +.. autoclass:: ClusterPolicyPermissionsRequest + :members: +.. autoclass:: ClusterSize + :members: +.. autoclass:: ClusterSpec + :members: +.. autoclass:: Command + :members: +.. autoclass:: CommandStatusResponse + :members: +.. autoclass:: ComputeSpec + :members: +.. autoclass:: ContextStatusResponse + :members: +.. autoclass:: CreateCluster + :members: +.. autoclass:: CreateClusterResponse + :members: +.. autoclass:: CreateContext + :members: +.. autoclass:: CreateInstancePool + :members: +.. autoclass:: CreateInstancePoolResponse + :members: +.. autoclass:: CreatePolicy + :members: +.. autoclass:: CreatePolicyResponse + :members: +.. autoclass:: CreateResponse + :members: +.. autoclass:: Created + :members: +.. autoclass:: DataPlaneEventDetails + :members: +.. autoclass:: DbfsStorageInfo + :members: +.. autoclass:: DeleteCluster + :members: +.. autoclass:: DeleteInstancePool + :members: +.. autoclass:: DeletePolicy + :members: +.. autoclass:: DestroyContext + :members: +.. autoclass:: DiskSpec + :members: +.. autoclass:: DiskType + :members: +.. autoclass:: DockerBasicAuth + :members: +.. autoclass:: DockerImage + :members: +.. 
autoclass:: EditCluster + :members: +.. autoclass:: EditInstancePool + :members: +.. autoclass:: EditPolicy + :members: +.. autoclass:: EventDetails + :members: +.. autoclass:: GcpAttributes + :members: +.. autoclass:: GetClusterPermissionLevelsResponse + :members: +.. autoclass:: GetClusterPolicyPermissionLevelsResponse + :members: +.. autoclass:: GetEvents + :members: +.. autoclass:: GetEventsResponse + :members: +.. autoclass:: GetInstancePool + :members: +.. autoclass:: GetInstancePoolPermissionLevelsResponse + :members: +.. autoclass:: GetSparkVersionsResponse + :members: +.. autoclass:: GlobalInitScriptCreateRequest + :members: +.. autoclass:: GlobalInitScriptDetails + :members: +.. autoclass:: GlobalInitScriptDetailsWithContent + :members: +.. autoclass:: GlobalInitScriptUpdateRequest + :members: +.. autoclass:: InitScriptEventDetails + :members: +.. autoclass:: InitScriptExecutionDetails + :members: +.. autoclass:: InitScriptInfo + :members: +.. autoclass:: InitScriptInfoAndExecutionDetails + :members: +.. autoclass:: InstallLibraries + :members: +.. autoclass:: InstancePoolAccessControlRequest + :members: +.. autoclass:: InstancePoolAccessControlResponse + :members: +.. autoclass:: InstancePoolAndStats + :members: +.. autoclass:: InstancePoolAwsAttributes + :members: +.. autoclass:: InstancePoolAzureAttributes + :members: +.. autoclass:: InstancePoolGcpAttributes + :members: +.. autoclass:: InstancePoolPermission + :members: +.. autoclass:: InstancePoolPermissions + :members: +.. autoclass:: InstancePoolPermissionsDescription + :members: +.. autoclass:: InstancePoolPermissionsRequest + :members: +.. autoclass:: InstancePoolStats + :members: +.. autoclass:: InstancePoolStatus + :members: +.. autoclass:: InstanceProfile + :members: +.. autoclass:: Library + :members: +.. autoclass:: LibraryFullStatus + :members: +.. autoclass:: ListAllClusterLibraryStatusesResponse + :members: +.. autoclass:: ListAvailableZonesResponse + :members: +.. 
autoclass:: ListClustersResponse + :members: +.. autoclass:: ListGlobalInitScriptsResponse + :members: +.. autoclass:: ListInstancePools + :members: +.. autoclass:: ListInstanceProfilesResponse + :members: +.. autoclass:: ListNodeTypesResponse + :members: +.. autoclass:: ListPoliciesResponse + :members: +.. autoclass:: ListPolicyFamiliesResponse + :members: +.. autoclass:: LocalFileInfo + :members: +.. autoclass:: LogAnalyticsInfo + :members: +.. autoclass:: LogSyncStatus + :members: +.. autoclass:: MavenLibrary + :members: +.. autoclass:: NodeInstanceType + :members: +.. autoclass:: NodeType + :members: +.. autoclass:: PendingInstanceError + :members: +.. autoclass:: PermanentDeleteCluster + :members: +.. autoclass:: PinCluster + :members: +.. autoclass:: Policy + :members: +.. autoclass:: PolicyFamily + :members: +.. autoclass:: PythonPyPiLibrary + :members: +.. autoclass:: RCranLibrary + :members: +.. autoclass:: RemoveInstanceProfile + :members: +.. autoclass:: ResizeCluster + :members: +.. autoclass:: RestartCluster + :members: +.. autoclass:: Results + :members: +.. autoclass:: S3StorageInfo + :members: +.. autoclass:: SparkNode + :members: +.. autoclass:: SparkNodeAwsAttributes + :members: +.. autoclass:: SparkVersion + :members: +.. autoclass:: StartCluster + :members: +.. autoclass:: TerminationReason + :members: +.. autoclass:: UninstallLibraries + :members: +.. autoclass:: UnpinCluster + :members: +.. autoclass:: VolumesStorageInfo + :members: +.. autoclass:: WorkloadType + :members: +.. autoclass:: WorkspaceStorageInfo + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/dashboards.rst b/docs/dbdataclasses/dashboards.rst new file mode 100644 index 000000000..3b24ec751 --- /dev/null +++ b/docs/dbdataclasses/dashboards.rst @@ -0,0 +1,8 @@ +Dashboards +========== + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.dashboards`` module. + +.. 
py:currentmodule:: databricks.sdk.service.dashboards +.. autoclass:: PublishRequest + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/files.rst b/docs/dbdataclasses/files.rst new file mode 100644 index 000000000..b7c1f1914 --- /dev/null +++ b/docs/dbdataclasses/files.rst @@ -0,0 +1,30 @@ +File Management +=============== + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.files`` module. + +.. py:currentmodule:: databricks.sdk.service.files +.. autoclass:: AddBlock + :members: +.. autoclass:: Close + :members: +.. autoclass:: Create + :members: +.. autoclass:: CreateResponse + :members: +.. autoclass:: Delete + :members: +.. autoclass:: DownloadResponse + :members: +.. autoclass:: FileInfo + :members: +.. autoclass:: ListStatusResponse + :members: +.. autoclass:: MkDirs + :members: +.. autoclass:: Move + :members: +.. autoclass:: Put + :members: +.. autoclass:: ReadResponse + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/iam.rst b/docs/dbdataclasses/iam.rst new file mode 100644 index 000000000..6f76e3176 --- /dev/null +++ b/docs/dbdataclasses/iam.rst @@ -0,0 +1,80 @@ +Identity and Access Management +============================== + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.iam`` module. + +.. py:currentmodule:: databricks.sdk.service.iam +.. autoclass:: AccessControlRequest + :members: +.. autoclass:: AccessControlResponse + :members: +.. autoclass:: ComplexValue + :members: +.. autoclass:: GetAssignableRolesForResourceResponse + :members: +.. autoclass:: GetPasswordPermissionLevelsResponse + :members: +.. autoclass:: GetPermissionLevelsResponse + :members: +.. autoclass:: GrantRule + :members: +.. autoclass:: Group + :members: +.. autoclass:: ListGroupsResponse + :members: +.. autoclass:: ListServicePrincipalResponse + :members: +.. 
autoclass:: ListUsersResponse + :members: +.. autoclass:: Name + :members: +.. autoclass:: ObjectPermissions + :members: +.. autoclass:: PartialUpdate + :members: +.. autoclass:: PasswordAccessControlRequest + :members: +.. autoclass:: PasswordAccessControlResponse + :members: +.. autoclass:: PasswordPermission + :members: +.. autoclass:: PasswordPermissions + :members: +.. autoclass:: PasswordPermissionsDescription + :members: +.. autoclass:: PasswordPermissionsRequest + :members: +.. autoclass:: Patch + :members: +.. autoclass:: Permission + :members: +.. autoclass:: PermissionAssignment + :members: +.. autoclass:: PermissionAssignments + :members: +.. autoclass:: PermissionOutput + :members: +.. autoclass:: PermissionsDescription + :members: +.. autoclass:: PermissionsRequest + :members: +.. autoclass:: PrincipalOutput + :members: +.. autoclass:: ResourceMeta + :members: +.. autoclass:: Role + :members: +.. autoclass:: RuleSetResponse + :members: +.. autoclass:: RuleSetUpdateRequest + :members: +.. autoclass:: ServicePrincipal + :members: +.. autoclass:: UpdateRuleSetRequest + :members: +.. autoclass:: UpdateWorkspaceAssignments + :members: +.. autoclass:: User + :members: +.. autoclass:: WorkspacePermissions + :members: \ No newline at end of file diff --git a/docs/autogen/reference.rst b/docs/dbdataclasses/index.rst similarity index 78% rename from docs/autogen/reference.rst rename to docs/dbdataclasses/index.rst index 5d5b020f1..f35ac7c51 100644 --- a/docs/autogen/reference.rst +++ b/docs/dbdataclasses/index.rst @@ -1,22 +1,24 @@ -Reference -========= +Dataclasses +=========== .. 
toctree:: :maxdepth: 1 - billing - catalog + workspace compute - files - iam jobs - ml - oauth2 pipelines - provisioning + files + ml serving - settings - sharing + iam sql - workspace + catalog + sharing + settings + provisioning + billing + oauth2 + vectorsearch + dashboards \ No newline at end of file diff --git a/docs/dbdataclasses/jobs.rst b/docs/dbdataclasses/jobs.rst new file mode 100644 index 000000000..48cca1cfe --- /dev/null +++ b/docs/dbdataclasses/jobs.rst @@ -0,0 +1,208 @@ +Jobs +==== + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.jobs`` module. + +.. py:currentmodule:: databricks.sdk.service.jobs +.. autoclass:: BaseJob + :members: +.. autoclass:: BaseRun + :members: +.. autoclass:: CancelAllRuns + :members: +.. autoclass:: CancelRun + :members: +.. autoclass:: ClusterInstance + :members: +.. autoclass:: ClusterSpec + :members: +.. autoclass:: ConditionTask + :members: +.. autoclass:: Continuous + :members: +.. autoclass:: CreateJob + :members: +.. autoclass:: CreateResponse + :members: +.. autoclass:: CronSchedule + :members: +.. autoclass:: DbtOutput + :members: +.. autoclass:: DbtTask + :members: +.. autoclass:: DeleteJob + :members: +.. autoclass:: DeleteRun + :members: +.. autoclass:: ExportRunOutput + :members: +.. autoclass:: FileArrivalTriggerConfiguration + :members: +.. autoclass:: GetJobPermissionLevelsResponse + :members: +.. autoclass:: GitSnapshot + :members: +.. autoclass:: GitSource + :members: +.. autoclass:: Job + :members: +.. autoclass:: JobAccessControlRequest + :members: +.. autoclass:: JobAccessControlResponse + :members: +.. autoclass:: JobCluster + :members: +.. autoclass:: JobCompute + :members: +.. autoclass:: JobDeployment + :members: +.. autoclass:: JobEmailNotifications + :members: +.. autoclass:: JobNotificationSettings + :members: +.. autoclass:: JobParameter + :members: +.. autoclass:: JobParameterDefinition + :members: +.. 
autoclass:: JobPermission + :members: +.. autoclass:: JobPermissions + :members: +.. autoclass:: JobPermissionsDescription + :members: +.. autoclass:: JobPermissionsRequest + :members: +.. autoclass:: JobRunAs + :members: +.. autoclass:: JobSettings + :members: +.. autoclass:: JobSource + :members: +.. autoclass:: JobsHealthRule + :members: +.. autoclass:: JobsHealthRules + :members: +.. autoclass:: ListJobsResponse + :members: +.. autoclass:: ListRunsResponse + :members: +.. autoclass:: NotebookOutput + :members: +.. autoclass:: NotebookTask + :members: +.. autoclass:: PipelineParams + :members: +.. autoclass:: PipelineTask + :members: +.. autoclass:: PythonWheelTask + :members: +.. autoclass:: QueueSettings + :members: +.. autoclass:: RepairHistoryItem + :members: +.. autoclass:: RepairRun + :members: +.. autoclass:: RepairRunResponse + :members: +.. autoclass:: ResetJob + :members: +.. autoclass:: ResolvedConditionTaskValues + :members: +.. autoclass:: ResolvedDbtTaskValues + :members: +.. autoclass:: ResolvedNotebookTaskValues + :members: +.. autoclass:: ResolvedParamPairValues + :members: +.. autoclass:: ResolvedPythonWheelTaskValues + :members: +.. autoclass:: ResolvedRunJobTaskValues + :members: +.. autoclass:: ResolvedStringParamsValues + :members: +.. autoclass:: ResolvedValues + :members: +.. autoclass:: Run + :members: +.. autoclass:: RunConditionTask + :members: +.. autoclass:: RunJobOutput + :members: +.. autoclass:: RunJobTask + :members: +.. autoclass:: RunNow + :members: +.. autoclass:: RunNowResponse + :members: +.. autoclass:: RunOutput + :members: +.. autoclass:: RunParameters + :members: +.. autoclass:: RunState + :members: +.. autoclass:: RunTask + :members: +.. autoclass:: SparkJarTask + :members: +.. autoclass:: SparkPythonTask + :members: +.. autoclass:: SparkSubmitTask + :members: +.. autoclass:: SqlAlertOutput + :members: +.. autoclass:: SqlDashboardOutput + :members: +.. autoclass:: SqlDashboardWidgetOutput + :members: +.. 
autoclass:: SqlOutput + :members: +.. autoclass:: SqlOutputError + :members: +.. autoclass:: SqlQueryOutput + :members: +.. autoclass:: SqlStatementOutput + :members: +.. autoclass:: SqlTask + :members: +.. autoclass:: SqlTaskAlert + :members: +.. autoclass:: SqlTaskDashboard + :members: +.. autoclass:: SqlTaskFile + :members: +.. autoclass:: SqlTaskQuery + :members: +.. autoclass:: SqlTaskSubscription + :members: +.. autoclass:: SubmitRun + :members: +.. autoclass:: SubmitRunResponse + :members: +.. autoclass:: SubmitTask + :members: +.. autoclass:: Task + :members: +.. autoclass:: TaskDependency + :members: +.. autoclass:: TaskEmailNotifications + :members: +.. autoclass:: TaskNotificationSettings + :members: +.. autoclass:: TriggerEvaluation + :members: +.. autoclass:: TriggerHistory + :members: +.. autoclass:: TriggerInfo + :members: +.. autoclass:: TriggerSettings + :members: +.. autoclass:: UpdateJob + :members: +.. autoclass:: ViewItem + :members: +.. autoclass:: Webhook + :members: +.. autoclass:: WebhookNotifications + :members: +.. autoclass:: WebhookNotificationsOnDurationWarningThresholdExceededItem + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/ml.rst b/docs/dbdataclasses/ml.rst new file mode 100644 index 000000000..3764a9251 --- /dev/null +++ b/docs/dbdataclasses/ml.rst @@ -0,0 +1,228 @@ +Machine Learning +================ + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.ml`` module. + +.. py:currentmodule:: databricks.sdk.service.ml +.. autoclass:: Activity + :members: +.. autoclass:: ApproveTransitionRequest + :members: +.. autoclass:: ApproveTransitionRequestResponse + :members: +.. autoclass:: CommentObject + :members: +.. autoclass:: CreateComment + :members: +.. autoclass:: CreateCommentResponse + :members: +.. autoclass:: CreateExperiment + :members: +.. autoclass:: CreateExperimentResponse + :members: +.. 
autoclass:: CreateModelRequest + :members: +.. autoclass:: CreateModelResponse + :members: +.. autoclass:: CreateModelVersionRequest + :members: +.. autoclass:: CreateModelVersionResponse + :members: +.. autoclass:: CreateRegistryWebhook + :members: +.. autoclass:: CreateRun + :members: +.. autoclass:: CreateRunResponse + :members: +.. autoclass:: CreateTransitionRequest + :members: +.. autoclass:: CreateTransitionRequestResponse + :members: +.. autoclass:: CreateWebhookResponse + :members: +.. autoclass:: Dataset + :members: +.. autoclass:: DatasetInput + :members: +.. autoclass:: DeleteExperiment + :members: +.. autoclass:: DeleteRun + :members: +.. autoclass:: DeleteRuns + :members: +.. autoclass:: DeleteRunsResponse + :members: +.. autoclass:: DeleteTag + :members: +.. autoclass:: Experiment + :members: +.. autoclass:: ExperimentAccessControlRequest + :members: +.. autoclass:: ExperimentAccessControlResponse + :members: +.. autoclass:: ExperimentPermission + :members: +.. autoclass:: ExperimentPermissions + :members: +.. autoclass:: ExperimentPermissionsDescription + :members: +.. autoclass:: ExperimentPermissionsRequest + :members: +.. autoclass:: ExperimentTag + :members: +.. autoclass:: FileInfo + :members: +.. autoclass:: GetExperimentPermissionLevelsResponse + :members: +.. autoclass:: GetExperimentResponse + :members: +.. autoclass:: GetLatestVersionsRequest + :members: +.. autoclass:: GetLatestVersionsResponse + :members: +.. autoclass:: GetMetricHistoryResponse + :members: +.. autoclass:: GetModelResponse + :members: +.. autoclass:: GetModelVersionDownloadUriResponse + :members: +.. autoclass:: GetModelVersionResponse + :members: +.. autoclass:: GetRegisteredModelPermissionLevelsResponse + :members: +.. autoclass:: GetRunResponse + :members: +.. autoclass:: HttpUrlSpec + :members: +.. autoclass:: HttpUrlSpecWithoutSecret + :members: +.. autoclass:: InputTag + :members: +.. autoclass:: JobSpec + :members: +.. 
autoclass:: JobSpecWithoutSecret + :members: +.. autoclass:: ListArtifactsResponse + :members: +.. autoclass:: ListExperimentsResponse + :members: +.. autoclass:: ListModelsResponse + :members: +.. autoclass:: ListRegistryWebhooks + :members: +.. autoclass:: ListTransitionRequestsResponse + :members: +.. autoclass:: LogBatch + :members: +.. autoclass:: LogInputs + :members: +.. autoclass:: LogMetric + :members: +.. autoclass:: LogModel + :members: +.. autoclass:: LogParam + :members: +.. autoclass:: Metric + :members: +.. autoclass:: Model + :members: +.. autoclass:: ModelDatabricks + :members: +.. autoclass:: ModelTag + :members: +.. autoclass:: ModelVersion + :members: +.. autoclass:: ModelVersionDatabricks + :members: +.. autoclass:: ModelVersionTag + :members: +.. autoclass:: Param + :members: +.. autoclass:: RegisteredModelAccessControlRequest + :members: +.. autoclass:: RegisteredModelAccessControlResponse + :members: +.. autoclass:: RegisteredModelPermission + :members: +.. autoclass:: RegisteredModelPermissions + :members: +.. autoclass:: RegisteredModelPermissionsDescription + :members: +.. autoclass:: RegisteredModelPermissionsRequest + :members: +.. autoclass:: RegistryWebhook + :members: +.. autoclass:: RejectTransitionRequest + :members: +.. autoclass:: RejectTransitionRequestResponse + :members: +.. autoclass:: RenameModelRequest + :members: +.. autoclass:: RenameModelResponse + :members: +.. autoclass:: RestoreExperiment + :members: +.. autoclass:: RestoreRun + :members: +.. autoclass:: RestoreRuns + :members: +.. autoclass:: RestoreRunsResponse + :members: +.. autoclass:: Run + :members: +.. autoclass:: RunData + :members: +.. autoclass:: RunInfo + :members: +.. autoclass:: RunInputs + :members: +.. autoclass:: RunTag + :members: +.. autoclass:: SearchExperiments + :members: +.. autoclass:: SearchExperimentsResponse + :members: +.. autoclass:: SearchModelVersionsResponse + :members: +.. autoclass:: SearchModelsResponse + :members: +.. 
autoclass:: SearchRuns + :members: +.. autoclass:: SearchRunsResponse + :members: +.. autoclass:: SetExperimentTag + :members: +.. autoclass:: SetModelTagRequest + :members: +.. autoclass:: SetModelVersionTagRequest + :members: +.. autoclass:: SetTag + :members: +.. autoclass:: TestRegistryWebhook + :members: +.. autoclass:: TestRegistryWebhookRequest + :members: +.. autoclass:: TestRegistryWebhookResponse + :members: +.. autoclass:: TransitionModelVersionStageDatabricks + :members: +.. autoclass:: TransitionRequest + :members: +.. autoclass:: TransitionStageResponse + :members: +.. autoclass:: UpdateComment + :members: +.. autoclass:: UpdateCommentResponse + :members: +.. autoclass:: UpdateExperiment + :members: +.. autoclass:: UpdateModelRequest + :members: +.. autoclass:: UpdateModelVersionRequest + :members: +.. autoclass:: UpdateRegistryWebhook + :members: +.. autoclass:: UpdateRun + :members: +.. autoclass:: UpdateRunResponse + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/oauth2.rst b/docs/dbdataclasses/oauth2.rst new file mode 100644 index 000000000..f455f0f17 --- /dev/null +++ b/docs/dbdataclasses/oauth2.rst @@ -0,0 +1,38 @@ +OAuth +===== + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.oauth2`` module. + +.. py:currentmodule:: databricks.sdk.service.oauth2 +.. autoclass:: CreateCustomAppIntegration + :members: +.. autoclass:: CreateCustomAppIntegrationOutput + :members: +.. autoclass:: CreatePublishedAppIntegration + :members: +.. autoclass:: CreatePublishedAppIntegrationOutput + :members: +.. autoclass:: CreateServicePrincipalSecretResponse + :members: +.. autoclass:: GetCustomAppIntegrationOutput + :members: +.. autoclass:: GetCustomAppIntegrationsOutput + :members: +.. autoclass:: GetPublishedAppIntegrationOutput + :members: +.. autoclass:: GetPublishedAppIntegrationsOutput + :members: +.. autoclass:: GetPublishedAppsOutput + :members: +.. 
autoclass:: ListServicePrincipalSecretsResponse + :members: +.. autoclass:: PublishedAppOutput + :members: +.. autoclass:: SecretInfo + :members: +.. autoclass:: TokenAccessPolicy + :members: +.. autoclass:: UpdateCustomAppIntegration + :members: +.. autoclass:: UpdatePublishedAppIntegration + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/pipelines.rst b/docs/dbdataclasses/pipelines.rst new file mode 100644 index 000000000..6a9b03f20 --- /dev/null +++ b/docs/dbdataclasses/pipelines.rst @@ -0,0 +1,78 @@ +Delta Live Tables +================= + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.pipelines`` module. + +.. py:currentmodule:: databricks.sdk.service.pipelines +.. autoclass:: CreatePipeline + :members: +.. autoclass:: CreatePipelineResponse + :members: +.. autoclass:: CronTrigger + :members: +.. autoclass:: DataPlaneId + :members: +.. autoclass:: EditPipeline + :members: +.. autoclass:: ErrorDetail + :members: +.. autoclass:: FileLibrary + :members: +.. autoclass:: Filters + :members: +.. autoclass:: GetPipelinePermissionLevelsResponse + :members: +.. autoclass:: GetPipelineResponse + :members: +.. autoclass:: GetUpdateResponse + :members: +.. autoclass:: ListPipelineEventsResponse + :members: +.. autoclass:: ListPipelinesResponse + :members: +.. autoclass:: ListUpdatesResponse + :members: +.. autoclass:: NotebookLibrary + :members: +.. autoclass:: Notifications + :members: +.. autoclass:: Origin + :members: +.. autoclass:: PipelineAccessControlRequest + :members: +.. autoclass:: PipelineAccessControlResponse + :members: +.. autoclass:: PipelineCluster + :members: +.. autoclass:: PipelineEvent + :members: +.. autoclass:: PipelineLibrary + :members: +.. autoclass:: PipelinePermission + :members: +.. autoclass:: PipelinePermissions + :members: +.. autoclass:: PipelinePermissionsDescription + :members: +.. autoclass:: PipelinePermissionsRequest + :members: +.. 
autoclass:: PipelineSpec + :members: +.. autoclass:: PipelineStateInfo + :members: +.. autoclass:: PipelineTrigger + :members: +.. autoclass:: Sequencing + :members: +.. autoclass:: SerializedException + :members: +.. autoclass:: StackFrame + :members: +.. autoclass:: StartUpdate + :members: +.. autoclass:: StartUpdateResponse + :members: +.. autoclass:: UpdateInfo + :members: +.. autoclass:: UpdateStateInfo + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/provisioning.rst b/docs/dbdataclasses/provisioning.rst new file mode 100644 index 000000000..558881a00 --- /dev/null +++ b/docs/dbdataclasses/provisioning.rst @@ -0,0 +1,74 @@ +Provisioning +============ + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.provisioning`` module. + +.. py:currentmodule:: databricks.sdk.service.provisioning +.. autoclass:: AwsCredentials + :members: +.. autoclass:: AwsKeyInfo + :members: +.. autoclass:: AzureWorkspaceInfo + :members: +.. autoclass:: CloudResourceContainer + :members: +.. autoclass:: CreateAwsKeyInfo + :members: +.. autoclass:: CreateCredentialAwsCredentials + :members: +.. autoclass:: CreateCredentialRequest + :members: +.. autoclass:: CreateCredentialStsRole + :members: +.. autoclass:: CreateCustomerManagedKeyRequest + :members: +.. autoclass:: CreateGcpKeyInfo + :members: +.. autoclass:: CreateNetworkRequest + :members: +.. autoclass:: CreateStorageConfigurationRequest + :members: +.. autoclass:: CreateVpcEndpointRequest + :members: +.. autoclass:: CreateWorkspaceRequest + :members: +.. autoclass:: Credential + :members: +.. autoclass:: CustomerFacingGcpCloudResourceContainer + :members: +.. autoclass:: CustomerManagedKey + :members: +.. autoclass:: GcpKeyInfo + :members: +.. autoclass:: GcpManagedNetworkConfig + :members: +.. autoclass:: GcpNetworkInfo + :members: +.. autoclass:: GcpVpcEndpointInfo + :members: +.. autoclass:: GkeConfig + :members: +.. 
autoclass:: Network + :members: +.. autoclass:: NetworkHealth + :members: +.. autoclass:: NetworkVpcEndpoints + :members: +.. autoclass:: NetworkWarning + :members: +.. autoclass:: PrivateAccessSettings + :members: +.. autoclass:: RootBucketInfo + :members: +.. autoclass:: StorageConfiguration + :members: +.. autoclass:: StsRole + :members: +.. autoclass:: UpdateWorkspaceRequest + :members: +.. autoclass:: UpsertPrivateAccessSettingsRequest + :members: +.. autoclass:: VpcEndpoint + :members: +.. autoclass:: Workspace + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/serving.rst b/docs/dbdataclasses/serving.rst new file mode 100644 index 000000000..8828e5956 --- /dev/null +++ b/docs/dbdataclasses/serving.rst @@ -0,0 +1,128 @@ +Real-time Serving +================= + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.serving`` module. + +.. py:currentmodule:: databricks.sdk.service.serving +.. autoclass:: Ai21LabsConfig + :members: +.. autoclass:: AnthropicConfig + :members: +.. autoclass:: AppEvents + :members: +.. autoclass:: AppManifest + :members: +.. autoclass:: AppServiceStatus + :members: +.. autoclass:: AutoCaptureConfigInput + :members: +.. autoclass:: AutoCaptureConfigOutput + :members: +.. autoclass:: AutoCaptureState + :members: +.. autoclass:: AwsBedrockConfig + :members: +.. autoclass:: BuildLogsResponse + :members: +.. autoclass:: ChatMessage + :members: +.. autoclass:: CohereConfig + :members: +.. autoclass:: CreateServingEndpoint + :members: +.. autoclass:: DatabricksModelServingConfig + :members: +.. autoclass:: DataframeSplitInput + :members: +.. autoclass:: DeleteAppResponse + :members: +.. autoclass:: DeployAppRequest + :members: +.. autoclass:: DeploymentStatus + :members: +.. autoclass:: EmbeddingsV1ResponseEmbeddingElement + :members: +.. autoclass:: EndpointCoreConfigInput + :members: +.. autoclass:: EndpointCoreConfigOutput + :members: +.. 
autoclass:: EndpointCoreConfigSummary + :members: +.. autoclass:: EndpointPendingConfig + :members: +.. autoclass:: EndpointState + :members: +.. autoclass:: EndpointTag + :members: +.. autoclass:: ExternalModel + :members: +.. autoclass:: ExternalModelConfig + :members: +.. autoclass:: ExternalModelUsageElement + :members: +.. autoclass:: FoundationModel + :members: +.. autoclass:: GetAppResponse + :members: +.. autoclass:: GetServingEndpointPermissionLevelsResponse + :members: +.. autoclass:: ListAppEventsResponse + :members: +.. autoclass:: ListAppsResponse + :members: +.. autoclass:: ListEndpointsResponse + :members: +.. autoclass:: OpenAiConfig + :members: +.. autoclass:: PaLmConfig + :members: +.. autoclass:: PatchServingEndpointTags + :members: +.. autoclass:: PayloadTable + :members: +.. autoclass:: PutResponse + :members: +.. autoclass:: QueryEndpointInput + :members: +.. autoclass:: QueryEndpointResponse + :members: +.. autoclass:: RateLimit + :members: +.. autoclass:: Route + :members: +.. autoclass:: ServedEntityInput + :members: +.. autoclass:: ServedEntityOutput + :members: +.. autoclass:: ServedEntitySpec + :members: +.. autoclass:: ServedModelInput + :members: +.. autoclass:: ServedModelOutput + :members: +.. autoclass:: ServedModelSpec + :members: +.. autoclass:: ServedModelState + :members: +.. autoclass:: ServerLogsResponse + :members: +.. autoclass:: ServingEndpoint + :members: +.. autoclass:: ServingEndpointAccessControlRequest + :members: +.. autoclass:: ServingEndpointAccessControlResponse + :members: +.. autoclass:: ServingEndpointDetailed + :members: +.. autoclass:: ServingEndpointPermission + :members: +.. autoclass:: ServingEndpointPermissions + :members: +.. autoclass:: ServingEndpointPermissionsDescription + :members: +.. autoclass:: ServingEndpointPermissionsRequest + :members: +.. autoclass:: TrafficConfig + :members: +.. 
autoclass:: V1ResponseChoiceElement + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/settings.rst b/docs/dbdataclasses/settings.rst new file mode 100644 index 000000000..66cbb7b33 --- /dev/null +++ b/docs/dbdataclasses/settings.rst @@ -0,0 +1,96 @@ +Settings +======== + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.settings`` module. + +.. py:currentmodule:: databricks.sdk.service.settings +.. autoclass:: CreateIpAccessList + :members: +.. autoclass:: CreateIpAccessListResponse + :members: +.. autoclass:: CreateNetworkConnectivityConfigRequest + :members: +.. autoclass:: CreateOboTokenRequest + :members: +.. autoclass:: CreateOboTokenResponse + :members: +.. autoclass:: CreatePrivateEndpointRuleRequest + :members: +.. autoclass:: CreateTokenRequest + :members: +.. autoclass:: CreateTokenResponse + :members: +.. autoclass:: DefaultNamespaceSetting + :members: +.. autoclass:: DeleteDefaultWorkspaceNamespaceResponse + :members: +.. autoclass:: DeletePersonalComputeSettingResponse + :members: +.. autoclass:: ExchangeToken + :members: +.. autoclass:: ExchangeTokenRequest + :members: +.. autoclass:: ExchangeTokenResponse + :members: +.. autoclass:: FetchIpAccessListResponse + :members: +.. autoclass:: GetIpAccessListResponse + :members: +.. autoclass:: GetIpAccessListsResponse + :members: +.. autoclass:: GetTokenPermissionLevelsResponse + :members: +.. autoclass:: IpAccessListInfo + :members: +.. autoclass:: ListIpAccessListResponse + :members: +.. autoclass:: ListNccAzurePrivateEndpointRulesResponse + :members: +.. autoclass:: ListNetworkConnectivityConfigurationsResponse + :members: +.. autoclass:: ListPublicTokensResponse + :members: +.. autoclass:: ListTokensResponse + :members: +.. autoclass:: NccAzurePrivateEndpointRule + :members: +.. autoclass:: NccAzureServiceEndpointRule + :members: +.. autoclass:: NccEgressConfig + :members: +.. 
autoclass:: NccEgressDefaultRules + :members: +.. autoclass:: NccEgressTargetRules + :members: +.. autoclass:: NetworkConnectivityConfiguration + :members: +.. autoclass:: PartitionId + :members: +.. autoclass:: PersonalComputeMessage + :members: +.. autoclass:: PersonalComputeSetting + :members: +.. autoclass:: PublicTokenInfo + :members: +.. autoclass:: ReplaceIpAccessList + :members: +.. autoclass:: RevokeTokenRequest + :members: +.. autoclass:: StringMessage + :members: +.. autoclass:: TokenAccessControlRequest + :members: +.. autoclass:: TokenAccessControlResponse + :members: +.. autoclass:: TokenInfo + :members: +.. autoclass:: TokenPermission + :members: +.. autoclass:: TokenPermissions + :members: +.. autoclass:: TokenPermissionsDescription + :members: +.. autoclass:: TokenPermissionsRequest + :members: +.. autoclass:: UpdateIpAccessList + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/sharing.rst b/docs/dbdataclasses/sharing.rst new file mode 100644 index 000000000..f9897ef5c --- /dev/null +++ b/docs/dbdataclasses/sharing.rst @@ -0,0 +1,88 @@ +Delta Sharing +============= + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.sharing`` module. + +.. py:currentmodule:: databricks.sdk.service.sharing +.. autoclass:: CentralCleanRoomInfo + :members: +.. autoclass:: CleanRoomAssetInfo + :members: +.. autoclass:: CleanRoomCatalog + :members: +.. autoclass:: CleanRoomCatalogUpdate + :members: +.. autoclass:: CleanRoomCollaboratorInfo + :members: +.. autoclass:: CleanRoomInfo + :members: +.. autoclass:: CleanRoomNotebookInfo + :members: +.. autoclass:: CleanRoomTableInfo + :members: +.. autoclass:: ColumnInfo + :members: +.. autoclass:: ColumnMask + :members: +.. autoclass:: CreateCleanRoom + :members: +.. autoclass:: CreateProvider + :members: +.. autoclass:: CreateRecipient + :members: +.. autoclass:: CreateShare + :members: +.. 
autoclass:: GetRecipientSharePermissionsResponse + :members: +.. autoclass:: IpAccessList + :members: +.. autoclass:: ListCleanRoomsResponse + :members: +.. autoclass:: ListProviderSharesResponse + :members: +.. autoclass:: ListProvidersResponse + :members: +.. autoclass:: ListRecipientsResponse + :members: +.. autoclass:: ListSharesResponse + :members: +.. autoclass:: Partition + :members: +.. autoclass:: PartitionValue + :members: +.. autoclass:: PrivilegeAssignment + :members: +.. autoclass:: ProviderInfo + :members: +.. autoclass:: ProviderShare + :members: +.. autoclass:: RecipientInfo + :members: +.. autoclass:: RecipientProfile + :members: +.. autoclass:: RecipientTokenInfo + :members: +.. autoclass:: RetrieveTokenResponse + :members: +.. autoclass:: RotateRecipientToken + :members: +.. autoclass:: SecurablePropertiesKvPairs + :members: +.. autoclass:: ShareInfo + :members: +.. autoclass:: ShareToPrivilegeAssignment + :members: +.. autoclass:: SharedDataObject + :members: +.. autoclass:: SharedDataObjectUpdate + :members: +.. autoclass:: UpdateCleanRoom + :members: +.. autoclass:: UpdateProvider + :members: +.. autoclass:: UpdateRecipient + :members: +.. autoclass:: UpdateShare + :members: +.. autoclass:: UpdateSharePermissions + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/sql.rst b/docs/dbdataclasses/sql.rst new file mode 100644 index 000000000..50f070ca9 --- /dev/null +++ b/docs/dbdataclasses/sql.rst @@ -0,0 +1,146 @@ +Databricks SQL +============== + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.sql`` module. + +.. py:currentmodule:: databricks.sdk.service.sql +.. autoclass:: AccessControl + :members: +.. autoclass:: Alert + :members: +.. autoclass:: AlertOptions + :members: +.. autoclass:: AlertQuery + :members: +.. autoclass:: BaseChunkInfo + :members: +.. autoclass:: Channel + :members: +.. autoclass:: ChannelInfo + :members: +.. 
autoclass:: ColumnInfo + :members: +.. autoclass:: CreateAlert + :members: +.. autoclass:: CreateWarehouseRequest + :members: +.. autoclass:: CreateWarehouseResponse + :members: +.. autoclass:: CreateWidget + :members: +.. autoclass:: Dashboard + :members: +.. autoclass:: DashboardEditContent + :members: +.. autoclass:: DashboardOptions + :members: +.. autoclass:: DashboardPostContent + :members: +.. autoclass:: DataSource + :members: +.. autoclass:: EditAlert + :members: +.. autoclass:: EditWarehouseRequest + :members: +.. autoclass:: EndpointConfPair + :members: +.. autoclass:: EndpointHealth + :members: +.. autoclass:: EndpointInfo + :members: +.. autoclass:: EndpointTagPair + :members: +.. autoclass:: EndpointTags + :members: +.. autoclass:: ExecuteStatementRequest + :members: +.. autoclass:: ExecuteStatementResponse + :members: +.. autoclass:: ExternalLink + :members: +.. autoclass:: GetResponse + :members: +.. autoclass:: GetStatementResponse + :members: +.. autoclass:: GetWarehousePermissionLevelsResponse + :members: +.. autoclass:: GetWarehouseResponse + :members: +.. autoclass:: GetWorkspaceWarehouseConfigResponse + :members: +.. autoclass:: ListQueriesResponse + :members: +.. autoclass:: ListResponse + :members: +.. autoclass:: ListWarehousesResponse + :members: +.. autoclass:: OdbcParams + :members: +.. autoclass:: Parameter + :members: +.. autoclass:: Query + :members: +.. autoclass:: QueryEditContent + :members: +.. autoclass:: QueryFilter + :members: +.. autoclass:: QueryInfo + :members: +.. autoclass:: QueryList + :members: +.. autoclass:: QueryMetrics + :members: +.. autoclass:: QueryOptions + :members: +.. autoclass:: QueryPostContent + :members: +.. autoclass:: RepeatedEndpointConfPairs + :members: +.. autoclass:: ResultData + :members: +.. autoclass:: ResultManifest + :members: +.. autoclass:: ResultSchema + :members: +.. autoclass:: ServiceError + :members: +.. autoclass:: SetResponse + :members: +.. 
autoclass:: SetWorkspaceWarehouseConfigRequest + :members: +.. autoclass:: StatementParameterListItem + :members: +.. autoclass:: StatementStatus + :members: +.. autoclass:: Success + :members: +.. autoclass:: TerminationReason + :members: +.. autoclass:: TimeRange + :members: +.. autoclass:: TransferOwnershipObjectId + :members: +.. autoclass:: User + :members: +.. autoclass:: Visualization + :members: +.. autoclass:: WarehouseAccessControlRequest + :members: +.. autoclass:: WarehouseAccessControlResponse + :members: +.. autoclass:: WarehousePermission + :members: +.. autoclass:: WarehousePermissions + :members: +.. autoclass:: WarehousePermissionsDescription + :members: +.. autoclass:: WarehousePermissionsRequest + :members: +.. autoclass:: WarehouseTypePair + :members: +.. autoclass:: Widget + :members: +.. autoclass:: WidgetOptions + :members: +.. autoclass:: WidgetPosition + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/vectorsearch.rst b/docs/dbdataclasses/vectorsearch.rst new file mode 100644 index 000000000..5f48bf4b4 --- /dev/null +++ b/docs/dbdataclasses/vectorsearch.rst @@ -0,0 +1,60 @@ +Vector Search +============= + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.vectorsearch`` module. + +.. py:currentmodule:: databricks.sdk.service.vectorsearch +.. autoclass:: ColumnInfo + :members: +.. autoclass:: CreateEndpoint + :members: +.. autoclass:: CreateVectorIndexRequest + :members: +.. autoclass:: CreateVectorIndexResponse + :members: +.. autoclass:: DeleteDataResult + :members: +.. autoclass:: DeleteDataVectorIndexRequest + :members: +.. autoclass:: DeleteDataVectorIndexResponse + :members: +.. autoclass:: DeltaSyncVectorIndexSpecRequest + :members: +.. autoclass:: DeltaSyncVectorIndexSpecResponse + :members: +.. autoclass:: DirectAccessVectorIndexSpec + :members: +.. autoclass:: EmbeddingConfig + :members: +.. 
autoclass:: EmbeddingSourceColumn + :members: +.. autoclass:: EmbeddingVectorColumn + :members: +.. autoclass:: EndpointInfo + :members: +.. autoclass:: EndpointStatus + :members: +.. autoclass:: ListEndpointResponse + :members: +.. autoclass:: ListVectorIndexesResponse + :members: +.. autoclass:: MiniVectorIndex + :members: +.. autoclass:: QueryVectorIndexRequest + :members: +.. autoclass:: QueryVectorIndexResponse + :members: +.. autoclass:: ResultData + :members: +.. autoclass:: ResultManifest + :members: +.. autoclass:: UpsertDataResult + :members: +.. autoclass:: UpsertDataVectorIndexRequest + :members: +.. autoclass:: UpsertDataVectorIndexResponse + :members: +.. autoclass:: VectorIndex + :members: +.. autoclass:: VectorIndexStatus + :members: \ No newline at end of file diff --git a/docs/dbdataclasses/workspace.rst b/docs/dbdataclasses/workspace.rst new file mode 100644 index 000000000..4fa00e31b --- /dev/null +++ b/docs/dbdataclasses/workspace.rst @@ -0,0 +1,96 @@ +Workspace +========= + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.workspace`` module. + +.. py:currentmodule:: databricks.sdk.service.workspace +.. autoclass:: AclItem + :members: +.. autoclass:: AzureKeyVaultSecretScopeMetadata + :members: +.. autoclass:: CreateCredentials + :members: +.. autoclass:: CreateCredentialsResponse + :members: +.. autoclass:: CreateRepo + :members: +.. autoclass:: CreateScope + :members: +.. autoclass:: CredentialInfo + :members: +.. autoclass:: Delete + :members: +.. autoclass:: DeleteAcl + :members: +.. autoclass:: DeleteScope + :members: +.. autoclass:: DeleteSecret + :members: +.. autoclass:: ExportResponse + :members: +.. autoclass:: GetCredentialsResponse + :members: +.. autoclass:: GetRepoPermissionLevelsResponse + :members: +.. autoclass:: GetSecretResponse + :members: +.. autoclass:: GetWorkspaceObjectPermissionLevelsResponse + :members: +.. autoclass:: Import + :members: +.. 
autoclass:: ListAclsResponse + :members: +.. autoclass:: ListReposResponse + :members: +.. autoclass:: ListResponse + :members: +.. autoclass:: ListScopesResponse + :members: +.. autoclass:: ListSecretsResponse + :members: +.. autoclass:: Mkdirs + :members: +.. autoclass:: ObjectInfo + :members: +.. autoclass:: PutAcl + :members: +.. autoclass:: PutSecret + :members: +.. autoclass:: RepoAccessControlRequest + :members: +.. autoclass:: RepoAccessControlResponse + :members: +.. autoclass:: RepoInfo + :members: +.. autoclass:: RepoPermission + :members: +.. autoclass:: RepoPermissions + :members: +.. autoclass:: RepoPermissionsDescription + :members: +.. autoclass:: RepoPermissionsRequest + :members: +.. autoclass:: SecretMetadata + :members: +.. autoclass:: SecretScope + :members: +.. autoclass:: SparseCheckout + :members: +.. autoclass:: SparseCheckoutUpdate + :members: +.. autoclass:: UpdateCredentials + :members: +.. autoclass:: UpdateRepo + :members: +.. autoclass:: WorkspaceObjectAccessControlRequest + :members: +.. autoclass:: WorkspaceObjectAccessControlResponse + :members: +.. autoclass:: WorkspaceObjectPermission + :members: +.. autoclass:: WorkspaceObjectPermissions + :members: +.. autoclass:: WorkspaceObjectPermissionsDescription + :members: +.. 
autoclass:: WorkspaceObjectPermissionsRequest + :members: \ No newline at end of file diff --git a/docs/gen-client-docs.py b/docs/gen-client-docs.py index 948da61b7..23ab1a5e6 100644 --- a/docs/gen-client-docs.py +++ b/docs/gen-client-docs.py @@ -1,8 +1,21 @@ #!env python3 +import collections +import dbdataclasses +import inspect +import json import os.path -from dataclasses import dataclass +import subprocess +import importlib +from dataclasses import dataclass, is_dataclass +from enum import Enum +from pathlib import Path +from typing import Optional, Any, get_args + +from databricks.sdk import AccountClient, WorkspaceClient +from databricks.sdk.core import credentials_provider __dir__ = os.path.dirname(__file__) +__examples__ = Path(f'{__dir__}/../examples').absolute() @dataclass @@ -12,9 +25,157 @@ class Package: description: str +@dataclass +class Tag: + name: str + service: str + is_account: bool + package: Package + + +@dataclass +class TypedArgument: + name: str + tpe: Optional[str] + default: Optional[Any] + + def __str__(self): + ret = self.name + if self.tpe is not None: + ret += f': {self.tpe}' + elif self.default is not None: + tpe = type(self.default) + if tpe.__module__ == 'builtins': + ret += f': {tpe.__name__}' + else: + ret += f': {tpe.__module__}.{tpe.__name__}' + if self.default is not None: + ret += f' = {self.default}' + return ret + + +@dataclass +class MethodDoc: + method_name: str + doc: Optional[str] + required_args: list[TypedArgument] + kwonly_args: list[TypedArgument] + return_type: Optional[str] + + def argspec(self): + args = ', '.join([str(x) for x in self.required_args]) + if len(self.kwonly_args) > 0: + other = ', '.join([str(x) for x in self.kwonly_args]) + args = f'{args} [, {other}]' + return args + + def as_rst(self, usage) -> str: + ret_annotation = f' -> {self.return_type}' if self.return_type is not None else '' + out = ['', f' .. 
py:method:: {self.method_name}({self.argspec()}){ret_annotation}', ''] + if usage != '': + out.append(usage) + if self.doc is not None: + out.append(f' {self.doc}') + return "\n".join(out) + + +@dataclass +class ServiceDoc: + client_prefix: str + service_name: str + class_name: str + methods: list[MethodDoc] + doc: str + tag: Tag + + def as_rst(self) -> str: + if not self.doc: + self.doc = '' + title = f'``{self.client_prefix}.{self.service_name}``: {self.tag.name}' + out = [ + title, '=' * len(title), + f'.. currentmodule:: databricks.sdk.service.{self.tag.package.name}', '', + f'.. py:class:: {self.class_name}', '', f' {self.doc}' + ] + for m in self.methods: + usage = self.usage_example(m) + rst = m.as_rst(usage) + if not rst: + continue + out.append(rst) + + return "\n".join(out) + + def usage_example(self, m): + out = [] + example_root, example_files = self.examples() + for potential_example in example_files: + if not potential_example.startswith(m.method_name): + continue + out.append("") + out.append(" Usage:") + out.append("") + out.append(" .. code-block::") + out.append("") + with (example_root / potential_example).open('r') as f: + for line in f.readlines(): + line = line.rstrip("\n") + out.append(f' {line}') + out.append("") + return "\n".join(out) + return "" + + def examples(self): + try: + root = __examples__ / self.service_name + return root, os.listdir(root) + except: + return None, [] + +@dataclass +class DataclassesDoc: + package: Package + dataclasses: list[str] + + def as_rst(self) -> str: + title = f'{self.package.label}' + out = [ + title, '=' * len(title), '', + f'These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.{self.package.name}`` module.', + '', + f'.. 
py:currentmodule:: databricks.sdk.service.{self.package.name}', + ] + for d in self.dataclasses: + out.append(self.dataclass_rst(d)) + return "\n".join(out) + + def dataclass_rst(self, cls) -> str: + mod = importlib.import_module(f'databricks.sdk.service.{self.package.name}') + clss = getattr(mod, cls) + if issubclass(clss, Enum): + out = [ + f'.. py:class:: {cls}', + '', + ] + if clss.__doc__ is not None: + out.append(f' {clss.__doc__}') + out.append('') + for v in clss.__members__.keys(): + out.append(f' .. py:attribute:: {v}') + out.append(f' :value: "{v}"') + out.append('') + else: + out = [ + f'.. autoclass:: {cls}', + ' :members:' + '' + ] + return "\n".join(out) + + class Generator: packages = [ - Package("workspace", "Databricks Workspace", + Package("workspace", "Workspace", "Manage workspace-level entities that include notebooks, Git checkouts, and secrets"), Package("compute", "Compute", "Use and configure compute for Databricks"), Package("jobs", "Jobs", "Schedule automated jobs on Databricks Workspaces"), @@ -42,50 +203,200 @@ class Generator: "Resource management for secure Databricks Workspace deployment, cross-account IAM roles, " + "storage, encryption, networking and private access."), Package("billing", "Billing", "Configure different aspects of Databricks billing and usage."), - Package("oauth2", "OAuth", "Configure OAuth 2.0 application registrations for Databricks") + Package("oauth2", "OAuth", "Configure OAuth 2.0 application registrations for Databricks"), + Package("vectorsearch", "Vector Search", "Create and query Vector Search indexes"), + Package("dashboards", "Dashboards", "Manage Lakeview dashboards"), ] - def write_reference(self): + def __init__(self): + self.mapping = self._load_mapping() + + def _openapi_spec(self) -> str: + if 'DATABRICKS_OPENAPI_SPEC' in os.environ: + with open(os.environ['DATABRICKS_OPENAPI_SPEC']) as f: + return f.read() + with open(f'{__dir__}/../.codegen/_openapi_sha') as f: + sha = f.read().strip() + return 
subprocess.check_output(['deco', 'openapi', 'get', sha]).decode('utf-8') + + def _load_mapping(self) -> dict[str, Tag]: + mapping = {} + pkgs = {p.name: p for p in self.packages} + spec = json.loads(self._openapi_spec()) + for tag in spec['tags']: + t = Tag(name=tag['name'], + service=tag['x-databricks-service'], + is_account=tag.get('x-databricks-is-accounts', False), + package=pkgs[tag['x-databricks-package']]) + mapping[tag['name']] = t + return mapping + + @staticmethod + def _get_type_from_annotations(annotations, name): + tpe = annotations.get(name) + if len(get_args(tpe)) > 0: + tpe = get_args(tpe)[0] + if isinstance(tpe, type): + tpe = tpe.__name__ + return tpe + + @staticmethod + def _to_typed_args(argspec: inspect.FullArgSpec, required: bool) -> list[TypedArgument]: + annotations = argspec.annotations if argspec.annotations is not None else {} + if required: + argslist = argspec.args[1:] + defaults = {} + for i, x in enumerate(argspec.defaults if argspec.defaults is not None else []): + defaults[argslist[i - len(argspec.defaults)]] = x + else: + argslist = argspec.kwonlyargs + defaults = argspec.kwonlydefaults + out = [] + for arg in argslist: + tpe = Generator._get_type_from_annotations(annotations, arg) + out.append(TypedArgument(name=arg, tpe=tpe, default=defaults.get(arg))) + return out + + def class_methods(self, inst) -> list[MethodDoc]: + method_docs = [] + for name in dir(inst): + if name[0] == '_': + # private members + continue + instance_attr = getattr(inst, name) + if not callable(instance_attr): + continue + args = inspect.getfullargspec(instance_attr) + method_docs.append( + MethodDoc(method_name=name, + required_args=self._to_typed_args(args, required=True), + kwonly_args=self._to_typed_args(args, required=False), + doc=instance_attr.__doc__, + return_type=Generator._get_type_from_annotations(args.annotations, 'return'))) + return method_docs + + def service_docs(self, client_inst) -> list[ServiceDoc]: + client_prefix = 'w' if 
isinstance(client_inst, WorkspaceClient) else 'a' + ignore_client_fields = ('config', 'dbutils', 'api_client', 'files') + all = [] + for service_name, service_inst in inspect.getmembers(client_inst): + if service_name.startswith('_'): + continue + if service_name in ignore_client_fields: + continue + class_doc = service_inst.__doc__ + class_name = service_inst.__class__.__name__ + all.append( + ServiceDoc(client_prefix=client_prefix, + service_name=service_name, + class_name=class_name, + doc=class_doc, + tag=self._get_tag_name(service_inst.__class__.__name__, service_name), + methods=self.class_methods(service_inst))) + return all + + @staticmethod + def _should_document(obj): + return is_dataclass(obj) or (type(obj) == type and issubclass(obj, Enum)) + + @staticmethod + def _make_folder_if_not_exists(folder): + if not os.path.exists(folder): + os.makedirs(folder) + + def write_dataclass_docs(self): + self._make_folder_if_not_exists(f'{__dir__}/dbdataclasses') for pkg in self.packages: - self._write_client_package_doc(pkg) - self._write_reference_toc() + module = importlib.import_module(f'databricks.sdk.service.{pkg.name}') + all_members = [name for name, _ in inspect.getmembers(module, predicate=self._should_document)] + doc = DataclassesDoc(package=pkg, dataclasses=sorted(all_members)) + with open(f'{__dir__}/dbdataclasses/{pkg.name}.rst', 'w') as f: + f.write(doc.as_rst()) + all = "\n ".join([f'{p.name}' for p in self.packages]) + with open(f'{__dir__}/dbdataclasses/index.rst', 'w') as f: + f.write(f''' +Dataclasses +=========== - def _write_client_package_doc(self, pkg: Package): - title = f'``{pkg.name}``: {pkg.label}' - has_mixin = os.path.exists(f'{__dir__}/../databricks/sdk/mixins/{pkg.name}.py') - with open(f'{__dir__}/autogen/{pkg.name}.rst', 'w') as f: +.. 
toctree:: + :maxdepth: 1 + + {all}''') + + def _get_tag_name(self, class_name, service_name) -> Tag: + if class_name[-3:] == 'Ext': + # ClustersExt, DbfsExt, WorkspaceExt, but not ExternalLocations + class_name = class_name.replace('Ext', 'API') + class_name = class_name[:-3] + for tag_name, t in self.mapping.items(): + if t.service == class_name: + return t + raise KeyError(f'Cannot find {class_name} / {service_name} tag') + + def load_client(self, client, folder, label, description): + client_services = [] + package_to_services = collections.defaultdict(list) + service_docs = self.service_docs(client) + for svc in service_docs: + client_services.append(svc.service_name) + package = svc.tag.package.name + package_to_services[package].append(svc.service_name) + self._make_folder_if_not_exists(f'{__dir__}/{folder}/{package}') + with open(f'{__dir__}/{folder}/{package}/{svc.service_name}.rst', 'w') as f: + f.write(svc.as_rst()) + ordered_packages = [] + for pkg in self.packages: + if pkg.name not in package_to_services: + continue + ordered_packages.append(pkg.name) + self._write_client_package_doc(folder, pkg, package_to_services[pkg.name]) + self._write_client_packages(folder, label, description, ordered_packages) + + def _write_client_packages(self, folder: str, label: str, description: str, packages: list[str]): + """Writes out the top-level index for the APIs supported by a client.""" + self._make_folder_if_not_exists(f'{__dir__}/{folder}') + with open(f'{__dir__}/{folder}/index.rst', 'w') as f: + all = "\n ".join([f'{name}/index' for name in packages]) f.write(f''' -{title} -{'=' * len(title)} +{label} +{'=' * len(label)} -{pkg.description} - -.. automodule:: databricks.sdk.service.{pkg.name} - :members: - :undoc-members: -''') - if has_mixin: - f.write(f''' -.. 
automodule:: databricks.sdk.mixins.{pkg.name} - :members: - :inherited-members: - :undoc-members: -''') - - def _write_reference_toc(self): - all = '\n'.join([f' {p.name}' for p in sorted(self.packages, key=lambda p: p.name)]) - with open(f'{__dir__}/autogen/reference.rst', 'w') as f: +{description} + +.. toctree:: + :maxdepth: 1 + + {all}''') + + def _write_client_package_doc(self, folder: str, pkg: Package, services: list[str]): + """Writes out the index for a single package supported by a client.""" + self._make_folder_if_not_exists(f'{__dir__}/{folder}/{pkg.name}') + with open(f'{__dir__}/{folder}/{pkg.name}/index.rst', 'w') as f: + all = "\n ".join(services) f.write(f''' -Reference -========= +{pkg.label} +{'=' * len(pkg.label)} + +{pkg.description} .. toctree:: :maxdepth: 1 - -{all} -''') + + {all}''') if __name__ == '__main__': + + @credentials_provider('noop', []) + def noop_credentials(_: any): + return lambda: {} + gen = Generator() - gen.write_reference() + + w = WorkspaceClient(credentials_provider=noop_credentials) + gen.load_client(w, 'workspace', 'Workspace APIs', 'These APIs are available from WorkspaceClient') + + a = AccountClient(credentials_provider=noop_credentials) + gen.load_client(a, 'account', 'Account APIs', 'These APIs are available from AccountClient') + + gen.write_dataclass_docs() diff --git a/docs/getting-started.md b/docs/getting-started.md index f0b0c6439..71f735c62 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -20,6 +20,41 @@ followed by dbutils.library.restartPython() ``` +## Usage Overview + +At its core, the SDK exposes two primary clients: `databricks.sdk.WorkspaceClient` and `databricks.sdk.AccountClient`. The `WorkspaceClient` is tailored for interacting with resources within the Databricks workspace, such as notebooks, jobs, and clusters, while the `AccountClient` focuses on account-level functionalities including user and group management, billing, and workspace provisioning and management. 
+ +To use the SDK to call an API, first find the API in either the [Workspace API Reference](workspace/index.rst) or [Account API Reference](account/index.rst). Then, on the appropriate client, call the corresponding method. All API calls have the form + +``` +w.<service>.<method>(<parameters>) +``` +or +``` +a.<service>.<method>(<parameters>) +``` + +For example, to list all SQL queries in the workspace, run: + +```python +# Authenticate as described above +from databricks.sdk import WorkspaceClient +w = WorkspaceClient() +for query in w.queries.list(): + print(f'query {query.name} was created at {query.created_at}') +``` + +To list all workspaces in the account, run: + +```python +# Authenticate as described above +from databricks.sdk import AccountClient +a = AccountClient() +for workspace in a.workspaces.list(): + print(f'workspace {workspace.workspace_name} was created at {workspace.creation_time}') +``` + + ## Authentication There are two primary entry points to the Databricks SDK: @@ -61,7 +96,7 @@ The Databricks SDK for Python makes use of Python's data classes and enums to re Specific data classes are organized into separate packages under `databricks.sdk.service`. For example, `databricks.sdk.service.jobs` has definitions for data classes & enums related to the Jobs API. -For more information, consult the [API Reference](autogen/reference.rst). +For more information, consult the [Dataclasses API Reference](dbdataclasses/index.rst). ## Examples diff --git a/docs/index.rst b/docs/index.rst index 581077432..a4873c43e 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,7 +8,7 @@ This SDK is supported for production use cases, but we do expect future releases We are keen to hear feedback from you on these SDKs. Please `file GitHub issues `_, and we will address them. .. toctree:: - :maxdepth: 2 + :maxdepth: 3 getting-started authentication @@ -17,7 +17,10 @@ We are keen to hear feedback from you on these SDKs. 
Please `file GitHub issues pagination logging dbutils - clients - autogen/reference + clients/workspace + workspace/index + clients/account + account/index + dbdataclasses/index diff --git a/docs/oauth.md b/docs/oauth.md index b5768522b..b04adbfac 100644 --- a/docs/oauth.md +++ b/docs/oauth.md @@ -55,11 +55,11 @@ for cl in clusters: It will launch a browser, prompting user to login with Azure credentials and give consent like described on the following screen: -![](./images/aad-approve-app.png) +![](images/aad-approve-app.png) After giving consent, the user can close the browser tab: -![](./images/external-browser-finish.png) +![](images/external-browser-finish.png) ### Public Client 3-legged OAuth flow on local machines @@ -316,4 +316,4 @@ custom_app = account_client.custom_app_integration.create( logging.info(f'Created new custom app: ' f'--client_id {custom_app.client_id} ' f'--client_secret {custom_app.client_secret}') -``` \ No newline at end of file +``` diff --git a/docs/workspace/catalog/artifact_allowlists.rst b/docs/workspace/catalog/artifact_allowlists.rst new file mode 100644 index 000000000..349bbbd0f --- /dev/null +++ b/docs/workspace/catalog/artifact_allowlists.rst @@ -0,0 +1,37 @@ +``w.artifact_allowlists``: Artifact Allowlists +============================================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: ArtifactAllowlistsAPI + + In Databricks Runtime 13.3 and above, you can add libraries and init scripts to the `allowlist` in UC so + that users can leverage these artifacts on compute configured with shared access mode. + + .. py:method:: get(artifact_type: ArtifactType) -> ArtifactAllowlistInfo + + Get an artifact allowlist. + + Get the artifact allowlist of a certain artifact type. The caller must be a metastore admin or have + the **MANAGE ALLOWLIST** privilege on the metastore. + + :param artifact_type: :class:`ArtifactType` + The artifact type of the allowlist. 
+ + :returns: :class:`ArtifactAllowlistInfo` + + + .. py:method:: update(artifact_type: ArtifactType, artifact_matchers: List[ArtifactMatcher]) -> ArtifactAllowlistInfo + + Set an artifact allowlist. + + Set the artifact allowlist of a certain artifact type. The whole artifact allowlist is replaced with + the new allowlist. The caller must be a metastore admin or have the **MANAGE ALLOWLIST** privilege on + the metastore. + + :param artifact_type: :class:`ArtifactType` + The artifact type of the allowlist. + :param artifact_matchers: List[:class:`ArtifactMatcher`] + A list of allowed artifact match patterns. + + :returns: :class:`ArtifactAllowlistInfo` + \ No newline at end of file diff --git a/docs/workspace/catalog/catalogs.rst b/docs/workspace/catalog/catalogs.rst new file mode 100644 index 000000000..5592152bb --- /dev/null +++ b/docs/workspace/catalog/catalogs.rst @@ -0,0 +1,169 @@ +``w.catalogs``: Catalogs +======================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: CatalogsAPI + + A catalog is the first layer of Unity Catalog’s three-level namespace. It’s used to organize your data + assets. Users can see all catalogs on which they have been assigned the USE_CATALOG data permission. + + In Unity Catalog, admins and data stewards manage users and their access to data centrally across all of + the workspaces in a Databricks account. Users in different workspaces can share access to the same data, + depending on privileges granted centrally in Unity Catalog. + + .. py:method:: create(name: str [, comment: Optional[str], connection_name: Optional[str], options: Optional[Dict[str, str]], properties: Optional[Dict[str, str]], provider_name: Optional[str], share_name: Optional[str], storage_root: Optional[str]]) -> CatalogInfo + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + # cleanup + w.catalogs.delete(name=created.name, force=True) + + Create a catalog. + + Creates a new catalog instance in the parent metastore if the caller is a metastore admin or has the + **CREATE_CATALOG** privilege. + + :param name: str + Name of catalog. + :param comment: str (optional) + User-provided free-form text description. + :param connection_name: str (optional) + The name of the connection to an external data source. + :param options: Dict[str,str] (optional) + A map of key-value properties attached to the securable. + :param properties: Dict[str,str] (optional) + A map of key-value properties attached to the securable. + :param provider_name: str (optional) + The name of delta sharing provider. + + A Delta Sharing catalog is a catalog that is based on a Delta share on a remote sharing server. + :param share_name: str (optional) + The name of the share under the share provider. + :param storage_root: str (optional) + Storage root URL for managed tables within catalog. + + :returns: :class:`CatalogInfo` + + + .. py:method:: delete(name: str [, force: Optional[bool]]) + + Delete a catalog. + + Deletes the catalog that matches the supplied name. The caller must be a metastore admin or the owner + of the catalog. + + :param name: str + The name of the catalog. + :param force: bool (optional) + Force deletion even if the catalog is not empty. + + + + + .. py:method:: get(name: str) -> CatalogInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + _ = w.catalogs.get(name=created.name) + + # cleanup + w.catalogs.delete(name=created.name, force=True) + + Get a catalog. + + Gets the specified catalog in a metastore. 
The caller must be a metastore admin, the owner of the + catalog, or a user that has the **USE_CATALOG** privilege set for their account. + + :param name: str + The name of the catalog. + + :returns: :class:`CatalogInfo` + + + .. py:method:: list() -> Iterator[CatalogInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.catalogs.list() + + List catalogs. + + Gets an array of catalogs in the metastore. If the caller is the metastore admin, all catalogs will be + retrieved. Otherwise, only catalogs owned by the caller (or for which the caller has the + **USE_CATALOG** privilege) will be retrieved. There is no guarantee of a specific ordering of the + elements in the array. + + :returns: Iterator over :class:`CatalogInfo` + + + .. py:method:: update(name: str [, comment: Optional[str], enable_predictive_optimization: Optional[EnablePredictiveOptimization], isolation_mode: Optional[IsolationMode], new_name: Optional[str], owner: Optional[str], properties: Optional[Dict[str, str]]]) -> CatalogInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + _ = w.catalogs.update(name=created.name, comment="updated") + + # cleanup + w.catalogs.delete(name=created.name, force=True) + + Update a catalog. + + Updates the catalog that matches the supplied name. The caller must be either the owner of the + catalog, or a metastore admin (when changing the owner field of the catalog). + + :param name: str + The name of the catalog. + :param comment: str (optional) + User-provided free-form text description. + :param enable_predictive_optimization: :class:`EnablePredictiveOptimization` (optional) + Whether predictive optimization should be enabled for this object and objects under it. 
+ :param isolation_mode: :class:`IsolationMode` (optional) + Whether the current securable is accessible from all workspaces or a specific set of workspaces. + :param new_name: str (optional) + New name for the catalog. + :param owner: str (optional) + Username of current owner of catalog. + :param properties: Dict[str,str] (optional) + A map of key-value properties attached to the securable. + + :returns: :class:`CatalogInfo` + \ No newline at end of file diff --git a/docs/workspace/catalog/connections.rst b/docs/workspace/catalog/connections.rst new file mode 100644 index 000000000..6125db714 --- /dev/null +++ b/docs/workspace/catalog/connections.rst @@ -0,0 +1,208 @@ +``w.connections``: Connections +============================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: ConnectionsAPI + + Connections allow for creating a connection to an external data source. + + A connection is an abstraction of an external data source that can be connected from Databricks Compute. + Creating a connection object is the first step to managing external data sources within Unity Catalog, + with the second step being creating a data object (catalog, schema, or table) using the connection. Data + objects derived from a connection can be written to or read from similar to other Unity Catalog data + objects based on cloud storage. Users may create different types of connections with each connection + having a unique set of configuration options to support credential management and other settings. + + .. py:method:: create(name: str, connection_type: ConnectionType, options: Dict[str, str] [, comment: Optional[str], properties: Optional[Dict[str, str]], read_only: Optional[bool]]) -> ConnectionInfo + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + conn_create = w.connections.create(comment="Go SDK Acceptance Test Connection", + connection_type=catalog.ConnectionType.DATABRICKS, + name=f'sdk-{time.time_ns()}', + options={ + "host": + "%s-fake-workspace.cloud.databricks.com" % (f'sdk-{time.time_ns()}'), + "httpPath": + "/sql/1.0/warehouses/%s" % (f'sdk-{time.time_ns()}'), + "personalAccessToken": + f'sdk-{time.time_ns()}', + }) + + # cleanup + w.connections.delete(name_arg=conn_create.name) + + Create a connection. + + Creates a new connection + + Creates a new connection to an external data source. It allows users to specify connection details and + configurations for interaction with the external server. + + :param name: str + Name of the connection. + :param connection_type: :class:`ConnectionType` + The type of connection. + :param options: Dict[str,str] + A map of key-value properties attached to the securable. + :param comment: str (optional) + User-provided free-form text description. + :param properties: Dict[str,str] (optional) + An object containing map of key-value properties attached to the connection. + :param read_only: bool (optional) + If the connection is read only. + + :returns: :class:`ConnectionInfo` + + + .. py:method:: delete(name_arg: str) + + Delete a connection. + + Deletes the connection that matches the supplied name. + + :param name_arg: str + The name of the connection to be deleted. + + + + + .. py:method:: get(name_arg: str) -> ConnectionInfo + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + conn_create = w.connections.create(comment="Go SDK Acceptance Test Connection", + connection_type=catalog.ConnectionType.DATABRICKS, + name=f'sdk-{time.time_ns()}', + options={ + "host": + "%s-fake-workspace.cloud.databricks.com" % (f'sdk-{time.time_ns()}'), + "httpPath": + "/sql/1.0/warehouses/%s" % (f'sdk-{time.time_ns()}'), + "personalAccessToken": + f'sdk-{time.time_ns()}', + }) + + conn_update = w.connections.update(name=conn_create.name, + name_arg=conn_create.name, + options={ + "host": + "%s-fake-workspace.cloud.databricks.com" % (f'sdk-{time.time_ns()}'), + "httpPath": + "/sql/1.0/warehouses/%s" % (f'sdk-{time.time_ns()}'), + "personalAccessToken": + f'sdk-{time.time_ns()}', + }) + + conn = w.connections.get(name_arg=conn_update.name) + + # cleanup + w.connections.delete(name_arg=conn_create.name) + + Get a connection. + + Gets a connection from its name. + + :param name_arg: str + Name of the connection. + + :returns: :class:`ConnectionInfo` + + + .. py:method:: list() -> Iterator[ConnectionInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + conn_list = w.connections.list() + + List connections. + + List all connections. + + :returns: Iterator over :class:`ConnectionInfo` + + + .. py:method:: update(name_arg: str, options: Dict[str, str] [, name: Optional[str], new_name: Optional[str], owner: Optional[str]]) -> ConnectionInfo + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + conn_create = w.connections.create(comment="Go SDK Acceptance Test Connection", + connection_type=catalog.ConnectionType.DATABRICKS, + name=f'sdk-{time.time_ns()}', + options={ + "host": + "%s-fake-workspace.cloud.databricks.com" % (f'sdk-{time.time_ns()}'), + "httpPath": + "/sql/1.0/warehouses/%s" % (f'sdk-{time.time_ns()}'), + "personalAccessToken": + f'sdk-{time.time_ns()}', + }) + + conn_update = w.connections.update(name=conn_create.name, + name_arg=conn_create.name, + options={ + "host": + "%s-fake-workspace.cloud.databricks.com" % (f'sdk-{time.time_ns()}'), + "httpPath": + "/sql/1.0/warehouses/%s" % (f'sdk-{time.time_ns()}'), + "personalAccessToken": + f'sdk-{time.time_ns()}', + }) + + # cleanup + w.connections.delete(name_arg=conn_create.name) + + Update a connection. + + Updates the connection that matches the supplied name. + + :param name_arg: str + Name of the connection. + :param options: Dict[str,str] + A map of key-value properties attached to the securable. + :param name: str (optional) + Name of the connection. + :param new_name: str (optional) + New name for the connection. + :param owner: str (optional) + Username of current owner of the connection. + + :returns: :class:`ConnectionInfo` + \ No newline at end of file diff --git a/docs/workspace/catalog/external_locations.rst b/docs/workspace/catalog/external_locations.rst new file mode 100644 index 000000000..e9e86fb41 --- /dev/null +++ b/docs/workspace/catalog/external_locations.rst @@ -0,0 +1,222 @@ +``w.external_locations``: External Locations +============================================ +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: ExternalLocationsAPI + + An external location is an object that combines a cloud storage path with a storage credential that + authorizes access to the cloud storage path. 
Each external location is subject to Unity Catalog + access-control policies that control which users and groups can access the credential. If a user does not + have access to an external location in Unity Catalog, the request fails and Unity Catalog does not attempt + to authenticate to your cloud tenant on the user’s behalf. + + Databricks recommends using external locations rather than using storage credentials directly. + + To create external locations, you must be a metastore admin or a user with the + **CREATE_EXTERNAL_LOCATION** privilege. + + .. py:method:: create(name: str, url: str, credential_name: str [, access_point: Optional[str], comment: Optional[str], encryption_details: Optional[EncryptionDetails], read_only: Optional[bool], skip_validation: Optional[bool]]) -> ExternalLocationInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + storage_credential = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"]), + comment="created via SDK") + + external_location = w.external_locations.create(name=f'sdk-{time.time_ns()}', + credential_name=storage_credential.name, + comment="created via SDK", + url="s3://" + os.environ["TEST_BUCKET"] + "/" + + f'sdk-{time.time_ns()}') + + # cleanup + w.storage_credentials.delete(name=storage_credential.name) + w.external_locations.delete(name=external_location.name) + + Create an external location. + + Creates a new external location entry in the metastore. The caller must be a metastore admin or have + the **CREATE_EXTERNAL_LOCATION** privilege on both the metastore and the associated storage + credential. + + :param name: str + Name of the external location. + :param url: str + Path URL of the external location. 
+ :param credential_name: str + Name of the storage credential used with this location. + :param access_point: str (optional) + The AWS access point to use when accessing S3 for this external location. + :param comment: str (optional) + User-provided free-form text description. + :param encryption_details: :class:`EncryptionDetails` (optional) + Encryption options that apply to clients connecting to cloud storage. + :param read_only: bool (optional) + Indicates whether the external location is read-only. + :param skip_validation: bool (optional) + Skips validation of the storage credential associated with the external location. + + :returns: :class:`ExternalLocationInfo` + + + .. py:method:: delete(name: str [, force: Optional[bool]]) + + Delete an external location. + + Deletes the specified external location from the metastore. The caller must be the owner of the + external location. + + :param name: str + Name of the external location. + :param force: bool (optional) + Force deletion even if there are dependent external tables or mounts. + + + + + .. py:method:: get(name: str) -> ExternalLocationInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + credential = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"])) + + created = w.external_locations.create(name=f'sdk-{time.time_ns()}', + credential_name=credential.name, + url=f's3://{os.environ["TEST_BUCKET"]}/sdk-{time.time_ns()}') + + _ = w.external_locations.get(name=created.name) + + # cleanup + w.storage_credentials.delete(name=credential.name) + w.external_locations.delete(name=created.name) + + Get an external location. + + Gets an external location from the metastore. 
The caller must be either a metastore admin, the owner + of the external location, or a user that has some privilege on the external location. + + :param name: str + Name of the external location. + + :returns: :class:`ExternalLocationInfo` + + + .. py:method:: list( [, max_results: Optional[int], page_token: Optional[str]]) -> Iterator[ExternalLocationInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + all = w.external_locations.list(catalog.ListExternalLocationsRequest()) + + List external locations. + + Gets an array of external locations (__ExternalLocationInfo__ objects) from the metastore. The caller + must be a metastore admin, the owner of the external location, or a user that has some privilege on + the external location. For unpaginated request, there is no guarantee of a specific ordering of the + elements in the array. For paginated request, elements are ordered by their name. + + :param max_results: int (optional) + Maximum number of external locations to return. If not set, all the external locations are returned + (not recommended). - when set to a value greater than 0, the page length is the minimum of this + value and a server configured value; - when set to 0, the page length is set to a server configured + value (recommended); - when set to a value less than 0, an invalid parameter error is returned; + :param page_token: str (optional) + Opaque pagination token to go to next page based on previous query. + + :returns: Iterator over :class:`ExternalLocationInfo` + + + .. py:method:: update(name: str [, access_point: Optional[str], comment: Optional[str], credential_name: Optional[str], encryption_details: Optional[EncryptionDetails], force: Optional[bool], new_name: Optional[str], owner: Optional[str], read_only: Optional[bool], skip_validation: Optional[bool], url: Optional[str]]) -> ExternalLocationInfo + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + credential = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"])) + + created = w.external_locations.create(name=f'sdk-{time.time_ns()}', + credential_name=credential.name, + url="s3://%s/%s" % (os.environ["TEST_BUCKET"], f'sdk-{time.time_ns()}')) + + _ = w.external_locations.update(name=created.name, + credential_name=credential.name, + url="s3://%s/%s" % (os.environ["TEST_BUCKET"], f'sdk-{time.time_ns()}')) + + # cleanup + w.storage_credentials.delete(name=credential.name) + w.external_locations.delete(name=created.name) + + Update an external location. + + Updates an external location in the metastore. The caller must be the owner of the external location, + or be a metastore admin. In the second case, the admin can only update the name of the external + location. + + :param name: str + Name of the external location. + :param access_point: str (optional) + The AWS access point to use when accessing S3 for this external location. + :param comment: str (optional) + User-provided free-form text description. + :param credential_name: str (optional) + Name of the storage credential used with this location. + :param encryption_details: :class:`EncryptionDetails` (optional) + Encryption options that apply to clients connecting to cloud storage. + :param force: bool (optional) + Force update even if changing url invalidates dependent external tables or mounts. + :param new_name: str (optional) + New name for the external location. + :param owner: str (optional) + The owner of the external location. + :param read_only: bool (optional) + Indicates whether the external location is read-only. + :param skip_validation: bool (optional) + Skips validation of the storage credential associated with the external location. 
+ :param url: str (optional) + Path URL of the external location. + + :returns: :class:`ExternalLocationInfo` + \ No newline at end of file diff --git a/docs/workspace/catalog/functions.rst b/docs/workspace/catalog/functions.rst new file mode 100644 index 000000000..2cc572294 --- /dev/null +++ b/docs/workspace/catalog/functions.rst @@ -0,0 +1,110 @@ +``w.functions``: Functions +========================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: FunctionsAPI + + Functions implement User-Defined Functions (UDFs) in Unity Catalog. + + The function implementation can be any SQL expression or Query, and it can be invoked wherever a table + reference is allowed in a query. In Unity Catalog, a function resides at the same level as a table, so it + can be referenced with the form __catalog_name__.__schema_name__.__function_name__. + + .. py:method:: create(function_info: CreateFunction) -> FunctionInfo + + Create a function. + + Creates a new function + + The user must have the following permissions in order for the function to be created: - + **USE_CATALOG** on the function's parent catalog - **USE_SCHEMA** and **CREATE_FUNCTION** on the + function's parent schema + + :param function_info: :class:`CreateFunction` + Partial __FunctionInfo__ specifying the function to be created. + + :returns: :class:`FunctionInfo` + + + .. py:method:: delete(name: str [, force: Optional[bool]]) + + Delete a function. + + Deletes the function that matches the supplied name. 
For the deletion to succeed, the user must + satisfy one of the following conditions: - Is the owner of the function's parent catalog - Is the + owner of the function's parent schema and have the **USE_CATALOG** privilege on its parent catalog - + Is the owner of the function itself and have both the **USE_CATALOG** privilege on its parent catalog + and the **USE_SCHEMA** privilege on its parent schema + + :param name: str + The fully-qualified name of the function (of the form + __catalog_name__.__schema_name__.__function_name__). + :param force: bool (optional) + Force deletion even if the function is not empty. + + + + + .. py:method:: get(name: str) -> FunctionInfo + + Get a function. + + Gets a function from within a parent catalog and schema. For the fetch to succeed, the user must + satisfy one of the following requirements: - Is a metastore admin - Is an owner of the function's + parent catalog - Have the **USE_CATALOG** privilege on the function's parent catalog and be the owner + of the function - Have the **USE_CATALOG** privilege on the function's parent catalog, the + **USE_SCHEMA** privilege on the function's parent schema, and the **EXECUTE** privilege on the + function itself + + :param name: str + The fully-qualified name of the function (of the form + __catalog_name__.__schema_name__.__function_name__). + + :returns: :class:`FunctionInfo` + + + .. py:method:: list(catalog_name: str, schema_name: str [, max_results: Optional[int], page_token: Optional[str]]) -> Iterator[FunctionInfo] + + List functions. + + List functions within the specified parent catalog and schema. If the user is a metastore admin, all + functions are returned in the output list. Otherwise, the user must have the **USE_CATALOG** privilege + on the catalog and the **USE_SCHEMA** privilege on the schema, and the output list contains only + functions for which either the user has the **EXECUTE** privilege or the user is the owner. 
For + unpaginated request, there is no guarantee of a specific ordering of the elements in the array. For + paginated request, elements are ordered by their name. + + :param catalog_name: str + Name of parent catalog for functions of interest. + :param schema_name: str + Parent schema of functions. + :param max_results: int (optional) + Maximum number of functions to return. If not set, all the functions are returned (not recommended). + - when set to a value greater than 0, the page length is the minimum of this value and a server + configured value; - when set to 0, the page length is set to a server configured value + (recommended); - when set to a value less than 0, an invalid parameter error is returned; + :param page_token: str (optional) + Opaque pagination token to go to next page based on previous query. + + :returns: Iterator over :class:`FunctionInfo` + + + .. py:method:: update(name: str [, owner: Optional[str]]) -> FunctionInfo + + Update a function. + + Updates the function that matches the supplied name. Only the owner of the function can be updated. If + the user is not a metastore admin, the user must be a member of the group that is the new function + owner. - Is a metastore admin - Is the owner of the function's parent catalog - Is the owner of the + function's parent schema and has the **USE_CATALOG** privilege on its parent catalog - Is the owner of + the function itself and has the **USE_CATALOG** privilege on its parent catalog as well as the + **USE_SCHEMA** privilege on the function's parent schema. + + :param name: str + The fully-qualified name of the function (of the form + __catalog_name__.__schema_name__.__function__name__). + :param owner: str (optional) + Username of current owner of function. 
+ + :returns: :class:`FunctionInfo` + \ No newline at end of file diff --git a/docs/workspace/catalog/grants.rst b/docs/workspace/catalog/grants.rst new file mode 100644 index 000000000..8def7ff83 --- /dev/null +++ b/docs/workspace/catalog/grants.rst @@ -0,0 +1,176 @@ +``w.grants``: Grants +==================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: GrantsAPI + + In Unity Catalog, data is secure by default. Initially, users have no access to data in a metastore. + Access can be granted by either a metastore admin, the owner of an object, or the owner of the catalog or + schema that contains the object. Securable objects in Unity Catalog are hierarchical and privileges are + inherited downward. + + Securable objects in Unity Catalog are hierarchical and privileges are inherited downward. This means that + granting a privilege on the catalog automatically grants the privilege to all current and future objects + within the catalog. Similarly, privileges granted on a schema are inherited by all current and future + objects within that schema. + + .. py:method:: get(securable_type: SecurableType, full_name: str [, principal: Optional[str]]) -> PermissionsList + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + table_name = f'sdk-{time.time_ns()}' + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + _ = w.statement_execution.execute(warehouse_id=os.environ["TEST_DEFAULT_WAREHOUSE_ID"], + catalog=created_catalog.name, + schema=created_schema.name, + statement="CREATE TABLE %s AS SELECT 2+2 as four" % (table_name)).result() + + table_full_name = "%s.%s.%s" % (created_catalog.name, created_schema.name, table_name) + + created_table = w.tables.get(full_name=table_full_name) + + grants = w.grants.get_effective(securable_type=catalog.SecurableType.TABLE, full_name=created_table.full_name) + + # cleanup + w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=created_catalog.name, force=True) + w.tables.delete(full_name=table_full_name) + + Get permissions. + + Gets the permissions for a securable. + + :param securable_type: :class:`SecurableType` + Type of securable. + :param full_name: str + Full name of securable. + :param principal: str (optional) + If provided, only the permissions for the specified principal (user or group) are returned. + + :returns: :class:`PermissionsList` + + + .. py:method:: get_effective(securable_type: SecurableType, full_name: str [, principal: Optional[str]]) -> EffectivePermissionsList + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + table_name = f'sdk-{time.time_ns()}' + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + _ = w.statement_execution.execute(warehouse_id=os.environ["TEST_DEFAULT_WAREHOUSE_ID"], + catalog=created_catalog.name, + schema=created_schema.name, + statement="CREATE TABLE %s AS SELECT 2+2 as four" % (table_name)).result() + + table_full_name = "%s.%s.%s" % (created_catalog.name, created_schema.name, table_name) + + created_table = w.tables.get(full_name=table_full_name) + + grants = w.grants.get_effective(securable_type=catalog.SecurableType.TABLE, full_name=created_table.full_name) + + # cleanup + w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=created_catalog.name, force=True) + w.tables.delete(full_name=table_full_name) + + Get effective permissions. + + Gets the effective permissions for a securable. + + :param securable_type: :class:`SecurableType` + Type of securable. + :param full_name: str + Full name of securable. + :param principal: str (optional) + If provided, only the effective permissions for the specified principal (user or group) are + returned. + + :returns: :class:`EffectivePermissionsList` + + + .. py:method:: update(securable_type: SecurableType, full_name: str [, changes: Optional[List[PermissionsChange]]]) -> PermissionsList + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + table_name = f'sdk-{time.time_ns()}' + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + _ = w.statement_execution.execute(warehouse_id=os.environ["TEST_DEFAULT_WAREHOUSE_ID"], + catalog=created_catalog.name, + schema=created_schema.name, + statement="CREATE TABLE %s AS SELECT 2+2 as four" % (table_name)).result() + + table_full_name = "%s.%s.%s" % (created_catalog.name, created_schema.name, table_name) + + account_level_group_name = os.environ["TEST_DATA_ENG_GROUP"] + + created_table = w.tables.get(full_name=table_full_name) + + x = w.grants.update(full_name=created_table.full_name, + securable_type=catalog.SecurableType.TABLE, + changes=[ + catalog.PermissionsChange(add=[catalog.Privilege.MODIFY, catalog.Privilege.SELECT], + principal=account_level_group_name) + ]) + + # cleanup + w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=created_catalog.name, force=True) + w.tables.delete(full_name=table_full_name) + + Update permissions. + + Updates the permissions for a securable. + + :param securable_type: :class:`SecurableType` + Type of securable. + :param full_name: str + Full name of securable. + :param changes: List[:class:`PermissionsChange`] (optional) + Array of permissions change objects. + + :returns: :class:`PermissionsList` + \ No newline at end of file diff --git a/docs/workspace/catalog/index.rst b/docs/workspace/catalog/index.rst new file mode 100644 index 000000000..b3b18dc2c --- /dev/null +++ b/docs/workspace/catalog/index.rst @@ -0,0 +1,25 @@ + +Unity Catalog +============= + +Configure data governance with Unity Catalog for metastores, catalogs, schemas, tables, external locations, and storage credentials + +.. 
toctree:: + :maxdepth: 1 + + artifact_allowlists + catalogs + connections + external_locations + functions + grants + metastores + model_versions + registered_models + schemas + storage_credentials + system_schemas + table_constraints + tables + volumes + workspace_bindings \ No newline at end of file diff --git a/docs/workspace/catalog/metastores.rst b/docs/workspace/catalog/metastores.rst new file mode 100644 index 000000000..a5beb397a --- /dev/null +++ b/docs/workspace/catalog/metastores.rst @@ -0,0 +1,314 @@ +``w.metastores``: Metastores +============================ +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: MetastoresAPI + + A metastore is the top-level container of objects in Unity Catalog. It stores data assets (tables and + views) and the permissions that govern access to them. Databricks account admins can create metastores and + assign them to Databricks workspaces to control which workloads use each metastore. For a workspace to use + Unity Catalog, it must have a Unity Catalog metastore attached. + + Each metastore is configured with a root storage location in a cloud storage account. This storage + location is used for metadata and managed tables data. + + NOTE: This metastore is distinct from the metastore included in Databricks workspaces created before Unity + Catalog was released. If your workspace includes a legacy Hive metastore, the data in that metastore is + available in a catalog named hive_metastore. + + .. py:method:: assign(workspace_id: int, metastore_id: str, default_catalog_name: str) + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + workspace_id = os.environ["DUMMY_WORKSPACE_ID"] + + created = w.metastores.create(name=f'sdk-{time.time_ns()}', + storage_root="s3://%s/%s" % + (os.environ["TEST_BUCKET"], f'sdk-{time.time_ns()}')) + + w.metastores.assign(metastore_id=created.metastore_id, workspace_id=workspace_id) + + # cleanup + w.metastores.delete(id=created.metastore_id, force=True) + + Create an assignment. + + Creates a new metastore assignment. If an assignment for the same __workspace_id__ exists, it will be + overwritten by the new __metastore_id__ and __default_catalog_name__. The caller must be an account + admin. + + :param workspace_id: int + A workspace ID. + :param metastore_id: str + The unique ID of the metastore. + :param default_catalog_name: str + The name of the default catalog in the metastore. + + + + + .. py:method:: create(name: str [, region: Optional[str], storage_root: Optional[str]]) -> MetastoreInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.metastores.create(name=f'sdk-{time.time_ns()}', + storage_root="s3://%s/%s" % + (os.environ["TEST_BUCKET"], f'sdk-{time.time_ns()}')) + + # cleanup + w.metastores.delete(id=created.metastore_id, force=True) + + Create a metastore. + + Creates a new metastore based on a provided name and optional storage root path. By default (if the + __owner__ field is not set), the owner of the new metastore is the user calling the + __createMetastore__ API. If the __owner__ field is set to the empty string (**""**), the ownership is + assigned to the System User instead. + + :param name: str + The user-specified name of the metastore. + :param region: str (optional) + Cloud region which the metastore serves (e.g., `us-west-2`, `westus`). 
If this field is omitted, the + region of the workspace receiving the request will be used. + :param storage_root: str (optional) + The storage root URL for metastore + + :returns: :class:`MetastoreInfo` + + + .. py:method:: current() -> MetastoreAssignment + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + current_metastore = w.metastores.current() + + Get metastore assignment for workspace. + + Gets the metastore assignment for the workspace being accessed. + + :returns: :class:`MetastoreAssignment` + + + .. py:method:: delete(id: str [, force: Optional[bool]]) + + Delete a metastore. + + Deletes a metastore. The caller must be a metastore admin. + + :param id: str + Unique ID of the metastore. + :param force: bool (optional) + Force deletion even if the metastore is not empty. Default is false. + + + + + .. py:method:: get(id: str) -> MetastoreInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.metastores.create(name=f'sdk-{time.time_ns()}', + storage_root="s3://%s/%s" % + (os.environ["TEST_BUCKET"], f'sdk-{time.time_ns()}')) + + _ = w.metastores.get(id=created.metastore_id) + + # cleanup + w.metastores.delete(id=created.metastore_id, force=True) + + Get a metastore. + + Gets a metastore that matches the supplied ID. The caller must be a metastore admin to retrieve this + info. + + :param id: str + Unique ID of the metastore. + + :returns: :class:`MetastoreInfo` + + + .. py:method:: list() -> Iterator[MetastoreInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.metastores.list() + + List metastores. + + Gets an array of the available metastores (as __MetastoreInfo__ objects). The caller must be an admin + to retrieve this info. There is no guarantee of a specific ordering of the elements in the array. 
+ + :returns: Iterator over :class:`MetastoreInfo` + + + .. py:method:: summary() -> GetMetastoreSummaryResponse + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + summary = w.metastores.summary() + + Get a metastore summary. + + Gets information about a metastore. This summary includes the storage credential, the cloud vendor, + the cloud region, and the global metastore ID. + + :returns: :class:`GetMetastoreSummaryResponse` + + + .. py:method:: unassign(workspace_id: int, metastore_id: str) + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + workspace_id = os.environ["DUMMY_WORKSPACE_ID"] + + created = w.metastores.create(name=f'sdk-{time.time_ns()}', + storage_root="s3://%s/%s" % + (os.environ["TEST_BUCKET"], f'sdk-{time.time_ns()}')) + + w.metastores.unassign(metastore_id=created.metastore_id, workspace_id=workspace_id) + + # cleanup + w.metastores.delete(id=created.metastore_id, force=True) + + Delete an assignment. + + Deletes a metastore assignment. The caller must be an account administrator. + + :param workspace_id: int + A workspace ID. + :param metastore_id: str + Query for the ID of the metastore to delete. + + + + + .. py:method:: update(id: str [, delta_sharing_organization_name: Optional[str], delta_sharing_recipient_token_lifetime_in_seconds: Optional[int], delta_sharing_scope: Optional[UpdateMetastoreDeltaSharingScope], name: Optional[str], new_name: Optional[str], owner: Optional[str], privilege_model_version: Optional[str], storage_root_credential_id: Optional[str]]) -> MetastoreInfo + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.metastores.create(name=f'sdk-{time.time_ns()}', + storage_root="s3://%s/%s" % + (os.environ["TEST_BUCKET"], f'sdk-{time.time_ns()}')) + + _ = w.metastores.update(id=created.metastore_id, name=f'sdk-{time.time_ns()}') + + # cleanup + w.metastores.delete(id=created.metastore_id, force=True) + + Update a metastore. + + Updates information for a specific metastore. The caller must be a metastore admin. If the __owner__ + field is set to the empty string (**""**), the ownership is updated to the System User. + + :param id: str + Unique ID of the metastore. + :param delta_sharing_organization_name: str (optional) + The organization name of a Delta Sharing entity, to be used in Databricks-to-Databricks Delta + Sharing as the official name. + :param delta_sharing_recipient_token_lifetime_in_seconds: int (optional) + The lifetime of delta sharing recipient token in seconds. + :param delta_sharing_scope: :class:`UpdateMetastoreDeltaSharingScope` (optional) + The scope of Delta Sharing enabled for the metastore. + :param name: str (optional) + The user-specified name of the metastore. + :param new_name: str (optional) + New name for the metastore. + :param owner: str (optional) + The owner of the metastore. + :param privilege_model_version: str (optional) + Privilege model version of the metastore, of the form `major.minor` (e.g., `1.0`). + :param storage_root_credential_id: str (optional) + UUID of storage credential to access the metastore storage_root. + + :returns: :class:`MetastoreInfo` + + + .. py:method:: update_assignment(workspace_id: int [, default_catalog_name: Optional[str], metastore_id: Optional[str]]) + + Update an assignment. + + Updates a metastore assignment. This operation can be used to update __metastore_id__ or + __default_catalog_name__ for a specified Workspace, if the Workspace is already assigned a metastore. 
+ The caller must be an account admin to update __metastore_id__; otherwise, the caller can be a + Workspace admin. + + :param workspace_id: int + A workspace ID. + :param default_catalog_name: str (optional) + The name of the default catalog for the metastore. + :param metastore_id: str (optional) + The unique ID of the metastore. + + + \ No newline at end of file diff --git a/docs/workspace/catalog/model_versions.rst b/docs/workspace/catalog/model_versions.rst new file mode 100644 index 000000000..9b609f304 --- /dev/null +++ b/docs/workspace/catalog/model_versions.rst @@ -0,0 +1,118 @@ +``w.model_versions``: Model Versions +==================================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: ModelVersionsAPI + + Databricks provides a hosted version of MLflow Model Registry in Unity Catalog. Models in Unity Catalog + provide centralized access control, auditing, lineage, and discovery of ML models across Databricks + workspaces. + + This API reference documents the REST endpoints for managing model versions in Unity Catalog. For more + details, see the [registered models API docs](/api/workspace/registeredmodels). + + .. py:method:: delete(full_name: str, version: int) + + Delete a Model Version. + + Deletes a model version from the specified registered model. Any aliases assigned to the model version + will also be deleted. + + The caller must be a metastore admin or an owner of the parent registered model. For the latter case, + the caller must also be the owner or have the **USE_CATALOG** privilege on the parent catalog and the + **USE_SCHEMA** privilege on the parent schema. + + :param full_name: str + The three-level (fully qualified) name of the model version + :param version: int + The integer version number of the model version + + + + + .. py:method:: get(full_name: str, version: int) -> RegisteredModelInfo + + Get a Model Version. + + Get a model version. 
+ + The caller must be a metastore admin or an owner of (or have the **EXECUTE** privilege on) the parent + registered model. For the latter case, the caller must also be the owner or have the **USE_CATALOG** + privilege on the parent catalog and the **USE_SCHEMA** privilege on the parent schema. + + :param full_name: str + The three-level (fully qualified) name of the model version + :param version: int + The integer version number of the model version + + :returns: :class:`RegisteredModelInfo` + + + .. py:method:: get_by_alias(full_name: str, alias: str) -> ModelVersionInfo + + Get Model Version By Alias. + + Get a model version by alias. + + The caller must be a metastore admin or an owner of (or have the **EXECUTE** privilege on) the + registered model. For the latter case, the caller must also be the owner or have the **USE_CATALOG** + privilege on the parent catalog and the **USE_SCHEMA** privilege on the parent schema. + + :param full_name: str + The three-level (fully qualified) name of the registered model + :param alias: str + The name of the alias + + :returns: :class:`ModelVersionInfo` + + + .. py:method:: list(full_name: str [, max_results: Optional[int], page_token: Optional[str]]) -> Iterator[ModelVersionInfo] + + List Model Versions. + + List model versions. You can list model versions under a particular schema, or list all model versions + in the current metastore. + + The returned models are filtered based on the privileges of the calling user. For example, the + metastore admin is able to list all the model versions. A regular user needs to be the owner or have + the **EXECUTE** privilege on the parent registered model to receive the model versions in the + response. For the latter case, the caller must also be the owner or have the **USE_CATALOG** privilege + on the parent catalog and the **USE_SCHEMA** privilege on the parent schema. + + There is no guarantee of a specific ordering of the elements in the response. 
+ + :param full_name: str + The full three-level name of the registered model under which to list model versions + :param max_results: int (optional) + Maximum number of model versions to return. If not set, the page length is set to a server + configured value (100, as of 1/3/2024). - when set to a value greater than 0, the page length is the + minimum of this value and a server configured value (1000, as of 1/3/2024); - when set to 0, the page + length is set to a server configured value (100, as of 1/3/2024) (recommended); - when set to a + value less than 0, an invalid parameter error is returned; + :param page_token: str (optional) + Opaque pagination token to go to next page based on previous query. + + :returns: Iterator over :class:`ModelVersionInfo` + + + .. py:method:: update(full_name: str, version: int [, comment: Optional[str]]) -> ModelVersionInfo + + Update a Model Version. + + Updates the specified model version. + + The caller must be a metastore admin or an owner of the parent registered model. For the latter case, + the caller must also be the owner or have the **USE_CATALOG** privilege on the parent catalog and the + **USE_SCHEMA** privilege on the parent schema. + + Currently only the comment of the model version can be updated. + + :param full_name: str + The three-level (fully qualified) name of the model version + :param version: int + The integer version number of the model version + :param comment: str (optional) + The comment attached to the model version + + :returns: :class:`ModelVersionInfo` + \ No newline at end of file diff --git a/docs/workspace/catalog/registered_models.rst b/docs/workspace/catalog/registered_models.rst new file mode 100644 index 000000000..784e0d272 --- /dev/null +++ b/docs/workspace/catalog/registered_models.rst @@ -0,0 +1,185 @@ +``w.registered_models``: Registered Models +========================================== +.. currentmodule:: databricks.sdk.service.catalog + +.. 
py:class:: RegisteredModelsAPI + + Databricks provides a hosted version of MLflow Model Registry in Unity Catalog. Models in Unity Catalog + provide centralized access control, auditing, lineage, and discovery of ML models across Databricks + workspaces. + + An MLflow registered model resides in the third layer of Unity Catalog’s three-level namespace. + Registered models contain model versions, which correspond to actual ML models (MLflow models). Creating + new model versions currently requires use of the MLflow Python client. Once model versions are created, + you can load them for batch inference using MLflow Python client APIs, or deploy them for real-time + serving using Databricks Model Serving. + + All operations on registered models and model versions require USE_CATALOG permissions on the enclosing + catalog and USE_SCHEMA permissions on the enclosing schema. In addition, the following additional + privileges are required for various operations: + + * To create a registered model, users must additionally have the CREATE_MODEL permission on the target + schema. * To view registered model or model version metadata, model version data files, or invoke a model + version, users must additionally have the EXECUTE permission on the registered model * To update + registered model or model version tags, users must additionally have APPLY TAG permissions on the + registered model * To update other registered model or model version metadata (comments, aliases) create a + new model version, or update permissions on the registered model, users must be owners of the registered + model. + + Note: The securable type for models is "FUNCTION". When using REST APIs (e.g. tagging, grants) that + specify a securable type, use "FUNCTION" as the securable type. + + .. py:method:: create(catalog_name: str, schema_name: str, name: str [, comment: Optional[str], storage_location: Optional[str]]) -> RegisteredModelInfo + + Create a Registered Model. 
+ + Creates a new registered model in Unity Catalog. + + File storage for model versions in the registered model will be located in the default location which + is specified by the parent schema, or the parent catalog, or the Metastore. + + For registered model creation to succeed, the user must satisfy the following conditions: - The caller + must be a metastore admin, or be the owner of the parent catalog and schema, or have the + **USE_CATALOG** privilege on the parent catalog and the **USE_SCHEMA** privilege on the parent schema. + - The caller must have the **CREATE MODEL** or **CREATE FUNCTION** privilege on the parent schema. + + :param catalog_name: str + The name of the catalog where the schema and the registered model reside + :param schema_name: str + The name of the schema where the registered model resides + :param name: str + The name of the registered model + :param comment: str (optional) + The comment attached to the registered model + :param storage_location: str (optional) + The storage location on the cloud under which model version data files are stored + + :returns: :class:`RegisteredModelInfo` + + + .. py:method:: delete(full_name: str) + + Delete a Registered Model. + + Deletes a registered model and all its model versions from the specified parent catalog and schema. + + The caller must be a metastore admin or an owner of the registered model. For the latter case, the + caller must also be the owner or have the **USE_CATALOG** privilege on the parent catalog and the + **USE_SCHEMA** privilege on the parent schema. + + :param full_name: str + The three-level (fully qualified) name of the registered model + + + + + .. py:method:: delete_alias(full_name: str, alias: str) + + Delete a Registered Model Alias. + + Deletes a registered model alias. + + The caller must be a metastore admin or an owner of the registered model. 
For the latter case, the + caller must also be the owner or have the **USE_CATALOG** privilege on the parent catalog and the + **USE_SCHEMA** privilege on the parent schema. + + :param full_name: str + The three-level (fully qualified) name of the registered model + :param alias: str + The name of the alias + + + + + .. py:method:: get(full_name: str) -> RegisteredModelInfo + + Get a Registered Model. + + Get a registered model. + + The caller must be a metastore admin or an owner of (or have the **EXECUTE** privilege on) the + registered model. For the latter case, the caller must also be the owner or have the **USE_CATALOG** + privilege on the parent catalog and the **USE_SCHEMA** privilege on the parent schema. + + :param full_name: str + The three-level (fully qualified) name of the registered model + + :returns: :class:`RegisteredModelInfo` + + + .. py:method:: list( [, catalog_name: Optional[str], max_results: Optional[int], page_token: Optional[str], schema_name: Optional[str]]) -> Iterator[RegisteredModelInfo] + + List Registered Models. + + List registered models. You can list registered models under a particular schema, or list all + registered models in the current metastore. + + The returned models are filtered based on the privileges of the calling user. For example, the + metastore admin is able to list all the registered models. A regular user needs to be the owner or + have the **EXECUTE** privilege on the registered model to receive the registered models in the + response. For the latter case, the caller must also be the owner or have the **USE_CATALOG** privilege + on the parent catalog and the **USE_SCHEMA** privilege on the parent schema. + + There is no guarantee of a specific ordering of the elements in the response. + + :param catalog_name: str (optional) + The identifier of the catalog under which to list registered models. If specified, schema_name must + be specified. 
+ :param max_results: int (optional) + Max number of registered models to return. If catalog and schema are unspecified, max_results must + be specified. If max_results is unspecified, we return all results, starting from the page specified + by page_token. + :param page_token: str (optional) + Opaque token to send for the next page of results (pagination). + :param schema_name: str (optional) + The identifier of the schema under which to list registered models. If specified, catalog_name must + be specified. + + :returns: Iterator over :class:`RegisteredModelInfo` + + + .. py:method:: set_alias(full_name: str, alias: str, version_num: int) -> RegisteredModelAlias + + Set a Registered Model Alias. + + Set an alias on the specified registered model. + + The caller must be a metastore admin or an owner of the registered model. For the latter case, the + caller must also be the owner or have the **USE_CATALOG** privilege on the parent catalog and the + **USE_SCHEMA** privilege on the parent schema. + + :param full_name: str + Full name of the registered model + :param alias: str + The name of the alias + :param version_num: int + The version number of the model version to which the alias points + + :returns: :class:`RegisteredModelAlias` + + + .. py:method:: update(full_name: str [, comment: Optional[str], name: Optional[str], new_name: Optional[str], owner: Optional[str]]) -> RegisteredModelInfo + + Update a Registered Model. + + Updates the specified registered model. + + The caller must be a metastore admin or an owner of the registered model. For the latter case, the + caller must also be the owner or have the **USE_CATALOG** privilege on the parent catalog and the + **USE_SCHEMA** privilege on the parent schema. + + Currently only the name, the owner or the comment of the registered model can be updated. 
+ + :param full_name: str + The three-level (fully qualified) name of the registered model + :param comment: str (optional) + The comment attached to the registered model + :param name: str (optional) + The name of the registered model + :param new_name: str (optional) + New name for the registered model. + :param owner: str (optional) + The identifier of the user who owns the registered model + + :returns: :class:`RegisteredModelInfo` + \ No newline at end of file diff --git a/docs/workspace/catalog/schemas.rst b/docs/workspace/catalog/schemas.rst new file mode 100644 index 000000000..46ea49ff4 --- /dev/null +++ b/docs/workspace/catalog/schemas.rst @@ -0,0 +1,186 @@ +``w.schemas``: Schemas +====================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: SchemasAPI + + A schema (also called a database) is the second layer of Unity Catalog’s three-level namespace. A schema + organizes tables, views and functions. To access (or list) a table or view in a schema, users must have + the USE_SCHEMA data permission on the schema and its parent catalog, and they must have the SELECT + permission on the table or view. + + .. py:method:: create(name: str, catalog_name: str [, comment: Optional[str], properties: Optional[Dict[str, str]], storage_root: Optional[str]]) -> SchemaInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + # cleanup + w.catalogs.delete(name=created_catalog.name, force=True) + w.schemas.delete(full_name=created_schema.full_name) + + Create a schema. + + Creates a new schema for catalog in the Metastore. The caller must be a metastore admin, or have the + **CREATE_SCHEMA** privilege in the parent catalog. + + :param name: str + Name of schema, relative to parent catalog. 
+ :param catalog_name: str + Name of parent catalog. + :param comment: str (optional) + User-provided free-form text description. + :param properties: Dict[str,str] (optional) + A map of key-value properties attached to the securable. + :param storage_root: str (optional) + Storage root URL for managed tables within schema. + + :returns: :class:`SchemaInfo` + + + .. py:method:: delete(full_name: str) + + Delete a schema. + + Deletes the specified schema from the parent catalog. The caller must be the owner of the schema or an + owner of the parent catalog. + + :param full_name: str + Full name of the schema. + + + + + .. py:method:: get(full_name: str) -> SchemaInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + new_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=new_catalog.name) + + _ = w.schemas.get(full_name=created.full_name) + + # cleanup + w.catalogs.delete(name=new_catalog.name, force=True) + w.schemas.delete(full_name=created.full_name) + + Get a schema. + + Gets the specified schema within the metastore. The caller must be a metastore admin, the owner of the + schema, or a user that has the **USE_SCHEMA** privilege on the schema. + + :param full_name: str + Full name of the schema. + + :returns: :class:`SchemaInfo` + + + .. py:method:: list(catalog_name: str [, max_results: Optional[int], page_token: Optional[str]]) -> Iterator[SchemaInfo] + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + new_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + all = w.schemas.list(catalog_name=new_catalog.name) + + # cleanup + w.catalogs.delete(name=new_catalog.name, force=True) + + List schemas. + + Gets an array of schemas for a catalog in the metastore. 
If the caller is the metastore admin or the + owner of the parent catalog, all schemas for the catalog will be retrieved. Otherwise, only schemas + owned by the caller (or for which the caller has the **USE_SCHEMA** privilege) will be retrieved. For + unpaginated request, there is no guarantee of a specific ordering of the elements in the array. For + paginated request, elements are ordered by their name. + + :param catalog_name: str + Parent catalog for schemas of interest. + :param max_results: int (optional) + Maximum number of schemas to return. If not set, all the schemas are returned (not recommended). - + when set to a value greater than 0, the page length is the minimum of this value and a server + configured value; - when set to 0, the page length is set to a server configured value + (recommended); - when set to a value less than 0, an invalid parameter error is returned; + :param page_token: str (optional) + Opaque pagination token to go to next page based on previous query. + + :returns: Iterator over :class:`SchemaInfo` + + + .. py:method:: update(full_name: str [, comment: Optional[str], enable_predictive_optimization: Optional[EnablePredictiveOptimization], name: Optional[str], new_name: Optional[str], owner: Optional[str], properties: Optional[Dict[str, str]]]) -> SchemaInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + new_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=new_catalog.name) + + _ = w.schemas.update(full_name=created.full_name, comment=f'sdk-{time.time_ns()}') + + # cleanup + w.catalogs.delete(name=new_catalog.name, force=True) + w.schemas.delete(full_name=created.full_name) + + Update a schema. + + Updates a schema for a catalog. The caller must be the owner of the schema or a metastore admin. 
If + the caller is a metastore admin, only the __owner__ field can be changed in the update. If the + __name__ field must be updated, the caller must be a metastore admin or have the **CREATE_SCHEMA** + privilege on the parent catalog. + + :param full_name: str + Full name of the schema. + :param comment: str (optional) + User-provided free-form text description. + :param enable_predictive_optimization: :class:`EnablePredictiveOptimization` (optional) + Whether predictive optimization should be enabled for this object and objects under it. + :param name: str (optional) + Name of schema, relative to parent catalog. + :param new_name: str (optional) + New name for the schema. + :param owner: str (optional) + Username of current owner of schema. + :param properties: Dict[str,str] (optional) + A map of key-value properties attached to the securable. + + :returns: :class:`SchemaInfo` + \ No newline at end of file diff --git a/docs/workspace/catalog/storage_credentials.rst b/docs/workspace/catalog/storage_credentials.rst new file mode 100644 index 000000000..42a830f09 --- /dev/null +++ b/docs/workspace/catalog/storage_credentials.rst @@ -0,0 +1,242 @@ +``w.storage_credentials``: Storage Credentials +============================================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: StorageCredentialsAPI + + A storage credential represents an authentication and authorization mechanism for accessing data stored on + your cloud tenant. Each storage credential is subject to Unity Catalog access-control policies that + control which users and groups can access the credential. If a user does not have access to a storage + credential in Unity Catalog, the request fails and Unity Catalog does not attempt to authenticate to your + cloud tenant on the user’s behalf. + + Databricks recommends using external locations rather than using storage credentials directly. + + To create storage credentials, you must be a Databricks account admin. 
The account admin who creates the + storage credential can delegate ownership to another user or group to manage permissions on it. + + .. py:method:: create(name: str [, aws_iam_role: Optional[AwsIamRole], azure_managed_identity: Optional[AzureManagedIdentity], azure_service_principal: Optional[AzureServicePrincipal], cloudflare_api_token: Optional[CloudflareApiToken], comment: Optional[str], databricks_gcp_service_account: Optional[Any], read_only: Optional[bool], skip_validation: Optional[bool]]) -> StorageCredentialInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + created = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"])) + + # cleanup + w.storage_credentials.delete(name=created.name) + + Create a storage credential. + + Creates a new storage credential. + + :param name: str + The credential name. The name must be unique within the metastore. + :param aws_iam_role: :class:`AwsIamRole` (optional) + The AWS IAM role configuration. + :param azure_managed_identity: :class:`AzureManagedIdentity` (optional) + The Azure managed identity configuration. + :param azure_service_principal: :class:`AzureServicePrincipal` (optional) + The Azure service principal configuration. + :param cloudflare_api_token: :class:`CloudflareApiToken` (optional) + The Cloudflare API token configuration. + :param comment: str (optional) + Comment associated with the credential. + :param databricks_gcp_service_account: Any (optional) + The managed GCP service account configuration. + :param read_only: bool (optional) + Whether the storage credential is only usable for read operations. + :param skip_validation: bool (optional) + Supplying true to this argument skips validation of the created credential. + + :returns: :class:`StorageCredentialInfo` + + + .. 
py:method:: delete(name: str [, force: Optional[bool]]) + + Delete a credential. + + Deletes a storage credential from the metastore. The caller must be an owner of the storage + credential. + + :param name: str + Name of the storage credential. + :param force: bool (optional) + Force deletion even if there are dependent external locations or external tables. + + + + + .. py:method:: get(name: str) -> StorageCredentialInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + created = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"])) + + by_name = w.storage_credentials.get(name=created.name) + + # cleanup + w.storage_credentials.delete(name=created.name) + + Get a credential. + + Gets a storage credential from the metastore. The caller must be a metastore admin, the owner of the + storage credential, or have some permission on the storage credential. + + :param name: str + Name of the storage credential. + + :returns: :class:`StorageCredentialInfo` + + + .. py:method:: list( [, max_results: Optional[int], page_token: Optional[str]]) -> Iterator[StorageCredentialInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.storage_credentials.list() + + List credentials. + + Gets an array of storage credentials (as __StorageCredentialInfo__ objects). The array is limited to + only those storage credentials the caller has permission to access. If the caller is a metastore + admin, retrieval of credentials is unrestricted. For unpaginated request, there is no guarantee of a + specific ordering of the elements in the array. For paginated request, elements are ordered by their + name. + + :param max_results: int (optional) + Maximum number of storage credentials to return. 
If not set, all the storage credentials are + returned (not recommended). - when set to a value greater than 0, the page length is the minimum of + this value and a server configured value; - when set to 0, the page length is set to a server + configured value (recommended); - when set to a value less than 0, an invalid parameter error is + returned; + :param page_token: str (optional) + Opaque pagination token to go to next page based on previous query. + + :returns: Iterator over :class:`StorageCredentialInfo` + + + .. py:method:: update(name: str [, aws_iam_role: Optional[AwsIamRole], azure_managed_identity: Optional[AzureManagedIdentity], azure_service_principal: Optional[AzureServicePrincipal], cloudflare_api_token: Optional[CloudflareApiToken], comment: Optional[str], databricks_gcp_service_account: Optional[Any], force: Optional[bool], new_name: Optional[str], owner: Optional[str], read_only: Optional[bool], skip_validation: Optional[bool]]) -> StorageCredentialInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + created = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"])) + + _ = w.storage_credentials.update( + name=created.name, + comment=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"])) + + # cleanup + w.storage_credentials.delete(name=created.name) + + Update a credential. + + Updates a storage credential on the metastore. + + :param name: str + Name of the storage credential. + :param aws_iam_role: :class:`AwsIamRole` (optional) + The AWS IAM role configuration. + :param azure_managed_identity: :class:`AzureManagedIdentity` (optional) + The Azure managed identity configuration. 
+ :param azure_service_principal: :class:`AzureServicePrincipal` (optional) + The Azure service principal configuration. + :param cloudflare_api_token: :class:`CloudflareApiToken` (optional) + The Cloudflare API token configuration. + :param comment: str (optional) + Comment associated with the credential. + :param databricks_gcp_service_account: Any (optional) + The managed GCP service account configuration. + :param force: bool (optional) + Force update even if there are dependent external locations or external tables. + :param new_name: str (optional) + New name for the storage credential. + :param owner: str (optional) + Username of current owner of credential. + :param read_only: bool (optional) + Whether the storage credential is only usable for read operations. + :param skip_validation: bool (optional) + Supplying true to this argument skips validation of the updated credential. + + :returns: :class:`StorageCredentialInfo` + + + .. py:method:: validate( [, aws_iam_role: Optional[AwsIamRole], azure_managed_identity: Optional[AzureManagedIdentity], azure_service_principal: Optional[AzureServicePrincipal], cloudflare_api_token: Optional[CloudflareApiToken], databricks_gcp_service_account: Optional[Any], external_location_name: Optional[str], read_only: Optional[bool], storage_credential_name: Optional[Any], url: Optional[str]]) -> ValidateStorageCredentialResponse + + Validate a storage credential. + + Validates a storage credential. At least one of __external_location_name__ and __url__ need to be + provided. If only one of them is provided, it will be used for validation. And if both are provided, + the __url__ will be used for validation, and __external_location_name__ will be ignored when checking + overlapping urls. + + Either the __storage_credential_name__ or the cloud-specific credential must be provided. 
+ + The caller must be a metastore admin or the storage credential owner or have the + **CREATE_EXTERNAL_LOCATION** privilege on the metastore and the storage credential. + + :param aws_iam_role: :class:`AwsIamRole` (optional) + The AWS IAM role configuration. + :param azure_managed_identity: :class:`AzureManagedIdentity` (optional) + The Azure managed identity configuration. + :param azure_service_principal: :class:`AzureServicePrincipal` (optional) + The Azure service principal configuration. + :param cloudflare_api_token: :class:`CloudflareApiToken` (optional) + The Cloudflare API token configuration. + :param databricks_gcp_service_account: Any (optional) + The Databricks created GCP service account configuration. + :param external_location_name: str (optional) + The name of an existing external location to validate. + :param read_only: bool (optional) + Whether the storage credential is only usable for read operations. + :param storage_credential_name: Any (optional) + The name of the storage credential to validate. + :param url: str (optional) + The external location url to validate. + + :returns: :class:`ValidateStorageCredentialResponse` + \ No newline at end of file diff --git a/docs/workspace/catalog/system_schemas.rst b/docs/workspace/catalog/system_schemas.rst new file mode 100644 index 000000000..13c4a56df --- /dev/null +++ b/docs/workspace/catalog/system_schemas.rst @@ -0,0 +1,51 @@ +``w.system_schemas``: SystemSchemas +=================================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: SystemSchemasAPI + + A system schema is a schema that lives within the system catalog. A system schema may contain information + about customer usage of Unity Catalog such as audit-logs, billing-logs, lineage information, etc. + + .. py:method:: disable(metastore_id: str, schema_name: DisableSchemaName) + + Disable a system schema. + + Disables the system schema and removes it from the system catalog. 
The caller must be an account admin + or a metastore admin. + + :param metastore_id: str + The metastore ID under which the system schema lives. + :param schema_name: :class:`DisableSchemaName` + Full name of the system schema. + + + + + .. py:method:: enable(metastore_id: str, schema_name: EnableSchemaName) + + Enable a system schema. + + Enables the system schema and adds it to the system catalog. The caller must be an account admin or a + metastore admin. + + :param metastore_id: str + The metastore ID under which the system schema lives. + :param schema_name: :class:`EnableSchemaName` + Full name of the system schema. + + + + + .. py:method:: list(metastore_id: str) -> Iterator[SystemSchemaInfo] + + List system schemas. + + Gets an array of system schemas for a metastore. The caller must be an account admin or a metastore + admin. + + :param metastore_id: str + The ID for the metastore in which the system schema resides. + + :returns: Iterator over :class:`SystemSchemaInfo` + \ No newline at end of file diff --git a/docs/workspace/catalog/table_constraints.rst b/docs/workspace/catalog/table_constraints.rst new file mode 100644 index 000000000..dd46c42f3 --- /dev/null +++ b/docs/workspace/catalog/table_constraints.rst @@ -0,0 +1,62 @@ +``w.table_constraints``: Table Constraints +========================================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: TableConstraintsAPI + + Primary key and foreign key constraints encode relationships between fields in tables. + + Primary and foreign keys are informational only and are not enforced. Foreign keys must reference a + primary key in another table. This primary key is the parent constraint of the foreign key and the table + this primary key is on is the parent table of the foreign key. Similarly, the foreign key is the child + constraint of its referenced primary key; the table of the foreign key is the child table of the primary + key. 
+ + You can declare primary keys and foreign keys as part of the table specification during table creation. + You can also add or drop constraints on existing tables. + + .. py:method:: create(full_name_arg: str, constraint: TableConstraint) -> TableConstraint + + Create a table constraint. + + Creates a new table constraint. + + For the table constraint creation to succeed, the user must satisfy both of these conditions: - the + user must have the **USE_CATALOG** privilege on the table's parent catalog, the **USE_SCHEMA** + privilege on the table's parent schema, and be the owner of the table. - if the new constraint is a + __ForeignKeyConstraint__, the user must have the **USE_CATALOG** privilege on the referenced parent + table's catalog, the **USE_SCHEMA** privilege on the referenced parent table's schema, and be the + owner of the referenced parent table. + + :param full_name_arg: str + The full name of the table referenced by the constraint. + :param constraint: :class:`TableConstraint` + A table constraint, as defined by *one* of the following fields being set: + __primary_key_constraint__, __foreign_key_constraint__, __named_table_constraint__. + + :returns: :class:`TableConstraint` + + + .. py:method:: delete(full_name: str, constraint_name: str, cascade: bool) + + Delete a table constraint. + + Deletes a table constraint. + + For the table constraint deletion to succeed, the user must satisfy both of these conditions: - the + user must have the **USE_CATALOG** privilege on the table's parent catalog, the **USE_SCHEMA** + privilege on the table's parent schema, and be the owner of the table. - if __cascade__ argument is + **true**, the user must have the following permissions on all of the child tables: the **USE_CATALOG** + privilege on the table's catalog, the **USE_SCHEMA** privilege on the table's schema, and be the owner + of the table. + + :param full_name: str + Full name of the table referenced by the constraint. 
+ :param constraint_name: str + The name of the constraint to delete. + :param cascade: bool + If true, try deleting all child constraints of the current constraint. If false, reject this + operation if the current constraint has any child constraints. + + + \ No newline at end of file diff --git a/docs/workspace/catalog/tables.rst b/docs/workspace/catalog/tables.rst new file mode 100644 index 000000000..90b51ca57 --- /dev/null +++ b/docs/workspace/catalog/tables.rst @@ -0,0 +1,201 @@ +``w.tables``: Tables +==================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: TablesAPI + + A table resides in the third layer of Unity Catalog’s three-level namespace. It contains rows of data. + To create a table, users must have CREATE_TABLE and USE_SCHEMA permissions on the schema, and they must + have the USE_CATALOG permission on its parent catalog. To query a table, users must have the SELECT + permission on the table, and they must have the USE_CATALOG permission on its parent catalog and the + USE_SCHEMA permission on its parent schema. + + A table can be managed or external. From an API perspective, a __VIEW__ is a particular kind of table + (rather than a managed or external table). + + .. py:method:: delete(full_name: str) + + Delete a table. + + Deletes a table from the specified parent catalog and schema. The caller must be the owner of the + parent catalog, have the **USE_CATALOG** privilege on the parent catalog and be the owner of the + parent schema, or be the owner of the table and have the **USE_CATALOG** privilege on the parent + catalog and the **USE_SCHEMA** privilege on the parent schema. + + :param full_name: str + Full name of the table. + + + + + .. py:method:: get(full_name: str [, include_delta_metadata: Optional[bool]]) -> TableInfo + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + table_name = f'sdk-{time.time_ns()}' + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + _ = w.statement_execution.execute(warehouse_id=os.environ["TEST_DEFAULT_WAREHOUSE_ID"], + catalog=created_catalog.name, + schema=created_schema.name, + statement="CREATE TABLE %s AS SELECT 2+2 as four" % (table_name)).result() + + table_full_name = "%s.%s.%s" % (created_catalog.name, created_schema.name, table_name) + + created_table = w.tables.get(full_name=table_full_name) + + # cleanup + w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=created_catalog.name, force=True) + w.tables.delete(full_name=table_full_name) + + Get a table. + + Gets a table from the metastore for a specific catalog and schema. The caller must be a metastore + admin, be the owner of the table and have the **USE_CATALOG** privilege on the parent catalog and the + **USE_SCHEMA** privilege on the parent schema, or be the owner of the table and have the **SELECT** + privilege on it as well. + + :param full_name: str + Full name of the table. + :param include_delta_metadata: bool (optional) + Whether delta metadata should be included in the response. + + :returns: :class:`TableInfo` + + + .. py:method:: list(catalog_name: str, schema_name: str [, include_delta_metadata: Optional[bool], max_results: Optional[int], omit_columns: Optional[bool], omit_properties: Optional[bool], page_token: Optional[str]]) -> Iterator[TableInfo] + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + all_tables = w.tables.list(catalog_name=created_catalog.name, schema_name=created_schema.name) + + # cleanup + w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=created_catalog.name, force=True) + + List tables. + + Gets an array of all tables for the current metastore under the parent catalog and schema. The caller + must be a metastore admin or an owner of (or have the **SELECT** privilege on) the table. For the + latter case, the caller must also be the owner or have the **USE_CATALOG** privilege on the parent + catalog and the **USE_SCHEMA** privilege on the parent schema. There is no guarantee of a specific + ordering of the elements in the array. + + :param catalog_name: str + Name of parent catalog for tables of interest. + :param schema_name: str + Parent schema of tables. + :param include_delta_metadata: bool (optional) + Whether delta metadata should be included in the response. + :param max_results: int (optional) + Maximum number of tables to return. If not set, all the tables are returned (not recommended). - + when set to a value greater than 0, the page length is the minimum of this value and a server + configured value; - when set to 0, the page length is set to a server configured value + (recommended); - when set to a value less than 0, an invalid parameter error is returned; + :param omit_columns: bool (optional) + Whether to omit the columns of the table from the response or not. + :param omit_properties: bool (optional) + Whether to omit the properties of the table from the response or not. + :param page_token: str (optional) + Opaque token to send for the next page of results (pagination). 
+ + :returns: Iterator over :class:`TableInfo` + + + .. py:method:: list_summaries(catalog_name: str [, max_results: Optional[int], page_token: Optional[str], schema_name_pattern: Optional[str], table_name_pattern: Optional[str]]) -> Iterator[TableSummary] + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + summaries = w.tables.list_summaries(catalog_name=created_catalog.name, + schema_name_pattern=created_schema.name) + + # cleanup + w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=created_catalog.name, force=True) + + List table summaries. + + Gets an array of summaries for tables for a schema and catalog within the metastore. The table + summaries returned are either: + + * summaries for tables (within the current metastore and parent catalog and schema), when the user is + a metastore admin, or: * summaries for tables and schemas (within the current metastore and parent + catalog) for which the user has ownership or the **SELECT** privilege on the table and ownership or + **USE_SCHEMA** privilege on the schema, provided that the user also has ownership or the + **USE_CATALOG** privilege on the parent catalog. + + There is no guarantee of a specific ordering of the elements in the array. + + :param catalog_name: str + Name of parent catalog for tables of interest. + :param max_results: int (optional) + Maximum number of summaries for tables to return. If not set, the page length is set to a server + configured value (10000, as of 1/5/2024). 
- when set to a value greater than 0, the page length is + the minimum of this value and a server configured value (10000, as of 1/5/2024); - when set to 0, + the page length is set to a server configured value (10000, as of 1/5/2024) (recommended); - when + set to a value less than 0, an invalid parameter error is returned; + :param page_token: str (optional) + Opaque pagination token to go to next page based on previous query. + :param schema_name_pattern: str (optional) + A sql LIKE pattern (% and _) for schema names. All schemas will be returned if not set or empty. + :param table_name_pattern: str (optional) + A sql LIKE pattern (% and _) for table names. All tables will be returned if not set or empty. + + :returns: Iterator over :class:`TableSummary` + + + .. py:method:: update(full_name: str [, owner: Optional[str]]) + + Update a table owner. + + Change the owner of the table. The caller must be the owner of the parent catalog, have the + **USE_CATALOG** privilege on the parent catalog and be the owner of the parent schema, or be the owner + of the table and have the **USE_CATALOG** privilege on the parent catalog and the **USE_SCHEMA** + privilege on the parent schema. + + :param full_name: str + Full name of the table. + :param owner: str (optional) + + + \ No newline at end of file diff --git a/docs/workspace/catalog/volumes.rst b/docs/workspace/catalog/volumes.rst new file mode 100644 index 000000000..0e0426e5e --- /dev/null +++ b/docs/workspace/catalog/volumes.rst @@ -0,0 +1,277 @@ +``w.volumes``: Volumes +====================== +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: VolumesAPI + + Volumes are a Unity Catalog (UC) capability for accessing, storing, governing, organizing and processing + files. 
Use cases include running machine learning on unstructured data such as image, audio, video, or PDF + files, organizing data sets during the data exploration stages in data science, working with libraries + that require access to the local file system on cluster machines, storing library and config files of + arbitrary formats such as .whl or .txt centrally and providing secure access across workspaces to it, or + transforming and querying non-tabular data files in ETL. + + .. py:method:: create(catalog_name: str, schema_name: str, name: str, volume_type: VolumeType [, comment: Optional[str], storage_location: Optional[str]]) -> VolumeInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + storage_credential = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"]), + comment="created via SDK") + + external_location = w.external_locations.create(name=f'sdk-{time.time_ns()}', + credential_name=storage_credential.name, + comment="created via SDK", + url="s3://" + os.environ["TEST_BUCKET"] + "/" + + f'sdk-{time.time_ns()}') + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + created_volume = w.volumes.create(catalog_name=created_catalog.name, + schema_name=created_schema.name, + name=f'sdk-{time.time_ns()}', + storage_location=external_location.url, + volume_type=catalog.VolumeType.EXTERNAL) + + # cleanup + w.storage_credentials.delete(name=storage_credential.name) + w.external_locations.delete(name=external_location.name) + w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=created_catalog.name, force=True) + w.volumes.delete(full_name_arg=created_volume.full_name) + + Create a Volume. 
+ + Creates a new volume. + + The user can create either an external volume or a managed volume. An external volume will be + created in the specified external location, while a managed volume will be located in the default + location which is specified by the parent schema, or the parent catalog, or the Metastore. + + For the volume creation to succeed, the user must satisfy the following conditions: - The caller must be a + metastore admin, or be the owner of the parent catalog and schema, or have the **USE_CATALOG** + privilege on the parent catalog and the **USE_SCHEMA** privilege on the parent schema. - The caller + must have **CREATE VOLUME** privilege on the parent schema. + + For an external volume, the following conditions must also be satisfied: - The caller must have **CREATE + EXTERNAL VOLUME** privilege on the external location. - There are no other tables, nor volumes + existing in the specified storage location. - The specified storage location is not under the location + of other tables, nor volumes, or catalogs or schemas. + + :param catalog_name: str + The name of the catalog where the schema and the volume are + :param schema_name: str + The name of the schema where the volume is + :param name: str + The name of the volume + :param volume_type: :class:`VolumeType` + :param comment: str (optional) + The comment attached to the volume + :param storage_location: str (optional) + The storage location on the cloud + + :returns: :class:`VolumeInfo` + + + .. py:method:: delete(full_name_arg: str) + + Delete a Volume. + + Deletes a volume from the specified parent catalog and schema. + + The caller must be a metastore admin or an owner of the volume. For the latter case, the caller must + also be the owner or have the **USE_CATALOG** privilege on the parent catalog and the **USE_SCHEMA** + privilege on the parent schema. + + :param full_name_arg: str + The three-level (fully qualified) name of the volume + + + + + .. 
py:method:: list(catalog_name: str, schema_name: str) -> Iterator[VolumeInfo] + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + all_volumes = w.volumes.list(catalog_name=created_catalog.name, schema_name=created_schema.name) + + # cleanup + w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=created_catalog.name, force=True) + + List Volumes. + + Gets an array of all volumes for the current metastore under the parent catalog and schema. + + The returned volumes are filtered based on the privileges of the calling user. For example, the + metastore admin is able to list all the volumes. A regular user needs to be the owner or have the + **READ VOLUME** privilege on the volume to receive the volumes in the response. For the latter case, + the caller must also be the owner or have the **USE_CATALOG** privilege on the parent catalog and the + **USE_SCHEMA** privilege on the parent schema. + + There is no guarantee of a specific ordering of the elements in the array. + + :param catalog_name: str + The identifier of the catalog + :param schema_name: str + The identifier of the schema + + :returns: Iterator over :class:`VolumeInfo` + + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + storage_credential = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"]), + comment="created via SDK") + + external_location = w.external_locations.create(name=f'sdk-{time.time_ns()}', + credential_name=storage_credential.name, + comment="created via SDK", + url="s3://" + os.environ["TEST_BUCKET"] + "/" + + f'sdk-{time.time_ns()}') + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + created_volume = w.volumes.create(catalog_name=created_catalog.name, + schema_name=created_schema.name, + name=f'sdk-{time.time_ns()}', + storage_location=external_location.url, + volume_type=catalog.VolumeType.EXTERNAL) + + loaded_volume = w.volumes.read(full_name_arg=created_volume.full_name) + + # cleanup + w.storage_credentials.delete(name=storage_credential.name) + w.external_locations.delete(name=external_location.name) + w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=created_catalog.name, force=True) + w.volumes.delete(full_name_arg=created_volume.full_name) + + Get a Volume. + + Gets a volume from the metastore for a specific catalog and schema. + + The caller must be a metastore admin or an owner of (or have the **READ VOLUME** privilege on) the + volume. For the latter case, the caller must also be the owner or have the **USE_CATALOG** privilege + on the parent catalog and the **USE_SCHEMA** privilege on the parent schema. + + :param full_name_arg: str + The three-level (fully qualified) name of the volume + + :returns: :class:`VolumeInfo` + + + .. 
py:method:: update(full_name_arg: str [, comment: Optional[str], name: Optional[str], new_name: Optional[str], owner: Optional[str]]) -> VolumeInfo + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog + + w = WorkspaceClient() + + storage_credential = w.storage_credentials.create( + name=f'sdk-{time.time_ns()}', + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"]), + comment="created via SDK") + + external_location = w.external_locations.create(name=f'sdk-{time.time_ns()}', + credential_name=storage_credential.name, + comment="created via SDK", + url="s3://" + os.environ["TEST_BUCKET"] + "/" + + f'sdk-{time.time_ns()}') + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + created_volume = w.volumes.create(catalog_name=created_catalog.name, + schema_name=created_schema.name, + name=f'sdk-{time.time_ns()}', + storage_location=external_location.url, + volume_type=catalog.VolumeType.EXTERNAL) + + loaded_volume = w.volumes.read(full_name_arg=created_volume.full_name) + + _ = w.volumes.update(full_name_arg=loaded_volume.full_name, comment="Updated volume comment") + + # cleanup + w.storage_credentials.delete(name=storage_credential.name) + w.external_locations.delete(name=external_location.name) + w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=created_catalog.name, force=True) + w.volumes.delete(full_name_arg=created_volume.full_name) + + Update a Volume. + + Updates the specified volume under the specified parent catalog and schema. + + The caller must be a metastore admin or an owner of the volume. For the latter case, the caller must + also be the owner or have the **USE_CATALOG** privilege on the parent catalog and the **USE_SCHEMA** + privilege on the parent schema. 
+ + Currently only the name, the owner or the comment of the volume can be updated. + + :param full_name_arg: str + The three-level (fully qualified) name of the volume + :param comment: str (optional) + The comment attached to the volume + :param name: str (optional) + The name of the volume + :param new_name: str (optional) + New name for the volume. + :param owner: str (optional) + The identifier of the user who owns the volume + + :returns: :class:`VolumeInfo` + \ No newline at end of file diff --git a/docs/workspace/catalog/workspace_bindings.rst b/docs/workspace/catalog/workspace_bindings.rst new file mode 100644 index 000000000..e1ec753d4 --- /dev/null +++ b/docs/workspace/catalog/workspace_bindings.rst @@ -0,0 +1,123 @@ +``w.workspace_bindings``: Workspace Bindings +============================================ +.. currentmodule:: databricks.sdk.service.catalog + +.. py:class:: WorkspaceBindingsAPI + + A securable in Databricks can be configured as __OPEN__ or __ISOLATED__. An __OPEN__ securable can be + accessed from any workspace, while an __ISOLATED__ securable can only be accessed from a configured list + of workspaces. This API allows you to configure (bind) securables to workspaces. + + NOTE: The __isolation_mode__ is configured for the securable itself (using its Update method) and the + workspace bindings are only consulted when the securable's __isolation_mode__ is set to __ISOLATED__. + + A securable's workspace bindings can be configured by a metastore admin or the owner of the securable. + + The original path (/api/2.1/unity-catalog/workspace-bindings/catalogs/{name}) is deprecated. Please use + the new path (/api/2.1/unity-catalog/bindings/{securable_type}/{securable_name}) which introduces the + ability to bind a securable in READ_ONLY mode (catalogs only). + + Securables that support binding: - catalog + + .. py:method:: get(name: str) -> CurrentWorkspaceBindings + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + bindings = w.workspace_bindings.get(name=created.name) + + # cleanup + w.catalogs.delete(name=created.name, force=True) + + Get catalog workspace bindings. + + Gets workspace bindings of the catalog. The caller must be a metastore admin or an owner of the + catalog. + + :param name: str + The name of the catalog. + + :returns: :class:`CurrentWorkspaceBindings` + + + .. py:method:: get_bindings(securable_type: str, securable_name: str) -> WorkspaceBindingsResponse + + Get securable workspace bindings. + + Gets workspace bindings of the securable. The caller must be a metastore admin or an owner of the + securable. + + :param securable_type: str + The type of the securable. + :param securable_name: str + The name of the securable. + + :returns: :class:`WorkspaceBindingsResponse` + + + .. py:method:: update(name: str [, assign_workspaces: Optional[List[int]], unassign_workspaces: Optional[List[int]]]) -> CurrentWorkspaceBindings + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + this_workspace_id = os.environ["THIS_WORKSPACE_ID"] + + created = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + _ = w.workspace_bindings.update(name=created.name, assign_workspaces=[this_workspace_id]) + + # cleanup + w.catalogs.delete(name=created.name, force=True) + + Update catalog workspace bindings. + + Updates workspace bindings of the catalog. The caller must be a metastore admin or an owner of the + catalog. + + :param name: str + The name of the catalog. + :param assign_workspaces: List[int] (optional) + A list of workspace IDs. + :param unassign_workspaces: List[int] (optional) + A list of workspace IDs. + + :returns: :class:`CurrentWorkspaceBindings` + + + .. 
py:method:: update_bindings(securable_type: str, securable_name: str [, add: Optional[List[WorkspaceBinding]], remove: Optional[List[WorkspaceBinding]]]) -> WorkspaceBindingsResponse + + Update securable workspace bindings. + + Updates workspace bindings of the securable. The caller must be a metastore admin or an owner of the + securable. + + :param securable_type: str + The type of the securable. + :param securable_name: str + The name of the securable. + :param add: List[:class:`WorkspaceBinding`] (optional) + List of workspace bindings + :param remove: List[:class:`WorkspaceBinding`] (optional) + List of workspace bindings + + :returns: :class:`WorkspaceBindingsResponse` + \ No newline at end of file diff --git a/docs/workspace/compute/cluster_policies.rst b/docs/workspace/compute/cluster_policies.rst new file mode 100644 index 000000000..f5c27d5c2 --- /dev/null +++ b/docs/workspace/compute/cluster_policies.rst @@ -0,0 +1,289 @@ +``w.cluster_policies``: Cluster Policies +======================================== +.. currentmodule:: databricks.sdk.service.compute + +.. py:class:: ClusterPoliciesAPI + + You can use cluster policies to control users' ability to configure clusters based on a set of rules. + These rules specify which attributes or attribute values can be used during cluster creation. Cluster + policies have ACLs that limit their use to specific users and groups. + + With cluster policies, you can: - Auto-install cluster libraries on the next restart by listing them in + the policy's "libraries" field. - Limit users to creating clusters with the prescribed settings. - + Simplify the user interface, enabling more users to create clusters, by fixing and hiding some fields. - + Manage costs by setting limits on attributes that impact the hourly rate. 
+ + Cluster policy permissions limit which policies a user can select in the Policy drop-down when the user + creates a cluster: - A user who has unrestricted cluster create permission can select the Unrestricted + policy and create fully-configurable clusters. - A user who has both unrestricted cluster create + permission and access to cluster policies can select the Unrestricted policy and policies they have access + to. - A user that has access to only cluster policies, can select the policies they have access to. + + If no policies exist in the workspace, the Policy drop-down doesn't appear. Only admin users can create, + edit, and delete policies. Admin users also have access to all policies. + + .. py:method:: create(name: str [, definition: Optional[str], description: Optional[str], libraries: Optional[List[Library]], max_clusters_per_user: Optional[int], policy_family_definition_overrides: Optional[str], policy_family_id: Optional[str]]) -> CreatePolicyResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.cluster_policies.create(name=f'sdk-{time.time_ns()}', + definition="""{ + "spark_conf.spark.databricks.delta.preview.enabled": { + "type": "fixed", + "value": true + } + } + """) + + # cleanup + w.cluster_policies.delete(policy_id=created.policy_id) + + Create a new policy. + + Creates a new policy with prescribed settings. + + :param name: str + Cluster Policy name requested by the user. This has to be unique. Length must be between 1 and 100 + characters. + :param definition: str (optional) + Policy definition document expressed in [Databricks Cluster Policy Definition Language]. + + [Databricks Cluster Policy Definition Language]: https://docs.databricks.com/administration-guide/clusters/policy-definition.html + :param description: str (optional) + Additional human-readable description of the cluster policy. 
+ :param libraries: List[:class:`Library`] (optional) + A list of libraries to be installed on the next cluster restart that uses this policy. + :param max_clusters_per_user: int (optional) + Max number of clusters per user that can be active using this policy. If not present, there is no + max limit. + :param policy_family_definition_overrides: str (optional) + Policy definition JSON document expressed in [Databricks Policy Definition Language]. The JSON + document must be passed as a string and cannot be embedded in the requests. + + You can use this to customize the policy definition inherited from the policy family. Policy rules + specified here are merged into the inherited policy definition. + + [Databricks Policy Definition Language]: https://docs.databricks.com/administration-guide/clusters/policy-definition.html + :param policy_family_id: str (optional) + ID of the policy family. The cluster policy's policy definition inherits the policy family's policy + definition. + + Cannot be used with `definition`. Use `policy_family_definition_overrides` instead to customize the + policy definition. + + :returns: :class:`CreatePolicyResponse` + + + .. py:method:: delete(policy_id: str) + + Delete a cluster policy. + + Delete a policy for a cluster. Clusters governed by this policy can still run, but cannot be edited. + + :param policy_id: str + The ID of the policy to delete. + + + + + .. py:method:: edit(policy_id: str, name: str [, definition: Optional[str], description: Optional[str], libraries: Optional[List[Library]], max_clusters_per_user: Optional[int], policy_family_definition_overrides: Optional[str], policy_family_id: Optional[str]]) + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.cluster_policies.create(name=f'sdk-{time.time_ns()}', + definition="""{ + "spark_conf.spark.databricks.delta.preview.enabled": { + "type": "fixed", + "value": true + } + } + """) + + policy = w.cluster_policies.get(policy_id=created.policy_id) + + w.cluster_policies.edit(policy_id=policy.policy_id, + name=policy.name, + definition="""{ + "spark_conf.spark.databricks.delta.preview.enabled": { + "type": "fixed", + "value": false + } + } + """) + + # cleanup + w.cluster_policies.delete(policy_id=created.policy_id) + + Update a cluster policy. + + Update an existing policy for cluster. This operation may make some clusters governed by the previous + policy invalid. + + :param policy_id: str + The ID of the policy to update. + :param name: str + Cluster Policy name requested by the user. This has to be unique. Length must be between 1 and 100 + characters. + :param definition: str (optional) + Policy definition document expressed in [Databricks Cluster Policy Definition Language]. + + [Databricks Cluster Policy Definition Language]: https://docs.databricks.com/administration-guide/clusters/policy-definition.html + :param description: str (optional) + Additional human-readable description of the cluster policy. + :param libraries: List[:class:`Library`] (optional) + A list of libraries to be installed on the next cluster restart that uses this policy. + :param max_clusters_per_user: int (optional) + Max number of clusters per user that can be active using this policy. If not present, there is no + max limit. + :param policy_family_definition_overrides: str (optional) + Policy definition JSON document expressed in [Databricks Policy Definition Language]. The JSON + document must be passed as a string and cannot be embedded in the requests. + + You can use this to customize the policy definition inherited from the policy family. 
Policy rules + specified here are merged into the inherited policy definition. + + [Databricks Policy Definition Language]: https://docs.databricks.com/administration-guide/clusters/policy-definition.html + :param policy_family_id: str (optional) + ID of the policy family. The cluster policy's policy definition inherits the policy family's policy + definition. + + Cannot be used with `definition`. Use `policy_family_definition_overrides` instead to customize the + policy definition. + + + + + .. py:method:: get(policy_id: str) -> Policy + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.cluster_policies.create(name=f'sdk-{time.time_ns()}', + definition="""{ + "spark_conf.spark.databricks.delta.preview.enabled": { + "type": "fixed", + "value": true + } + } + """) + + policy = w.cluster_policies.get(policy_id=created.policy_id) + + # cleanup + w.cluster_policies.delete(policy_id=created.policy_id) + + Get a cluster policy. + + Get a cluster policy entity. Creation and editing is available to admins only. + + :param policy_id: str + Canonical unique identifier for the cluster policy. + + :returns: :class:`Policy` + + + .. py:method:: get_permission_levels(cluster_policy_id: str) -> GetClusterPolicyPermissionLevelsResponse + + Get cluster policy permission levels. + + Gets the permission levels that a user can have on an object. + + :param cluster_policy_id: str + The cluster policy for which to get or manage permissions. + + :returns: :class:`GetClusterPolicyPermissionLevelsResponse` + + + .. py:method:: get_permissions(cluster_policy_id: str) -> ClusterPolicyPermissions + + Get cluster policy permissions. + + Gets the permissions of a cluster policy. Cluster policies can inherit permissions from their root + object. + + :param cluster_policy_id: str + The cluster policy for which to get or manage permissions. + + :returns: :class:`ClusterPolicyPermissions` + + + .. 
py:method:: list( [, sort_column: Optional[ListSortColumn], sort_order: Optional[ListSortOrder]]) -> Iterator[Policy] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import compute + + w = WorkspaceClient() + + all = w.cluster_policies.list(compute.ListClusterPoliciesRequest()) + + List cluster policies. + + Returns a list of policies accessible by the requesting user. + + :param sort_column: :class:`ListSortColumn` (optional) + The cluster policy attribute to sort by. * `POLICY_CREATION_TIME` - Sort result list by policy + creation time. * `POLICY_NAME` - Sort result list by policy name. + :param sort_order: :class:`ListSortOrder` (optional) + The order in which the policies get listed. * `DESC` - Sort result list in descending order. * `ASC` + - Sort result list in ascending order. + + :returns: Iterator over :class:`Policy` + + + .. py:method:: set_permissions(cluster_policy_id: str [, access_control_list: Optional[List[ClusterPolicyAccessControlRequest]]]) -> ClusterPolicyPermissions + + Set cluster policy permissions. + + Sets permissions on a cluster policy. Cluster policies can inherit permissions from their root object. + + :param cluster_policy_id: str + The cluster policy for which to get or manage permissions. + :param access_control_list: List[:class:`ClusterPolicyAccessControlRequest`] (optional) + + :returns: :class:`ClusterPolicyPermissions` + + + .. py:method:: update_permissions(cluster_policy_id: str [, access_control_list: Optional[List[ClusterPolicyAccessControlRequest]]]) -> ClusterPolicyPermissions + + Update cluster policy permissions. + + Updates the permissions on a cluster policy. Cluster policies can inherit permissions from their root + object. + + :param cluster_policy_id: str + The cluster policy for which to get or manage permissions. 
+ :param access_control_list: List[:class:`ClusterPolicyAccessControlRequest`] (optional) + + :returns: :class:`ClusterPolicyPermissions` + \ No newline at end of file diff --git a/docs/workspace/compute/clusters.rst b/docs/workspace/compute/clusters.rst new file mode 100644 index 000000000..5fb2592e0 --- /dev/null +++ b/docs/workspace/compute/clusters.rst @@ -0,0 +1,1002 @@ +``w.clusters``: Clusters +======================== +.. currentmodule:: databricks.sdk.service.compute + +.. py:class:: ClustersExt + + The Clusters API allows you to create, start, edit, list, terminate, and delete clusters. + + Databricks maps cluster node instance types to compute units known as DBUs. See the instance type pricing + page for a list of the supported instance types and their corresponding DBUs. + + A Databricks cluster is a set of computation resources and configurations on which you run data + engineering, data science, and data analytics workloads, such as production ETL pipelines, streaming + analytics, ad-hoc analytics, and machine learning. + + You run these workloads as a set of commands in a notebook or as an automated job. Databricks makes a + distinction between all-purpose clusters and job clusters. You use all-purpose clusters to analyze data + collaboratively using interactive notebooks. You use job clusters to run fast and robust automated jobs. + + You can create an all-purpose cluster using the UI, CLI, or REST API. You can manually terminate and + restart an all-purpose cluster. Multiple users can share such clusters to do collaborative interactive + analysis. + + IMPORTANT: Databricks retains cluster configuration information for up to 200 all-purpose clusters + terminated in the last 30 days and up to 30 job clusters recently terminated by the job scheduler. To keep + an all-purpose cluster configuration even after it has been terminated for more than 30 days, an + administrator can pin a cluster to the cluster list. + + .. 
py:method:: change_owner(cluster_id: str, owner_username: str) + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + latest = w.clusters.select_spark_version(latest=True) + + cluster_name = f'sdk-{time.time_ns()}' + + other_owner = w.users.create(user_name=f'sdk-{time.time_ns()}@example.com') + + clstr = w.clusters.create(cluster_name=cluster_name, + spark_version=latest, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=15, + num_workers=1).result() + + w.clusters.change_owner(cluster_id=clstr.cluster_id, owner_username=other_owner.user_name) + + # cleanup + w.users.delete(id=other_owner.id) + w.clusters.permanent_delete(cluster_id=clstr.cluster_id) + + Change cluster owner. + + Change the owner of the cluster. You must be an admin to perform this operation. + + :param cluster_id: str + + :param owner_username: str + New owner of the cluster_id after this RPC. + + + + + .. 
py:method:: create(spark_version: str [, apply_policy_default_values: Optional[bool], autoscale: Optional[AutoScale], autotermination_minutes: Optional[int], aws_attributes: Optional[AwsAttributes], azure_attributes: Optional[AzureAttributes], cluster_log_conf: Optional[ClusterLogConf], cluster_name: Optional[str], cluster_source: Optional[ClusterSource], custom_tags: Optional[Dict[str, str]], data_security_mode: Optional[DataSecurityMode], docker_image: Optional[DockerImage], driver_instance_pool_id: Optional[str], driver_node_type_id: Optional[str], enable_elastic_disk: Optional[bool], enable_local_disk_encryption: Optional[bool], gcp_attributes: Optional[GcpAttributes], init_scripts: Optional[List[InitScriptInfo]], instance_pool_id: Optional[str], node_type_id: Optional[str], num_workers: Optional[int], policy_id: Optional[str], runtime_engine: Optional[RuntimeEngine], single_user_name: Optional[str], spark_conf: Optional[Dict[str, str]], spark_env_vars: Optional[Dict[str, str]], ssh_public_keys: Optional[List[str]], workload_type: Optional[WorkloadType]]) -> Wait[ClusterDetails] + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + latest = w.clusters.select_spark_version(latest=True) + + cluster_name = f'sdk-{time.time_ns()}' + + clstr = w.clusters.create(cluster_name=cluster_name, + spark_version=latest, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=15, + num_workers=1).result() + + # cleanup + w.clusters.permanent_delete(cluster_id=clstr.cluster_id) + + Create new cluster. + + Creates a new Spark cluster. This method will acquire new instances from the cloud provider if + necessary. Note: Databricks may not be able to acquire some of the requested nodes, due to cloud + provider limitations (account limits, spot price, etc.) or transient network issues. 
+ + If Databricks acquires at least 85% of the requested on-demand nodes, cluster creation will succeed. + Otherwise the cluster will terminate with an informative error message. + + :param spark_version: str + The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be + retrieved by using the :method:clusters/sparkVersions API call. + :param apply_policy_default_values: bool (optional) + :param autoscale: :class:`AutoScale` (optional) + Parameters needed in order to automatically scale clusters up and down based on load. Note: + autoscaling works best with DB runtime versions 3.0 or later. + :param autotermination_minutes: int (optional) + Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this + cluster will not be automatically terminated. If specified, the threshold must be between 10 and + 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. + :param aws_attributes: :class:`AwsAttributes` (optional) + Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, + a set of default values will be used. + :param azure_attributes: :class:`AzureAttributes` (optional) + Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a + set of default values will be used. + :param cluster_log_conf: :class:`ClusterLogConf` (optional) + The configuration for delivering spark logs to a long-term storage destination. Two kinds of + destinations (dbfs and s3) are supported. Only one destination can be specified for one cluster. If + the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of + driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is + `$destination/$clusterId/executor`. + :param cluster_name: str (optional) + Cluster name requested by the user. This doesn't have to be unique. 
If not specified at creation, + the cluster name will be an empty string. + :param cluster_source: :class:`ClusterSource` (optional) + Determines whether the cluster was created by a user through the UI, created by the Databricks Jobs + Scheduler, or through an API request. This is the same as cluster_creator, but read only. + :param custom_tags: Dict[str,str] (optional) + Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS + instances and EBS volumes) with these tags in addition to `default_tags`. Notes: + + - Currently, Databricks allows at most 45 custom tags + + - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags + :param data_security_mode: :class:`DataSecurityMode` (optional) + Data security mode decides what data governance model to use when accessing data from a cluster. + + * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are + not available in this mode. * `SINGLE_USER`: A secure cluster that can only be exclusively used by a + single user specified in `single_user_name`. Most programming languages, cluster features and data + governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be + shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data + and credentials. Most data governance features are supported in this mode. But programming languages + and cluster features might be limited. * `LEGACY_TABLE_ACL`: This mode is for users migrating from + legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy + Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating + from legacy Passthrough on standard clusters. 
+ :param docker_image: :class:`DockerImage` (optional) + :param driver_instance_pool_id: str (optional) + The optional ID of the instance pool to which the driver of the cluster belongs. The pool cluster uses + the instance pool with id (instance_pool_id) if the driver pool is not assigned. + :param driver_node_type_id: str (optional) + The node type of the Spark driver. Note that this field is optional; if unset, the driver node type + will be set as the same value as `node_type_id` defined above. + :param enable_elastic_disk: bool (optional) + Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space + when its Spark workers are running low on disk space. This feature requires specific AWS permissions + to function correctly - refer to the User Guide for more details. + :param enable_local_disk_encryption: bool (optional) + Whether to enable LUKS on cluster VMs' local disks + :param gcp_attributes: :class:`GcpAttributes` (optional) + Attributes related to clusters running on Google Cloud Platform. If not specified at cluster + creation, a set of default values will be used. + :param init_scripts: List[:class:`InitScriptInfo`] (optional) + The configuration for storing init scripts. Any number of destinations can be specified. The scripts + are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script + logs are sent to `$destination/$clusterId/init_scripts`. + :param instance_pool_id: str (optional) + The optional ID of the instance pool to which the cluster belongs. + :param node_type_id: str (optional) + This field encodes, through a single value, the resources available to each of the Spark nodes in + this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute + intensive workloads. A list of available node types can be retrieved by using the + :method:clusters/listNodeTypes API call.
+ :param num_workers: int (optional) + Number of worker nodes that this cluster should have. A cluster has one Spark Driver and + `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. + + Note: When reading the properties of a cluster, this field reflects the desired number of workers + rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 + workers, this field will immediately be updated to reflect the target size of 10 workers, whereas + the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are + provisioned. + :param policy_id: str (optional) + The ID of the cluster policy used to create the cluster if applicable. + :param runtime_engine: :class:`RuntimeEngine` (optional) + Decides which runtime engine to use, e.g. Standard vs. Photon. If unspecified, the runtime engine + is inferred from spark_version. + :param single_user_name: str (optional) + Single user name if data_security_mode is `SINGLE_USER` + :param spark_conf: Dict[str,str] (optional) + An object containing a set of optional, user-specified Spark configuration key-value pairs. Users + can also pass in a string of extra JVM options to the driver and the executors via + `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. + :param spark_env_vars: Dict[str,str] (optional) + An object containing a set of optional, user-specified environment variable key-value pairs. Please + note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while + launching the driver and workers. + + In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to + `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks + managed environmental variables are included as well.
+ + Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": + "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS + -Dspark.shuffle.service.enabled=true"}` + :param ssh_public_keys: List[str] (optional) + SSH public key contents that will be added to each Spark node in this cluster. The corresponding + private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be + specified. + :param workload_type: :class:`WorkloadType` (optional) + + :returns: + Long-running operation waiter for :class:`ClusterDetails`. + See :method:wait_get_cluster_running for more details. + + + .. py:method:: create_and_wait(spark_version: str [, apply_policy_default_values: Optional[bool], autoscale: Optional[AutoScale], autotermination_minutes: Optional[int], aws_attributes: Optional[AwsAttributes], azure_attributes: Optional[AzureAttributes], cluster_log_conf: Optional[ClusterLogConf], cluster_name: Optional[str], cluster_source: Optional[ClusterSource], custom_tags: Optional[Dict[str, str]], data_security_mode: Optional[DataSecurityMode], docker_image: Optional[DockerImage], driver_instance_pool_id: Optional[str], driver_node_type_id: Optional[str], enable_elastic_disk: Optional[bool], enable_local_disk_encryption: Optional[bool], gcp_attributes: Optional[GcpAttributes], init_scripts: Optional[List[InitScriptInfo]], instance_pool_id: Optional[str], node_type_id: Optional[str], num_workers: Optional[int], policy_id: Optional[str], runtime_engine: Optional[RuntimeEngine], single_user_name: Optional[str], spark_conf: Optional[Dict[str, str]], spark_env_vars: Optional[Dict[str, str]], ssh_public_keys: Optional[List[str]], workload_type: Optional[WorkloadType], timeout: datetime.timedelta = 0:20:00]) -> ClusterDetails + + + .. py:method:: delete(cluster_id: str) -> Wait[ClusterDetails] + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + latest = w.clusters.select_spark_version(latest=True) + + cluster_name = f'sdk-{time.time_ns()}' + + clstr = w.clusters.create(cluster_name=cluster_name, + spark_version=latest, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=15, + num_workers=1).result() + + _ = w.clusters.delete(cluster_id=clstr.cluster_id).result() + + # cleanup + w.clusters.permanent_delete(cluster_id=clstr.cluster_id) + + Terminate cluster. + + Terminates the Spark cluster with the specified ID. The cluster is removed asynchronously. Once the + termination has completed, the cluster will be in a `TERMINATED` state. If the cluster is already in a + `TERMINATING` or `TERMINATED` state, nothing will happen. + + :param cluster_id: str + The cluster to be terminated. + + :returns: + Long-running operation waiter for :class:`ClusterDetails`. + See :method:wait_get_cluster_terminated for more details. + + + .. py:method:: delete_and_wait(cluster_id: str, timeout: datetime.timedelta = 0:20:00) -> ClusterDetails + + + .. 
py:method:: edit(cluster_id: str, spark_version: str [, apply_policy_default_values: Optional[bool], autoscale: Optional[AutoScale], autotermination_minutes: Optional[int], aws_attributes: Optional[AwsAttributes], azure_attributes: Optional[AzureAttributes], cluster_log_conf: Optional[ClusterLogConf], cluster_name: Optional[str], cluster_source: Optional[ClusterSource], custom_tags: Optional[Dict[str, str]], data_security_mode: Optional[DataSecurityMode], docker_image: Optional[DockerImage], driver_instance_pool_id: Optional[str], driver_node_type_id: Optional[str], enable_elastic_disk: Optional[bool], enable_local_disk_encryption: Optional[bool], gcp_attributes: Optional[GcpAttributes], init_scripts: Optional[List[InitScriptInfo]], instance_pool_id: Optional[str], node_type_id: Optional[str], num_workers: Optional[int], policy_id: Optional[str], runtime_engine: Optional[RuntimeEngine], single_user_name: Optional[str], spark_conf: Optional[Dict[str, str]], spark_env_vars: Optional[Dict[str, str]], ssh_public_keys: Optional[List[str]], workload_type: Optional[WorkloadType]]) -> Wait[ClusterDetails] + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + cluster_name = f'sdk-{time.time_ns()}' + + latest = w.clusters.select_spark_version(latest=True) + + clstr = w.clusters.create(cluster_name=cluster_name, + spark_version=latest, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=15, + num_workers=1).result() + + _ = w.clusters.edit(cluster_id=clstr.cluster_id, + spark_version=latest, + cluster_name=cluster_name, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=10, + num_workers=2).result() + + # cleanup + w.clusters.permanent_delete(cluster_id=clstr.cluster_id) + + Update cluster configuration. + + Updates the configuration of a cluster to match the provided attributes and size. 
A cluster can be + updated if it is in a `RUNNING` or `TERMINATED` state. + + If a cluster is updated while in a `RUNNING` state, it will be restarted so that the new attributes + can take effect. + + If a cluster is updated while in a `TERMINATED` state, it will remain `TERMINATED`. The next time it + is started using the `clusters/start` API, the new attributes will take effect. Any attempt to update + a cluster in any other state will be rejected with an `INVALID_STATE` error code. + + Clusters created by the Databricks Jobs service cannot be edited. + + :param cluster_id: str + ID of the cluster + :param spark_version: str + The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be + retrieved by using the :method:clusters/sparkVersions API call. + :param apply_policy_default_values: bool (optional) + :param autoscale: :class:`AutoScale` (optional) + Parameters needed in order to automatically scale clusters up and down based on load. Note: + autoscaling works best with DB runtime versions 3.0 or later. + :param autotermination_minutes: int (optional) + Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this + cluster will not be automatically terminated. If specified, the threshold must be between 10 and + 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. + :param aws_attributes: :class:`AwsAttributes` (optional) + Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, + a set of default values will be used. + :param azure_attributes: :class:`AzureAttributes` (optional) + Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a + set of default values will be used. + :param cluster_log_conf: :class:`ClusterLogConf` (optional) + The configuration for delivering spark logs to a long-term storage destination.
Two kinds of + destinations (dbfs and s3) are supported. Only one destination can be specified for one cluster. If + the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of + driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is + `$destination/$clusterId/executor`. + :param cluster_name: str (optional) + Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, + the cluster name will be an empty string. + :param cluster_source: :class:`ClusterSource` (optional) + Determines whether the cluster was created by a user through the UI, created by the Databricks Jobs + Scheduler, or through an API request. This is the same as cluster_creator, but read only. + :param custom_tags: Dict[str,str] (optional) + Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS + instances and EBS volumes) with these tags in addition to `default_tags`. Notes: + + - Currently, Databricks allows at most 45 custom tags + + - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags + :param data_security_mode: :class:`DataSecurityMode` (optional) + Data security mode decides what data governance model to use when accessing data from a cluster. + + * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are + not available in this mode. * `SINGLE_USER`: A secure cluster that can only be exclusively used by a + single user specified in `single_user_name`. Most programming languages, cluster features and data + governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be + shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data + and credentials. Most data governance features are supported in this mode. But programming languages + and cluster features might be limited. 
* `LEGACY_TABLE_ACL`: This mode is for users migrating from + legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy + Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating + from legacy Passthrough on standard clusters. + :param docker_image: :class:`DockerImage` (optional) + :param driver_instance_pool_id: str (optional) + The optional ID of the instance pool to which the driver of the cluster belongs. The pool cluster uses + the instance pool with id (instance_pool_id) if the driver pool is not assigned. + :param driver_node_type_id: str (optional) + The node type of the Spark driver. Note that this field is optional; if unset, the driver node type + will be set as the same value as `node_type_id` defined above. + :param enable_elastic_disk: bool (optional) + Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space + when its Spark workers are running low on disk space. This feature requires specific AWS permissions + to function correctly - refer to the User Guide for more details. + :param enable_local_disk_encryption: bool (optional) + Whether to enable LUKS on cluster VMs' local disks + :param gcp_attributes: :class:`GcpAttributes` (optional) + Attributes related to clusters running on Google Cloud Platform. If not specified at cluster + creation, a set of default values will be used. + :param init_scripts: List[:class:`InitScriptInfo`] (optional) + The configuration for storing init scripts. Any number of destinations can be specified. The scripts + are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script + logs are sent to `$destination/$clusterId/init_scripts`. + :param instance_pool_id: str (optional) + The optional ID of the instance pool to which the cluster belongs.
+ :param node_type_id: str (optional) + This field encodes, through a single value, the resources available to each of the Spark nodes in + this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute + intensive workloads. A list of available node types can be retrieved by using the + :method:clusters/listNodeTypes API call. + :param num_workers: int (optional) + Number of worker nodes that this cluster should have. A cluster has one Spark Driver and + `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. + + Note: When reading the properties of a cluster, this field reflects the desired number of workers + rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 + workers, this field will immediately be updated to reflect the target size of 10 workers, whereas + the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are + provisioned. + :param policy_id: str (optional) + The ID of the cluster policy used to create the cluster if applicable. + :param runtime_engine: :class:`RuntimeEngine` (optional) + Decides which runtime engine to use, e.g. Standard vs. Photon. If unspecified, the runtime engine + is inferred from spark_version. + :param single_user_name: str (optional) + Single user name if data_security_mode is `SINGLE_USER` + :param spark_conf: Dict[str,str] (optional) + An object containing a set of optional, user-specified Spark configuration key-value pairs. Users + can also pass in a string of extra JVM options to the driver and the executors via + `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. + :param spark_env_vars: Dict[str,str] (optional) + An object containing a set of optional, user-specified environment variable key-value pairs. Please + note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while + launching the driver and workers.
+ + In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to + `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks + managed environmental variables are included as well. + + Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": + "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS + -Dspark.shuffle.service.enabled=true"}` + :param ssh_public_keys: List[str] (optional) + SSH public key contents that will be added to each Spark node in this cluster. The corresponding + private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be + specified. + :param workload_type: :class:`WorkloadType` (optional) + + :returns: + Long-running operation waiter for :class:`ClusterDetails`. + See :method:wait_get_cluster_running for more details. + + + .. py:method:: edit_and_wait(cluster_id: str, spark_version: str [, apply_policy_default_values: Optional[bool], autoscale: Optional[AutoScale], autotermination_minutes: Optional[int], aws_attributes: Optional[AwsAttributes], azure_attributes: Optional[AzureAttributes], cluster_log_conf: Optional[ClusterLogConf], cluster_name: Optional[str], cluster_source: Optional[ClusterSource], custom_tags: Optional[Dict[str, str]], data_security_mode: Optional[DataSecurityMode], docker_image: Optional[DockerImage], driver_instance_pool_id: Optional[str], driver_node_type_id: Optional[str], enable_elastic_disk: Optional[bool], enable_local_disk_encryption: Optional[bool], gcp_attributes: Optional[GcpAttributes], init_scripts: Optional[List[InitScriptInfo]], instance_pool_id: Optional[str], node_type_id: Optional[str], num_workers: Optional[int], policy_id: Optional[str], runtime_engine: Optional[RuntimeEngine], single_user_name: Optional[str], spark_conf: Optional[Dict[str, str]], spark_env_vars: Optional[Dict[str, str]], ssh_public_keys: Optional[List[str]], 
workload_type: Optional[WorkloadType], timeout: datetime.timedelta = 0:20:00]) -> ClusterDetails + + + .. py:method:: ensure_cluster_is_running(cluster_id: str) + + + Usage: + + .. code-block:: + + import os + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import compute + + w = WorkspaceClient() + + cluster_id = os.environ["TEST_DEFAULT_CLUSTER_ID"] + + context = w.command_execution.create(cluster_id=cluster_id, language=compute.Language.PYTHON).result() + + w.clusters.ensure_cluster_is_running(cluster_id) + + # cleanup + w.command_execution.destroy(cluster_id=cluster_id, context_id=context.id) + + Ensures that given cluster is running, regardless of the current state + + .. py:method:: events(cluster_id: str [, end_time: Optional[int], event_types: Optional[List[EventType]], limit: Optional[int], offset: Optional[int], order: Optional[GetEventsOrder], start_time: Optional[int]]) -> Iterator[ClusterEvent] + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + latest = w.clusters.select_spark_version(latest=True) + + cluster_name = f'sdk-{time.time_ns()}' + + clstr = w.clusters.create(cluster_name=cluster_name, + spark_version=latest, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=15, + num_workers=1).result() + + events = w.clusters.events(cluster_id=clstr.cluster_id) + + # cleanup + w.clusters.permanent_delete(cluster_id=clstr.cluster_id) + + List cluster activity events. + + Retrieves a list of events about the activity of a cluster. This API is paginated. If there are more + events to read, the response includes all the parameters necessary to request the next page of + events. + + :param cluster_id: str + The ID of the cluster to retrieve events about. + :param end_time: int (optional) + The end time in epoch milliseconds. If empty, returns events up to the current time.
+ :param event_types: List[:class:`EventType`] (optional) + An optional set of event types to filter on. If empty, all event types are returned. + :param limit: int (optional) + The maximum number of events to include in a page of events. Defaults to 50, and maximum allowed + value is 500. + :param offset: int (optional) + The offset in the result set. Defaults to 0 (no offset). When an offset is specified and the results + are requested in descending order, the end_time field is required. + :param order: :class:`GetEventsOrder` (optional) + The order to list events in; either "ASC" or "DESC". Defaults to "DESC". + :param start_time: int (optional) + The start time in epoch milliseconds. If empty, returns events starting from the beginning of time. + + :returns: Iterator over :class:`ClusterEvent` + + + .. py:method:: get(cluster_id: str) -> ClusterDetails + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + latest = w.clusters.select_spark_version(latest=True) + + cluster_name = f'sdk-{time.time_ns()}' + + clstr = w.clusters.create(cluster_name=cluster_name, + spark_version=latest, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=15, + num_workers=1).result() + + by_id = w.clusters.get(cluster_id=clstr.cluster_id) + + # cleanup + w.clusters.permanent_delete(cluster_id=clstr.cluster_id) + + Get cluster info. + + Retrieves the information for a cluster given its identifier. Clusters can be described while they are + running, or up to 60 days after they are terminated. + + :param cluster_id: str + The cluster about which to retrieve information. + + :returns: :class:`ClusterDetails` + + + .. py:method:: get_permission_levels(cluster_id: str) -> GetClusterPermissionLevelsResponse + + Get cluster permission levels. + + Gets the permission levels that a user can have on an object. 
+ + :param cluster_id: str + The cluster for which to get or manage permissions. + + :returns: :class:`GetClusterPermissionLevelsResponse` + + + .. py:method:: get_permissions(cluster_id: str) -> ClusterPermissions + + Get cluster permissions. + + Gets the permissions of a cluster. Clusters can inherit permissions from their root object. + + :param cluster_id: str + The cluster for which to get or manage permissions. + + :returns: :class:`ClusterPermissions` + + + .. py:method:: list( [, can_use_client: Optional[str]]) -> Iterator[ClusterDetails] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import compute + + w = WorkspaceClient() + + all = w.clusters.list(compute.ListClustersRequest()) + + List all clusters. + + Return information about all pinned clusters, active clusters, up to 200 of the most recently + terminated all-purpose clusters in the past 30 days, and up to 30 of the most recently terminated job + clusters in the past 30 days. + + For example, if there is 1 pinned cluster, 4 active clusters, 45 terminated all-purpose clusters in + the past 30 days, and 50 terminated job clusters in the past 30 days, then this API returns the 1 + pinned cluster, 4 active clusters, all 45 terminated all-purpose clusters, and the 30 most recently + terminated job clusters. + + :param can_use_client: str (optional) + Filter clusters based on what type of client it can be used for. Could be either NOTEBOOKS or JOBS. + No input for this field will get all clusters in the workspace without filtering on its supported + client + + :returns: Iterator over :class:`ClusterDetails` + + + .. py:method:: list_node_types() -> ListNodeTypesResponse + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + nodes = w.clusters.list_node_types() + + List node types. + + Returns a list of supported Spark node types. These node types can be used to launch a cluster. 
+ + :returns: :class:`ListNodeTypesResponse` + + + .. py:method:: list_zones() -> ListAvailableZonesResponse + + List availability zones. + + Returns a list of availability zones where clusters can be created in (For example, us-west-2a). These + zones can be used to launch a cluster. + + :returns: :class:`ListAvailableZonesResponse` + + + .. py:method:: permanent_delete(cluster_id: str) + + Permanently delete cluster. + + Permanently deletes a Spark cluster. This cluster is terminated and resources are asynchronously + removed. + + In addition, users will no longer see permanently deleted clusters in the cluster list, and API users + can no longer perform any action on permanently deleted clusters. + + :param cluster_id: str + The cluster to be deleted. + + + + + .. py:method:: pin(cluster_id: str) + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + latest = w.clusters.select_spark_version(latest=True) + + cluster_name = f'sdk-{time.time_ns()}' + + clstr = w.clusters.create(cluster_name=cluster_name, + spark_version=latest, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=15, + num_workers=1).result() + + w.clusters.pin(cluster_id=clstr.cluster_id) + + # cleanup + w.clusters.permanent_delete(cluster_id=clstr.cluster_id) + + Pin cluster. + + Pinning a cluster ensures that the cluster will always be returned by the ListClusters API. Pinning a + cluster that is already pinned will have no effect. This API can only be called by workspace admins. + + :param cluster_id: str + + + + + + .. py:method:: resize(cluster_id: str [, autoscale: Optional[AutoScale], num_workers: Optional[int]]) -> Wait[ClusterDetails] + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + latest = w.clusters.select_spark_version(latest=True) + + cluster_name = f'sdk-{time.time_ns()}' + + clstr = w.clusters.create(cluster_name=cluster_name, + spark_version=latest, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=15, + num_workers=1).result() + + by_id = w.clusters.resize(cluster_id=clstr.cluster_id, num_workers=1).result() + + # cleanup + w.clusters.permanent_delete(cluster_id=clstr.cluster_id) + + Resize cluster. + + Resizes a cluster to have a desired number of workers. This will fail unless the cluster is in a + `RUNNING` state. + + :param cluster_id: str + The cluster to be resized. + :param autoscale: :class:`AutoScale` (optional) + Parameters needed in order to automatically scale clusters up and down based on load. Note: + autoscaling works best with DB runtime versions 3.0 or later. + :param num_workers: int (optional) + Number of worker nodes that this cluster should have. A cluster has one Spark Driver and + `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. + + Note: When reading the properties of a cluster, this field reflects the desired number of workers + rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 + workers, this field will immediately be updated to reflect the target size of 10 workers, whereas + the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are + provisioned. + + :returns: + Long-running operation waiter for :class:`ClusterDetails`. + See :method:wait_get_cluster_running for more details. + + + .. py:method:: resize_and_wait(cluster_id: str [, autoscale: Optional[AutoScale], num_workers: Optional[int], timeout: datetime.timedelta = 0:20:00]) -> ClusterDetails + + + .. 
py:method:: restart(cluster_id: str [, restart_user: Optional[str]]) -> Wait[ClusterDetails] + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + latest = w.clusters.select_spark_version(latest=True) + + cluster_name = f'sdk-{time.time_ns()}' + + clstr = w.clusters.create(cluster_name=cluster_name, + spark_version=latest, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=15, + num_workers=1).result() + + _ = w.clusters.restart(cluster_id=clstr.cluster_id).result() + + # cleanup + w.clusters.permanent_delete(cluster_id=clstr.cluster_id) + + Restart cluster. + + Restarts a Spark cluster with the supplied ID. If the cluster is not currently in a `RUNNING` state, + nothing will happen. + + :param cluster_id: str + The cluster to be started. + :param restart_user: str (optional) + + + :returns: + Long-running operation waiter for :class:`ClusterDetails`. + See :method:wait_get_cluster_running for more details. + + + .. py:method:: restart_and_wait(cluster_id: str [, restart_user: Optional[str], timeout: datetime.timedelta = 0:20:00]) -> ClusterDetails + + + .. py:method:: select_node_type(min_memory_gb: int, gb_per_core: int, min_cores: int, min_gpus: int, local_disk: bool, local_disk_min_size: int, category: str, photon_worker_capable: bool, photon_driver_capable: bool, graviton: bool, is_io_cache_enabled: bool, support_port_forwarding: bool, fleet: str) -> str + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + smallest = w.clusters.select_node_type(local_disk=True) + + Selects smallest available node type given the conditions. 
+ + :param min_memory_gb: int + :param gb_per_core: int + :param min_cores: int + :param min_gpus: int + :param local_disk: bool + :param local_disk_min_size: int + :param category: str + :param photon_worker_capable: bool + :param photon_driver_capable: bool + :param graviton: bool + :param is_io_cache_enabled: bool + + :returns: `node_type` compatible string + + + .. py:method:: select_spark_version(long_term_support: bool = False, beta: bool = False, latest: bool = True, ml: bool = False, genomics: bool = False, gpu: bool = False, scala: str = 2.12, spark_version: str, photon: bool = False, graviton: bool = False) -> str + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + latest = w.clusters.select_spark_version(latest=True) + + Selects the latest Databricks Runtime Version. + + :param long_term_support: bool + :param beta: bool + :param latest: bool + :param ml: bool + :param gpu: bool + :param scala: str + :param spark_version: str + :param photon: bool + :param graviton: bool + + :returns: `spark_version` compatible string + + + .. py:method:: set_permissions(cluster_id: str [, access_control_list: Optional[List[ClusterAccessControlRequest]]]) -> ClusterPermissions + + Set cluster permissions. + + Sets permissions on a cluster. Clusters can inherit permissions from their root object. + + :param cluster_id: str + The cluster for which to get or manage permissions. + :param access_control_list: List[:class:`ClusterAccessControlRequest`] (optional) + + :returns: :class:`ClusterPermissions` + + + .. py:method:: spark_versions() -> GetSparkVersionsResponse + + List available Spark versions. + + Returns the list of available Spark versions. These versions can be used to launch a cluster. + + :returns: :class:`GetSparkVersionsResponse` + + + .. py:method:: start(cluster_id: str) -> Wait[ClusterDetails] + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + latest = w.clusters.select_spark_version(latest=True) + + cluster_name = f'sdk-{time.time_ns()}' + + clstr = w.clusters.create(cluster_name=cluster_name, + spark_version=latest, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=15, + num_workers=1).result() + + _ = w.clusters.start(cluster_id=clstr.cluster_id).result() + + # cleanup + w.clusters.permanent_delete(cluster_id=clstr.cluster_id) + + Start terminated cluster. + + Starts a terminated Spark cluster with the supplied ID. This works similar to `createCluster` except: + + * The previous cluster id and attributes are preserved. * The cluster starts with the last specified + cluster size. * If the previous cluster was an autoscaling cluster, the current cluster starts with + the minimum number of nodes. * If the cluster is not currently in a `TERMINATED` state, nothing will + happen. * Clusters launched to run a job cannot be started. + + :param cluster_id: str + The cluster to be started. + + :returns: + Long-running operation waiter for :class:`ClusterDetails`. + See :method:wait_get_cluster_running for more details. + + + .. py:method:: start_and_wait(cluster_id: str, timeout: datetime.timedelta = 0:20:00) -> ClusterDetails + + + .. py:method:: unpin(cluster_id: str) + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + latest = w.clusters.select_spark_version(latest=True) + + cluster_name = f'sdk-{time.time_ns()}' + + clstr = w.clusters.create(cluster_name=cluster_name, + spark_version=latest, + instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + autotermination_minutes=15, + num_workers=1).result() + + w.clusters.unpin(cluster_id=clstr.cluster_id) + + # cleanup + w.clusters.permanent_delete(cluster_id=clstr.cluster_id) + + Unpin cluster. 
+ + Unpinning a cluster will allow the cluster to eventually be removed from the ListClusters API. + Unpinning a cluster that is not pinned will have no effect. This API can only be called by workspace + admins. + + :param cluster_id: str + + + + + + .. py:method:: update_permissions(cluster_id: str [, access_control_list: Optional[List[ClusterAccessControlRequest]]]) -> ClusterPermissions + + Update cluster permissions. + + Updates the permissions on a cluster. Clusters can inherit permissions from their root object. + + :param cluster_id: str + The cluster for which to get or manage permissions. + :param access_control_list: List[:class:`ClusterAccessControlRequest`] (optional) + + :returns: :class:`ClusterPermissions` + + + .. py:method:: wait_get_cluster_running(cluster_id: str, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[ClusterDetails], None]]) -> ClusterDetails + + + .. py:method:: wait_get_cluster_terminated(cluster_id: str, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[ClusterDetails], None]]) -> ClusterDetails diff --git a/docs/workspace/compute/command_execution.rst b/docs/workspace/compute/command_execution.rst new file mode 100644 index 000000000..a5b94b5a5 --- /dev/null +++ b/docs/workspace/compute/command_execution.rst @@ -0,0 +1,161 @@ +``w.command_execution``: Command Execution +========================================== +.. currentmodule:: databricks.sdk.service.compute + +.. py:class:: CommandExecutionAPI + + This API allows execution of Python, Scala, SQL, or R commands on running Databricks Clusters. + + .. py:method:: cancel( [, cluster_id: Optional[str], command_id: Optional[str], context_id: Optional[str]]) -> Wait[CommandStatusResponse] + + Cancel a command. + + Cancels a currently running command within an execution context. + + The command ID is obtained from a prior successful call to __execute__. 
+ + :param cluster_id: str (optional) + :param command_id: str (optional) + :param context_id: str (optional) + + :returns: + Long-running operation waiter for :class:`CommandStatusResponse`. + See :method:wait_command_status_command_execution_cancelled for more details. + + + .. py:method:: cancel_and_wait( [, cluster_id: Optional[str], command_id: Optional[str], context_id: Optional[str], timeout: datetime.timedelta = 0:20:00]) -> CommandStatusResponse + + + .. py:method:: command_status(cluster_id: str, context_id: str, command_id: str) -> CommandStatusResponse + + Get command info. + + Gets the status of and, if available, the results from a currently executing command. + + The command ID is obtained from a prior successful call to __execute__. + + :param cluster_id: str + :param context_id: str + :param command_id: str + + :returns: :class:`CommandStatusResponse` + + + .. py:method:: context_status(cluster_id: str, context_id: str) -> ContextStatusResponse + + Get status. + + Gets the status for an execution context. + + :param cluster_id: str + :param context_id: str + + :returns: :class:`ContextStatusResponse` + + + .. py:method:: create( [, cluster_id: Optional[str], language: Optional[Language]]) -> Wait[ContextStatusResponse] + + + Usage: + + .. code-block:: + + import os + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import compute + + w = WorkspaceClient() + + cluster_id = os.environ["TEST_DEFAULT_CLUSTER_ID"] + + context = w.command_execution.create(cluster_id=cluster_id, language=compute.Language.PYTHON).result() + + # cleanup + w.command_execution.destroy(cluster_id=cluster_id, context_id=context.id) + + Create an execution context. + + Creates an execution context for running cluster commands. + + If successful, this method returns the ID of the new execution context. 
+ + :param cluster_id: str (optional) + Running cluster id + :param language: :class:`Language` (optional) + + :returns: + Long-running operation waiter for :class:`ContextStatusResponse`. + See :method:wait_context_status_command_execution_running for more details. + + + .. py:method:: create_and_wait( [, cluster_id: Optional[str], language: Optional[Language], timeout: datetime.timedelta = 0:20:00]) -> ContextStatusResponse + + + .. py:method:: destroy(cluster_id: str, context_id: str) + + Delete an execution context. + + Deletes an execution context. + + :param cluster_id: str + :param context_id: str + + + + + .. py:method:: execute( [, cluster_id: Optional[str], command: Optional[str], context_id: Optional[str], language: Optional[Language]]) -> Wait[CommandStatusResponse] + + + Usage: + + .. code-block:: + + import os + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import compute + + w = WorkspaceClient() + + cluster_id = os.environ["TEST_DEFAULT_CLUSTER_ID"] + + context = w.command_execution.create(cluster_id=cluster_id, language=compute.Language.PYTHON).result() + + text_results = w.command_execution.execute(cluster_id=cluster_id, + context_id=context.id, + language=compute.Language.PYTHON, + command="print(1)").result() + + # cleanup + w.command_execution.destroy(cluster_id=cluster_id, context_id=context.id) + + Run a command. + + Runs a cluster command in the given execution context, using the provided language. + + If successful, it returns an ID for tracking the status of the command's execution. + + :param cluster_id: str (optional) + Running cluster id + :param command: str (optional) + Executable code + :param context_id: str (optional) + Running context id + :param language: :class:`Language` (optional) + + :returns: + Long-running operation waiter for :class:`CommandStatusResponse`. + See :method:wait_command_status_command_execution_finished_or_error for more details. + + + .. 
py:method:: execute_and_wait( [, cluster_id: Optional[str], command: Optional[str], context_id: Optional[str], language: Optional[Language], timeout: datetime.timedelta = 0:20:00]) -> CommandStatusResponse + + + .. py:method:: wait_command_status_command_execution_cancelled(cluster_id: str, command_id: str, context_id: str, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[CommandStatusResponse], None]]) -> CommandStatusResponse + + + .. py:method:: wait_command_status_command_execution_finished_or_error(cluster_id: str, command_id: str, context_id: str, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[CommandStatusResponse], None]]) -> CommandStatusResponse + + + .. py:method:: wait_context_status_command_execution_running(cluster_id: str, context_id: str, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[ContextStatusResponse], None]]) -> ContextStatusResponse diff --git a/docs/workspace/compute/global_init_scripts.rst b/docs/workspace/compute/global_init_scripts.rst new file mode 100644 index 000000000..3688ed25c --- /dev/null +++ b/docs/workspace/compute/global_init_scripts.rst @@ -0,0 +1,180 @@ +``w.global_init_scripts``: Global Init Scripts +============================================== +.. currentmodule:: databricks.sdk.service.compute + +.. py:class:: GlobalInitScriptsAPI + + The Global Init Scripts API enables Workspace administrators to configure global initialization scripts + for their workspace. These scripts run on every node in every cluster in the workspace. + + **Important:** Existing clusters must be restarted to pick up any changes made to global init scripts. + Global init scripts are run in order. If the init script returns with a bad exit code, the Apache Spark + container fails to launch and init scripts with later position are skipped. If enough containers fail, the + entire cluster fails with a `GLOBAL_INIT_SCRIPT_FAILURE` error code. + + .. 
py:method:: create(name: str, script: str [, enabled: Optional[bool], position: Optional[int]]) -> CreateResponse + + + Usage: + + .. code-block:: + + import base64 + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.global_init_scripts.create(name=f'sdk-{time.time_ns()}', + script=base64.b64encode(("echo 1").encode()).decode(), + enabled=True, + position=10) + + # cleanup + w.global_init_scripts.delete(script_id=created.script_id) + + Create init script. + + Creates a new global init script in this workspace. + + :param name: str + The name of the script + :param script: str + The Base64-encoded content of the script. + :param enabled: bool (optional) + Specifies whether the script is enabled. The script runs only if enabled. + :param position: int (optional) + The position of a global init script, where 0 represents the first script to run, 1 is the second + script to run, in ascending order. + + If you omit the numeric position for a new global init script, it defaults to last position. It will + run after all current scripts. Setting any value greater than the position of the last script is + equivalent to the last position. Example: Take three existing scripts with positions 0, 1, and 2. + Any position of (3) or greater puts the script in the last position. If an explicit position value + conflicts with an existing script value, your request succeeds, but the original script at that + position and all later scripts have their positions incremented by 1. + + :returns: :class:`CreateResponse` + + + .. py:method:: delete(script_id: str) + + Delete init script. + + Deletes a global init script. + + :param script_id: str + The ID of the global init script. + + + + + .. py:method:: get(script_id: str) -> GlobalInitScriptDetailsWithContent + + + Usage: + + .. 
code-block:: + + import base64 + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.global_init_scripts.create(name=f'sdk-{time.time_ns()}', + script=base64.b64encode(("echo 1").encode()).decode(), + enabled=True, + position=10) + + by_id = w.global_init_scripts.get(script_id=created.script_id) + + # cleanup + w.global_init_scripts.delete(script_id=created.script_id) + + Get an init script. + + Gets all the details of a script, including its Base64-encoded contents. + + :param script_id: str + The ID of the global init script. + + :returns: :class:`GlobalInitScriptDetailsWithContent` + + + .. py:method:: list() -> Iterator[GlobalInitScriptDetails] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.global_init_scripts.list() + + Get init scripts. + + Get a list of all global init scripts for this workspace. This returns all properties for each script + but **not** the script contents. To retrieve the contents of a script, use the [get a global init + script](#operation/get-script) operation. + + :returns: Iterator over :class:`GlobalInitScriptDetails` + + + .. py:method:: update(script_id: str, name: str, script: str [, enabled: Optional[bool], position: Optional[int]]) + + + Usage: + + .. code-block:: + + import base64 + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.global_init_scripts.create(name=f'sdk-{time.time_ns()}', + script=base64.b64encode(("echo 1").encode()).decode(), + enabled=True, + position=10) + + w.global_init_scripts.update(script_id=created.script_id, + name=f'sdk-{time.time_ns()}', + script=base64.b64encode(("echo 2").encode()).decode()) + + # cleanup + w.global_init_scripts.delete(script_id=created.script_id) + + Update init script. + + Updates a global init script, specifying only the fields to change. All fields are optional. + Unspecified fields retain their current value. 
+ + :param script_id: str + The ID of the global init script. + :param name: str + The name of the script + :param script: str + The Base64-encoded content of the script. + :param enabled: bool (optional) + Specifies whether the script is enabled. The script runs only if enabled. + :param position: int (optional) + The position of a script, where 0 represents the first script to run, 1 is the second script to run, + in ascending order. To move the script to run first, set its position to 0. + + To move the script to the end, set its position to any value greater than or equal to the position of the + last script. For example, take three existing scripts with positions 0, 1, and 2. Any position value of 2 or + greater puts the script in the last position (2). + + If an explicit position value conflicts with an existing script, your request succeeds, but the + original script at that position and all later scripts have their positions incremented by 1. + + + \ No newline at end of file diff --git a/docs/workspace/compute/index.rst b/docs/workspace/compute/index.rst new file mode 100644 index 000000000..b13a21610 --- /dev/null +++ b/docs/workspace/compute/index.rst @@ -0,0 +1,17 @@ + +Compute +======= + +Use and configure compute for Databricks + +.. toctree:: + :maxdepth: 1 + + cluster_policies + clusters + command_execution + global_init_scripts + instance_pools + instance_profiles + libraries + policy_families \ No newline at end of file diff --git a/docs/workspace/compute/instance_pools.rst b/docs/workspace/compute/instance_pools.rst new file mode 100644 index 000000000..277844170 --- /dev/null +++ b/docs/workspace/compute/instance_pools.rst @@ -0,0 +1,269 @@ +``w.instance_pools``: Instance Pools +==================================== +.. currentmodule:: databricks.sdk.service.compute + +.. 
py:class:: InstancePoolsAPI + + Instance Pools API are used to create, edit, delete and list instance pools by using ready-to-use cloud + instances which reduces a cluster start and auto-scaling times. + + Databricks pools reduce cluster start and auto-scaling times by maintaining a set of idle, ready-to-use + instances. When a cluster is attached to a pool, cluster nodes are created using the pool’s idle + instances. If the pool has no idle instances, the pool expands by allocating a new instance from the + instance provider in order to accommodate the cluster’s request. When a cluster releases an instance, it + returns to the pool and is free for another cluster to use. Only clusters attached to a pool can use that + pool’s idle instances. + + You can specify a different pool for the driver node and worker nodes, or use the same pool for both. + + Databricks does not charge DBUs while instances are idle in the pool. Instance provider billing does + apply. See pricing. + + .. py:method:: create(instance_pool_name: str, node_type_id: str [, aws_attributes: Optional[InstancePoolAwsAttributes], azure_attributes: Optional[InstancePoolAzureAttributes], custom_tags: Optional[Dict[str, str]], disk_spec: Optional[DiskSpec], enable_elastic_disk: Optional[bool], gcp_attributes: Optional[InstancePoolGcpAttributes], idle_instance_autotermination_minutes: Optional[int], max_capacity: Optional[int], min_idle_instances: Optional[int], preloaded_docker_images: Optional[List[DockerImage]], preloaded_spark_versions: Optional[List[str]]]) -> CreateInstancePoolResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + smallest = w.clusters.select_node_type(local_disk=True) + + created = w.instance_pools.create(instance_pool_name=f'sdk-{time.time_ns()}', node_type_id=smallest) + + # cleanup + w.instance_pools.delete(instance_pool_id=created.instance_pool_id) + + Create a new instance pool. 
+ + Creates a new instance pool using idle and ready-to-use cloud instances. + + :param instance_pool_name: str + Pool name requested by the user. Pool name must be unique. Length must be between 1 and 100 + characters. + :param node_type_id: str + This field encodes, through a single value, the resources available to each of the Spark nodes in + this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute + intensive workloads. A list of available node types can be retrieved by using the + :method:clusters/listNodeTypes API call. + :param aws_attributes: :class:`InstancePoolAwsAttributes` (optional) + Attributes related to instance pools running on Amazon Web Services. If not specified at pool + creation, a set of default values will be used. + :param azure_attributes: :class:`InstancePoolAzureAttributes` (optional) + Attributes related to instance pools running on Azure. If not specified at pool creation, a set of + default values will be used. + :param custom_tags: Dict[str,str] (optional) + Additional tags for pool resources. Databricks will tag all pool resources (e.g., AWS instances and + EBS volumes) with these tags in addition to `default_tags`. Notes: + + - Currently, Databricks allows at most 45 custom tags + :param disk_spec: :class:`DiskSpec` (optional) + Defines the specification of the disks that will be attached to all spark containers. + :param enable_elastic_disk: bool (optional) + Autoscaling Local Storage: when enabled, the instances in this pool will dynamically acquire + additional disk space when their Spark workers are running low on disk space. In AWS, this feature + requires specific AWS permissions to function correctly - refer to the User Guide for more details. + :param gcp_attributes: :class:`InstancePoolGcpAttributes` (optional) + Attributes related to instance pools running on Google Cloud Platform. If not specified at pool + creation, a set of default values will be used. 
+ :param idle_instance_autotermination_minutes: int (optional) + Automatically terminates the extra instances in the pool cache after they are inactive for this time + in minutes if min_idle_instances requirement is already met. If not set, the extra pool instances + will be automatically terminated after a default timeout. If specified, the threshold must be + between 0 and 10000 minutes. Users can also set this value to 0 to instantly remove idle instances + from the cache if min cache size could still hold. + :param max_capacity: int (optional) + Maximum number of outstanding instances to keep in the pool, including both instances used by + clusters and idle instances. Clusters that require further instance provisioning will fail during + upsize requests. + :param min_idle_instances: int (optional) + Minimum number of idle instances to keep in the instance pool + :param preloaded_docker_images: List[:class:`DockerImage`] (optional) + Custom Docker Image BYOC + :param preloaded_spark_versions: List[str] (optional) + A list containing at most one preloaded Spark image version for the pool. Pool-backed clusters + started with the preloaded Spark version will start faster. A list of available Spark versions can + be retrieved by using the :method:clusters/sparkVersions API call. + + :returns: :class:`CreateInstancePoolResponse` + + + .. py:method:: delete(instance_pool_id: str) + + Delete an instance pool. + + Deletes the instance pool permanently. The idle instances in the pool are terminated asynchronously. + + :param instance_pool_id: str + The instance pool to be terminated. + + + + + .. py:method:: edit(instance_pool_id: str, instance_pool_name: str, node_type_id: str [, custom_tags: Optional[Dict[str, str]], idle_instance_autotermination_minutes: Optional[int], max_capacity: Optional[int], min_idle_instances: Optional[int]]) + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + smallest = w.clusters.select_node_type(local_disk=True) + + created = w.instance_pools.create(instance_pool_name=f'sdk-{time.time_ns()}', node_type_id=smallest) + + w.instance_pools.edit(instance_pool_id=created.instance_pool_id, + instance_pool_name=f'sdk-{time.time_ns()}', + node_type_id=smallest) + + # cleanup + w.instance_pools.delete(instance_pool_id=created.instance_pool_id) + + Edit an existing instance pool. + + Modifies the configuration of an existing instance pool. + + :param instance_pool_id: str + Instance pool ID + :param instance_pool_name: str + Pool name requested by the user. Pool name must be unique. Length must be between 1 and 100 + characters. + :param node_type_id: str + This field encodes, through a single value, the resources available to each of the Spark nodes in + this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute + intensive workloads. A list of available node types can be retrieved by using the + :method:clusters/listNodeTypes API call. + :param custom_tags: Dict[str,str] (optional) + Additional tags for pool resources. Databricks will tag all pool resources (e.g., AWS instances and + EBS volumes) with these tags in addition to `default_tags`. Notes: + + - Currently, Databricks allows at most 45 custom tags + :param idle_instance_autotermination_minutes: int (optional) + Automatically terminates the extra instances in the pool cache after they are inactive for this time + in minutes if min_idle_instances requirement is already met. If not set, the extra pool instances + will be automatically terminated after a default timeout. If specified, the threshold must be + between 0 and 10000 minutes. Users can also set this value to 0 to instantly remove idle instances + from the cache if min cache size could still hold. 
+ :param max_capacity: int (optional) + Maximum number of outstanding instances to keep in the pool, including both instances used by + clusters and idle instances. Clusters that require further instance provisioning will fail during + upsize requests. + :param min_idle_instances: int (optional) + Minimum number of idle instances to keep in the instance pool + + + + + .. py:method:: get(instance_pool_id: str) -> GetInstancePool + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + smallest = w.clusters.select_node_type(local_disk=True) + + created = w.instance_pools.create(instance_pool_name=f'sdk-{time.time_ns()}', node_type_id=smallest) + + by_id = w.instance_pools.get(instance_pool_id=created.instance_pool_id) + + # cleanup + w.instance_pools.delete(instance_pool_id=created.instance_pool_id) + + Get instance pool information. + + Retrieve the information for an instance pool based on its identifier. + + :param instance_pool_id: str + The canonical unique identifier for the instance pool. + + :returns: :class:`GetInstancePool` + + + .. py:method:: get_permission_levels(instance_pool_id: str) -> GetInstancePoolPermissionLevelsResponse + + Get instance pool permission levels. + + Gets the permission levels that a user can have on an object. + + :param instance_pool_id: str + The instance pool for which to get or manage permissions. + + :returns: :class:`GetInstancePoolPermissionLevelsResponse` + + + .. py:method:: get_permissions(instance_pool_id: str) -> InstancePoolPermissions + + Get instance pool permissions. + + Gets the permissions of an instance pool. Instance pools can inherit permissions from their root + object. + + :param instance_pool_id: str + The instance pool for which to get or manage permissions. + + :returns: :class:`InstancePoolPermissions` + + + .. py:method:: list() -> Iterator[InstancePoolAndStats] + + + Usage: + + .. 
code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.instance_pools.list() + + List instance pool info. + + Gets a list of instance pools with their statistics. + + :returns: Iterator over :class:`InstancePoolAndStats` + + + .. py:method:: set_permissions(instance_pool_id: str [, access_control_list: Optional[List[InstancePoolAccessControlRequest]]]) -> InstancePoolPermissions + + Set instance pool permissions. + + Sets permissions on an instance pool. Instance pools can inherit permissions from their root object. + + :param instance_pool_id: str + The instance pool for which to get or manage permissions. + :param access_control_list: List[:class:`InstancePoolAccessControlRequest`] (optional) + + :returns: :class:`InstancePoolPermissions` + + + .. py:method:: update_permissions(instance_pool_id: str [, access_control_list: Optional[List[InstancePoolAccessControlRequest]]]) -> InstancePoolPermissions + + Update instance pool permissions. + + Updates the permissions on an instance pool. Instance pools can inherit permissions from their root + object. + + :param instance_pool_id: str + The instance pool for which to get or manage permissions. + :param access_control_list: List[:class:`InstancePoolAccessControlRequest`] (optional) + + :returns: :class:`InstancePoolPermissions` + \ No newline at end of file diff --git a/docs/workspace/compute/instance_profiles.rst b/docs/workspace/compute/instance_profiles.rst new file mode 100644 index 000000000..a7a25f869 --- /dev/null +++ b/docs/workspace/compute/instance_profiles.rst @@ -0,0 +1,144 @@ +``w.instance_profiles``: Instance Profiles +========================================== +.. currentmodule:: databricks.sdk.service.compute + +.. py:class:: InstanceProfilesAPI + + The Instance Profiles API allows admins to add, list, and remove instance profiles that users can launch + clusters with. Regular users can list the instance profiles available to them. 
See [Secure access to S3 + buckets] using instance profiles for more information. + + [Secure access to S3 buckets]: https://docs.databricks.com/administration-guide/cloud-configurations/aws/instance-profiles.html + + .. py:method:: add(instance_profile_arn: str [, iam_role_arn: Optional[str], is_meta_instance_profile: Optional[bool], skip_validation: Optional[bool]]) + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + arn = "arn:aws:iam::000000000000:instance-profile/abc" + + w.instance_profiles.add(instance_profile_arn=arn, + skip_validation=True, + iam_role_arn="arn:aws:iam::000000000000:role/bcd") + + Register an instance profile. + + In the UI, you can select the instance profile when launching clusters. This API is only available to + admin users. + + :param instance_profile_arn: str + The AWS ARN of the instance profile to register with Databricks. This field is required. + :param iam_role_arn: str (optional) + The AWS IAM role ARN of the role associated with the instance profile. This field is required if + your role name and instance profile name do not match and you want to use the instance profile with + [Databricks SQL Serverless]. + + Otherwise, this field is optional. + + [Databricks SQL Serverless]: https://docs.databricks.com/sql/admin/serverless.html + :param is_meta_instance_profile: bool (optional) + Boolean flag indicating whether the instance profile should only be used in credential passthrough + scenarios. If true, it means the instance profile contains a meta IAM role which could assume a + wide range of roles. Therefore it should always be used with authorization. This field is optional, + the default value is `false`. + :param skip_validation: bool (optional) + By default, Databricks validates that it has sufficient permissions to launch instances with the + instance profile. This validation uses AWS dry-run mode for the RunInstances API.
If validation + fails with an error message that does not indicate an IAM related permission issue, (e.g. “Your + requested instance type is not supported in your requested availability zone”), you can pass this + flag to skip the validation and forcibly add the instance profile. + + + + + .. py:method:: edit(instance_profile_arn: str [, iam_role_arn: Optional[str], is_meta_instance_profile: Optional[bool]]) + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + arn = "arn:aws:iam::000000000000:instance-profile/abc" + + w.instance_profiles.edit(instance_profile_arn=arn, iam_role_arn="arn:aws:iam::000000000000:role/bcdf") + + Edit an instance profile. + + The only supported field to change is the optional IAM role ARN associated with the instance profile. + It is required to specify the IAM role ARN if both of the following are true: + + * Your role name and instance profile name do not match. The name is the part after the last slash in + each ARN. * You want to use the instance profile with [Databricks SQL Serverless]. + + To understand where these fields are in the AWS console, see [Enable serverless SQL warehouses]. + + This API is only available to admin users. + + [Databricks SQL Serverless]: https://docs.databricks.com/sql/admin/serverless.html + [Enable serverless SQL warehouses]: https://docs.databricks.com/sql/admin/serverless.html + + :param instance_profile_arn: str + The AWS ARN of the instance profile to register with Databricks. This field is required. + :param iam_role_arn: str (optional) + The AWS IAM role ARN of the role associated with the instance profile. This field is required if + your role name and instance profile name do not match and you want to use the instance profile with + [Databricks SQL Serverless]. + + Otherwise, this field is optional. 
+ + [Databricks SQL Serverless]: https://docs.databricks.com/sql/admin/serverless.html + :param is_meta_instance_profile: bool (optional) + Boolean flag indicating whether the instance profile should only be used in credential passthrough + scenarios. If true, it means the instance profile contains a meta IAM role which could assume a + wide range of roles. Therefore it should always be used with authorization. This field is optional, + the default value is `false`. + + + + + .. py:method:: list() -> Iterator[InstanceProfile] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.instance_profiles.list() + + List available instance profiles. + + List the instance profiles that the calling user can use to launch a cluster. + + This API is available to all users. + + :returns: Iterator over :class:`InstanceProfile` + + + .. py:method:: remove(instance_profile_arn: str) + + Remove the instance profile. + + Remove the instance profile with the provided ARN. Existing clusters with this instance profile will + continue to function. + + This API is only accessible to admin users. + + :param instance_profile_arn: str + The ARN of the instance profile to remove. This field is required. + + + \ No newline at end of file diff --git a/docs/workspace/compute/libraries.rst b/docs/workspace/compute/libraries.rst new file mode 100644 index 000000000..41ed1c72e --- /dev/null +++ b/docs/workspace/compute/libraries.rst @@ -0,0 +1,90 @@ +``w.libraries``: Managed Libraries +================================== +.. currentmodule:: databricks.sdk.service.compute + +.. py:class:: LibrariesAPI + + The Libraries API allows you to install and uninstall libraries and get the status of libraries on a + cluster. + + To make third-party or custom code available to notebooks and jobs running on your clusters, you can + install a library. Libraries can be written in Python, Java, Scala, and R.
You can upload Java, Scala, and + Python libraries and point to external packages in PyPI, Maven, and CRAN repositories. + + Cluster libraries can be used by all notebooks running on a cluster. You can install a cluster library + directly from a public repository such as PyPI or Maven, using a previously installed workspace library, + or using an init script. + + When you install a library on a cluster, a notebook already attached to that cluster will not immediately + see the new library. You must first detach and then reattach the notebook to the cluster. + + When you uninstall a library from a cluster, the library is removed only when you restart the cluster. + Until you restart the cluster, the status of the uninstalled library appears as Uninstall pending restart. + + .. py:method:: all_cluster_statuses() -> ListAllClusterLibraryStatusesResponse + + Get all statuses. + + Get the status of all libraries on all clusters. A status will be available for all libraries + installed on this cluster via the API or the libraries UI as well as libraries set to be installed on + all clusters via the libraries UI. + + :returns: :class:`ListAllClusterLibraryStatusesResponse` + + + .. py:method:: cluster_status(cluster_id: str) -> Iterator[LibraryFullStatus] + + Get status. + + Get the status of libraries on a cluster. A status will be available for all libraries installed on + this cluster via the API or the libraries UI as well as libraries set to be installed on all clusters + via the libraries UI. The order of returned libraries will be as follows. + + 1. Libraries set to be installed on this cluster will be returned first. Within this group, the final + order will be order in which the libraries were added to the cluster. + + 2. Libraries set to be installed on all clusters are returned next. Within this group there is no + order guarantee. + + 3. Libraries that were previously requested on this cluster or on all clusters, but now marked for + removal. 
Within this group there is no order guarantee. + + :param cluster_id: str + Unique identifier of the cluster whose status should be retrieved. + + :returns: Iterator over :class:`LibraryFullStatus` + + + .. py:method:: install(cluster_id: str, libraries: List[Library]) + + Add a library. + + Add libraries to be installed on a cluster. The installation is asynchronous; it happens in the + background after the completion of this request. + + **Note**: The actual set of libraries to be installed on a cluster is the union of the libraries + specified via this method and the libraries set to be installed on all clusters via the libraries UI. + + :param cluster_id: str + Unique identifier for the cluster on which to install these libraries. + :param libraries: List[:class:`Library`] + The libraries to install. + + + + + .. py:method:: uninstall(cluster_id: str, libraries: List[Library]) + + Uninstall libraries. + + Set libraries to be uninstalled on a cluster. The libraries won't be uninstalled until the cluster is + restarted. Uninstalling libraries that are not installed on the cluster will have no impact but is not + an error. + + :param cluster_id: str + Unique identifier for the cluster on which to uninstall these libraries. + :param libraries: List[:class:`Library`] + The libraries to uninstall. + + + \ No newline at end of file diff --git a/docs/workspace/compute/policy_families.rst b/docs/workspace/compute/policy_families.rst new file mode 100644 index 000000000..43194ef01 --- /dev/null +++ b/docs/workspace/compute/policy_families.rst @@ -0,0 +1,66 @@ +``w.policy_families``: Policy Families +====================================== +.. currentmodule:: databricks.sdk.service.compute + +.. py:class:: PolicyFamiliesAPI + + View available policy families. A policy family contains a policy definition providing best practices for + configuring clusters for a particular use case. + + Databricks manages and provides policy families for several common cluster use cases. 
You cannot create, + edit, or delete policy families. + + Policy families cannot be used directly to create clusters. Instead, you create cluster policies using a + policy family. Cluster policies created using a policy family inherit the policy family's policy + definition. + + .. py:method:: get(policy_family_id: str) -> PolicyFamily + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import compute + + w = WorkspaceClient() + + all = w.policy_families.list(compute.ListPolicyFamiliesRequest()) + + first_family = w.policy_families.get(policy_family_id=all[0].policy_family_id) + + Get policy family information. + + Retrieve the information for a policy family based on its identifier. + + :param policy_family_id: str + + :returns: :class:`PolicyFamily` + + + .. py:method:: list( [, max_results: Optional[int], page_token: Optional[str]]) -> Iterator[PolicyFamily] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import compute + + w = WorkspaceClient() + + all = w.policy_families.list(compute.ListPolicyFamiliesRequest()) + + List policy families. + + Retrieve a list of policy families. This API is paginated. + + :param max_results: int (optional) + The max number of policy families to return. + :param page_token: str (optional) + A token that can be used to get the next page of results. + + :returns: Iterator over :class:`PolicyFamily` + \ No newline at end of file diff --git a/docs/workspace/dashboards/index.rst b/docs/workspace/dashboards/index.rst new file mode 100644 index 000000000..756c9b549 --- /dev/null +++ b/docs/workspace/dashboards/index.rst @@ -0,0 +1,10 @@ + +Dashboards +========== + +Manage Lakeview dashboards + +..
toctree:: + :maxdepth: 1 + + lakeview \ No newline at end of file diff --git a/docs/workspace/dashboards/lakeview.rst b/docs/workspace/dashboards/lakeview.rst new file mode 100644 index 000000000..56e9bfb4b --- /dev/null +++ b/docs/workspace/dashboards/lakeview.rst @@ -0,0 +1,25 @@ +``w.lakeview``: Lakeview +======================== +.. currentmodule:: databricks.sdk.service.dashboards + +.. py:class:: LakeviewAPI + + These APIs provide specific management operations for Lakeview dashboards. Generic resource management can + be done with Workspace API (import, export, get-status, list, delete). + + .. py:method:: publish(dashboard_id: str [, embed_credentials: Optional[bool], warehouse_id: Optional[str]]) + + Publish dashboard. + + Publish the current draft dashboard. + + :param dashboard_id: str + UUID identifying the dashboard to be published. + :param embed_credentials: bool (optional) + Flag to indicate if the publisher's credentials should be embedded in the published dashboard. These + embedded credentials will be used to execute the published dashboard's queries. + :param warehouse_id: str (optional) + The ID of the warehouse that can be used to override the warehouse which was set in the draft. + + + \ No newline at end of file diff --git a/docs/workspace/files/dbfs.rst b/docs/workspace/files/dbfs.rst new file mode 100644 index 000000000..9bde1e8b6 --- /dev/null +++ b/docs/workspace/files/dbfs.rst @@ -0,0 +1,238 @@ +``w.dbfs``: DBFS +================ +.. currentmodule:: databricks.sdk.service.files + +.. py:class:: DbfsExt + + DBFS API makes it simple to interact with various data sources without having to include a user's + credentials every time to read a file. + + .. py:method:: add_block(handle: int, data: str) + + Append data block. + + Appends a block of data to the stream specified by the input handle. If the handle does not exist, + this call will throw an exception with `RESOURCE_DOES_NOT_EXIST`.
+ + If the block of data exceeds 1 MB, this call will throw an exception with `MAX_BLOCK_SIZE_EXCEEDED`. + + :param handle: int + The handle on an open stream. + :param data: str + The base64-encoded data to append to the stream. This has a limit of 1 MB. + + + + + .. py:method:: close(handle: int) + + Close the stream. + + Closes the stream specified by the input handle. If the handle does not exist, this call throws an + exception with `RESOURCE_DOES_NOT_EXIST`. + + :param handle: int + The handle on an open stream. + + + + + .. py:method:: copy(src: str, dst: str [, recursive: bool = False, overwrite: bool = False]) + + Copy files between DBFS and local filesystems + + .. py:method:: create(path: str [, overwrite: Optional[bool]]) -> CreateResponse + + Open a stream. + + Opens a stream to write to a file and returns a handle to this stream. There is a 10 minute idle + timeout on this handle. If a file or directory already exists on the given path and __overwrite__ is + set to `false`, this call throws an exception with `RESOURCE_ALREADY_EXISTS`. + + A typical workflow for file upload would be: + + 1. Issue a `create` call and get a handle. 2. Issue one or more `add-block` calls with the handle you + have. 3. Issue a `close` call with the handle you have. + + :param path: str + The path of the new file. The path should be the absolute DBFS path. + :param overwrite: bool (optional) + The flag that specifies whether to overwrite existing file/files. + + :returns: :class:`CreateResponse` + + + .. py:method:: delete(path: str [, recursive: Optional[bool]]) + + Delete a file/directory. + + Delete the file or directory (optionally recursively delete all files in the directory). This call + throws an exception with `IO_ERROR` if the path is a non-empty directory and `recursive` is set to + `false` or on other similar errors. + + When you delete a large number of files, the delete operation is done in increments. 
The call returns + a response after approximately 45 seconds with an error message (503 Service Unavailable) asking you + to re-invoke the delete operation until the directory structure is fully deleted. + + For operations that delete more than 10K files, we discourage using the DBFS REST API, but advise you + to perform such operations in the context of a cluster, using the [File system utility + (dbutils.fs)](/dev-tools/databricks-utils.html#dbutils-fs). `dbutils.fs` covers the functional scope + of the DBFS REST API, but from notebooks. Running such operations using notebooks provides better + control and manageability, such as selective deletes, and the possibility to automate periodic delete + jobs. + + :param path: str + The path of the file or directory to delete. The path should be the absolute DBFS path. + :param recursive: bool (optional) + Whether or not to recursively delete the directory's contents. Deleting empty directories can be + done without providing the recursive flag. + + + + + .. py:method:: download(path: str) -> BinaryIO + + + Usage: + + .. code-block:: + + import io + import pathlib + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + root = pathlib.Path(f'/tmp/{time.time_ns()}') + + f = io.BytesIO(b"some text data") + w.dbfs.upload(f'{root}/01', f) + + with w.dbfs.download(f'{root}/01') as f: + assert f.read() == b"some text data" + + Download file from DBFS + + .. py:method:: exists(path: str) -> bool + + If file exists on DBFS + + .. py:method:: get_status(path: str) -> FileInfo + + Get the information of a file or directory. + + Gets the file information for a file or directory. If the file or directory does not exist, this call + throws an exception with `RESOURCE_DOES_NOT_EXIST`. + + :param path: str + The path of the file or directory. The path should be the absolute DBFS path. + + :returns: :class:`FileInfo` + + + .. 
py:method:: list(path: str [, recursive: bool = False]) -> Iterator[files.FileInfo] + + List directory contents or file details. + + List the contents of a directory, or details of the file. If the file or directory does not exist, + this call throws an exception with `RESOURCE_DOES_NOT_EXIST`. + + When calling list on a large directory, the list operation will time out after approximately 60 + seconds. + + :param recursive: traverse deep into directory tree + :returns: iterator of metadata for every file + + + .. py:method:: mkdirs(path: str) + + Create a directory. + + Creates the given directory and necessary parent directories if they do not exist. If a file (not a + directory) exists at any prefix of the input path, this call throws an exception with + `RESOURCE_ALREADY_EXISTS`. **Note**: If this operation fails, it might have succeeded in creating some + of the necessary parent directories. + + :param path: str + The path of the new directory. The path should be the absolute DBFS path. + + + + + .. py:method:: move(source_path: str, destination_path: str) + + Move a file. + + Moves a file from one location to another location within DBFS. If the source file does not exist, + this call throws an exception with `RESOURCE_DOES_NOT_EXIST`. If a file already exists in the + destination path, this call throws an exception with `RESOURCE_ALREADY_EXISTS`. If the given source + path is a directory, this call always recursively moves all files. + + :param source_path: str + The source path of the file or directory. The path should be the absolute DBFS path. + :param destination_path: str + The destination path of the file or directory. The path should be the absolute DBFS path. + + + + + .. py:method:: move_(src: str, dst: str [, recursive: bool = False, overwrite: bool = False]) + + Move files between local and DBFS systems + + .. py:method:: open(path: str [, read: bool = False, write: bool = False, overwrite: bool = False]) -> _DbfsIO + + + ..
py:method:: put(path: str [, contents: Optional[str], overwrite: Optional[bool]]) + + Upload a file. + + Uploads a file through the use of multipart form post. It is mainly used for streaming uploads, but + can also be used as a convenient single call for data upload. + + Alternatively you can pass contents as a base64 string. + + The amount of data that can be passed (when not streaming) using the __contents__ parameter is limited + to 1 MB. `MAX_BLOCK_SIZE_EXCEEDED` will be thrown if this limit is exceeded. + + If you want to upload large files, use the streaming upload. For details, see :method:dbfs/create, + :method:dbfs/addBlock, :method:dbfs/close. + + :param path: str + The path of the new file. The path should be the absolute DBFS path. + :param contents: str (optional) + This parameter might be absent, and instead a posted file will be used. + :param overwrite: bool (optional) + The flag that specifies whether to overwrite existing file/files. + + + + + .. py:method:: read(path: str [, length: Optional[int], offset: Optional[int]]) -> ReadResponse + + Get the contents of a file. + + Returns the contents of a file. If the file does not exist, this call throws an exception with + `RESOURCE_DOES_NOT_EXIST`. If the path is a directory, the read length is negative, or if the offset + is negative, this call throws an exception with `INVALID_PARAMETER_VALUE`. If the read length exceeds + 1 MB, this call throws an exception with `MAX_READ_SIZE_EXCEEDED`. + + If `offset + length` exceeds the number of bytes in a file, it reads the contents until the end of + file. + + :param path: str + The path of the file to read. The path should be the absolute DBFS path. + :param length: int (optional) + The number of bytes to read starting from the offset. This has a limit of 1 MB, and a default value + of 0.5 MB. + :param offset: int (optional) + The offset to read from in bytes. + + :returns: :class:`ReadResponse` + + + ..
py:method:: upload(path: str, src: BinaryIO [, overwrite: bool = False]) + + Upload file to DBFS \ No newline at end of file diff --git a/docs/workspace/files/index.rst b/docs/workspace/files/index.rst new file mode 100644 index 000000000..b2276ad6b --- /dev/null +++ b/docs/workspace/files/index.rst @@ -0,0 +1,10 @@ + +File Management +=============== + +Manage files on Databricks in a filesystem-like interface + +.. toctree:: + :maxdepth: 1 + + dbfs \ No newline at end of file diff --git a/docs/workspace/iam/account_access_control_proxy.rst b/docs/workspace/iam/account_access_control_proxy.rst new file mode 100644 index 000000000..3265b29cc --- /dev/null +++ b/docs/workspace/iam/account_access_control_proxy.rst @@ -0,0 +1,56 @@ +``w.account_access_control_proxy``: Account Access Control Proxy +================================================================ +.. currentmodule:: databricks.sdk.service.iam + +.. py:class:: AccountAccessControlProxyAPI + + These APIs manage access rules on resources in an account. Currently, only grant rules are supported. A + grant rule specifies a role assigned to a set of principals. A list of rules attached to a resource is + called a rule set. A workspace must belong to an account for these APIs to work. + + .. py:method:: get_assignable_roles_for_resource(resource: str) -> GetAssignableRolesForResourceResponse + + Get assignable roles for a resource. + + Gets all the roles that can be granted on an account-level resource. A role is grantable if the rule + set on the resource can contain an access rule of the role. + + :param resource: str + The resource name for which assignable roles will be listed. + + :returns: :class:`GetAssignableRolesForResourceResponse` + + + .. py:method:: get_rule_set(name: str, etag: str) -> RuleSetResponse + + Get a rule set. + + Get a rule set by its name. A rule set is always attached to a resource and contains a list of access + rules on the said resource. 
Currently only a default rule set for each resource is supported. + + :param name: str + The ruleset name associated with the request. + :param etag: str + Etag used for versioning. The response is at least as fresh as the eTag provided. Etag is used for + optimistic concurrency control as a way to help prevent simultaneous updates of a rule set from + overwriting each other. It is strongly suggested that systems make use of the etag in the read -> + modify -> write pattern to perform rule set updates in order to avoid race conditions; that is, get an + etag from a GET rule set request, and pass it with the PUT update request to identify the rule set + version you are updating. + + :returns: :class:`RuleSetResponse` + + + .. py:method:: update_rule_set(name: str, rule_set: RuleSetUpdateRequest) -> RuleSetResponse + + Update a rule set. + + Replace the rules of a rule set. First, use a GET rule set request to read the current version of the + rule set before modifying it. This pattern helps prevent conflicts between concurrent updates. + + :param name: str + Name of the rule set. + :param rule_set: :class:`RuleSetUpdateRequest` + + :returns: :class:`RuleSetResponse` + \ No newline at end of file diff --git a/docs/workspace/iam/current_user.rst b/docs/workspace/iam/current_user.rst new file mode 100644 index 000000000..b2ba795db --- /dev/null +++ b/docs/workspace/iam/current_user.rst @@ -0,0 +1,27 @@ +``w.current_user``: CurrentUser +=============================== +.. currentmodule:: databricks.sdk.service.iam + +.. py:class:: CurrentUserAPI + + This API allows retrieving information about the currently authenticated user or service principal. + + .. py:method:: me() -> User + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + me2 = w.current_user.me() + + Get current user info. + + Get details about the current method caller's identity.
+ + :returns: :class:`User` + \ No newline at end of file diff --git a/docs/workspace/iam/groups.rst b/docs/workspace/iam/groups.rst new file mode 100644 index 000000000..0dd76485b --- /dev/null +++ b/docs/workspace/iam/groups.rst @@ -0,0 +1,185 @@ +``w.groups``: Groups +==================== +.. currentmodule:: databricks.sdk.service.iam + +.. py:class:: GroupsAPI + + Groups simplify identity management, making it easier to assign access to Databricks workspace, data, and + other securable objects. + + It is best practice to assign access to workspaces and access-control policies in Unity Catalog to groups, + instead of to users individually. All Databricks workspace identities can be assigned as members of + groups, and members inherit permissions that are assigned to their group. + + .. py:method:: create( [, display_name: Optional[str], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], id: Optional[str], members: Optional[List[ComplexValue]], meta: Optional[ResourceMeta], roles: Optional[List[ComplexValue]], schemas: Optional[List[GroupSchema]]]) -> Group + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + group = w.groups.create(display_name=f'sdk-{time.time_ns()}') + + # cleanup + w.groups.delete(id=group.id) + + Create a new group. + + Creates a group in the Databricks workspace with a unique name, using the supplied group details. + + :param display_name: str (optional) + String that represents a human-readable group name + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the group. See [assigning entitlements] for a full list of supported + values. 
+ + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + :param groups: List[:class:`ComplexValue`] (optional) + :param id: str (optional) + Databricks group ID + :param members: List[:class:`ComplexValue`] (optional) + :param meta: :class:`ResourceMeta` (optional) + Container for the group identifier. Workspace local versus account. + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`GroupSchema`] (optional) + The schema of the group. + + :returns: :class:`Group` + + + .. py:method:: delete(id: str) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + group = w.groups.create(display_name=f'sdk-{time.time_ns()}') + + w.groups.delete(id=group.id) + + Delete a group. + + Deletes a group from the Databricks workspace. + + :param id: str + Unique ID for a group in the Databricks workspace. + + + + + .. py:method:: get(id: str) -> Group + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + group = w.groups.create(display_name=f'sdk-{time.time_ns()}') + + fetch = w.groups.get(id=group.id) + + # cleanup + w.groups.delete(id=group.id) + + Get group details. + + Gets the information for a specific group in the Databricks workspace. + + :param id: str + Unique ID for a group in the Databricks workspace. + + :returns: :class:`Group` + + + .. py:method:: list( [, attributes: Optional[str], count: Optional[int], excluded_attributes: Optional[str], filter: Optional[str], sort_by: Optional[str], sort_order: Optional[ListSortOrder], start_index: Optional[int]]) -> Iterator[Group] + + List group details. + + Gets all details of the groups associated with the Databricks workspace. 
+ + :param attributes: str (optional) + Comma-separated list of attributes to return in response. + :param count: int (optional) + Desired number of results per page. + :param excluded_attributes: str (optional) + Comma-separated list of attributes to exclude in response. + :param filter: str (optional) + Query by which the results have to be filtered. Supported operators are equals(`eq`), + contains(`co`), starts with(`sw`) and not equals(`ne`). Additionally, simple expressions can be + formed using logical operators - `and` and `or`. The [SCIM RFC] has more details but we currently + only support simple expressions. + + [SCIM RFC]: https://tools.ietf.org/html/rfc7644#section-3.4.2.2 + :param sort_by: str (optional) + Attribute to sort the results. + :param sort_order: :class:`ListSortOrder` (optional) + The order to sort the results. + :param start_index: int (optional) + Specifies the index of the first result. First item is number 1. + + :returns: Iterator over :class:`Group` + + + .. py:method:: patch(id: str [, operations: Optional[List[Patch]], schemas: Optional[List[PatchSchema]]]) + + Update group details. + + Partially updates the details of a group. + + :param id: str + Unique ID for a group in the Databricks workspace. + :param operations: List[:class:`Patch`] (optional) + :param schemas: List[:class:`PatchSchema`] (optional) + The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. + + + + + .. py:method:: update(id: str [, display_name: Optional[str], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], members: Optional[List[ComplexValue]], meta: Optional[ResourceMeta], roles: Optional[List[ComplexValue]], schemas: Optional[List[GroupSchema]]]) + + Replace a group. + + Updates the details of a group by replacing the entire group entity. 
+ + :param id: str + Databricks group ID + :param display_name: str (optional) + String that represents a human-readable group name + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the group. See [assigning entitlements] for a full list of supported + values. + + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + :param groups: List[:class:`ComplexValue`] (optional) + :param members: List[:class:`ComplexValue`] (optional) + :param meta: :class:`ResourceMeta` (optional) + Container for the group identifier. Workspace local versus account. + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`GroupSchema`] (optional) + The schema of the group. + + + \ No newline at end of file diff --git a/docs/workspace/iam/index.rst b/docs/workspace/iam/index.rst new file mode 100644 index 000000000..11e9318f9 --- /dev/null +++ b/docs/workspace/iam/index.rst @@ -0,0 +1,15 @@ + +Identity and Access Management +============================== + +Manage users, service principals, groups and their permissions in Accounts and Workspaces + +.. toctree:: + :maxdepth: 1 + + account_access_control_proxy + current_user + groups + permissions + service_principals + users \ No newline at end of file diff --git a/docs/workspace/iam/permissions.rst b/docs/workspace/iam/permissions.rst new file mode 100644 index 000000000..243b633c6 --- /dev/null +++ b/docs/workspace/iam/permissions.rst @@ -0,0 +1,181 @@ +``w.permissions``: Permissions +============================== +.. currentmodule:: databricks.sdk.service.iam + +.. py:class:: PermissionsAPI + + The Permissions API is used to create, read, write, edit, update, and manage access for various users on + different objects and endpoints.
+ + * **[Cluster permissions](:service:clusters)** — Manage which users can manage, restart, or attach to + clusters. + + * **[Cluster policy permissions](:service:clusterpolicies)** — Manage which users can use cluster + policies. + + * **[Delta Live Tables pipeline permissions](:service:pipelines)** — Manage which users can view, + manage, run, cancel, or own a Delta Live Tables pipeline. + + * **[Job permissions](:service:jobs)** — Manage which users can view, manage, trigger, cancel, or own a + job. + + * **[MLflow experiment permissions](:service:experiments)** — Manage which users can read, edit, or + manage MLflow experiments. + + * **[MLflow registered model permissions](:service:modelregistry)** — Manage which users can read, edit, + or manage MLflow registered models. + + * **[Password permissions](:service:users)** — Manage which users can use password login when SSO is + enabled. + + * **[Instance Pool permissions](:service:instancepools)** — Manage which users can manage or attach to + pools. + + * **[Repo permissions](:service:repos)** — Manage which users can read, run, edit, or manage a repo. + + * **[Serving endpoint permissions](:service:servingendpoints)** — Manage which users can view, query, or + manage a serving endpoint. + + * **[SQL warehouse permissions](:service:warehouses)** — Manage which users can use or manage SQL + warehouses. + + * **[Token permissions](:service:tokenmanagement)** — Manage which users can create or use tokens. + + * **[Workspace object permissions](:service:workspace)** — Manage which users can read, run, edit, or + manage directories, files, and notebooks. + + For the mapping of the required permissions for specific actions or abilities and other important + information, see [Access Control]. + + [Access Control]: https://docs.databricks.com/security/auth-authz/access-control/index.html + + .. py:method:: get(request_object_type: str, request_object_id: str) -> ObjectPermissions + + + Usage: + + ..
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + obj = w.workspace.get_status(path=notebook_path) + + _ = w.permissions.get(request_object_type="notebooks", + request_object_id="%d" % (obj.object_id)) + + Get object permissions. + + Gets the permissions of an object. Objects can inherit permissions from their parent objects or root + object. + + :param request_object_type: str + The type of the request object. Can be one of the following: authorization, clusters, + cluster-policies, directories, experiments, files, instance-pools, jobs, notebooks, pipelines, + registered-models, repos, serving-endpoints, or sql-warehouses. + :param request_object_id: str + The id of the request object. + + :returns: :class:`ObjectPermissions` + + + .. py:method:: get_permission_levels(request_object_type: str, request_object_id: str) -> GetPermissionLevelsResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + obj = w.workspace.get_status(path=notebook_path) + + levels = w.permissions.get_permission_levels(request_object_type="notebooks", + request_object_id="%d" % (obj.object_id)) + + Get object permission levels. + + Gets the permission levels that a user can have on an object. + + :param request_object_type: str + + :param request_object_id: str + + + :returns: :class:`GetPermissionLevelsResponse` + + + .. py:method:: set(request_object_type: str, request_object_id: str [, access_control_list: Optional[List[AccessControlRequest]]]) -> ObjectPermissions + + + Usage: + + ..
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import iam + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + group = w.groups.create(display_name=f'sdk-{time.time_ns()}') + + obj = w.workspace.get_status(path=notebook_path) + + _ = w.permissions.set(request_object_type="notebooks", + request_object_id="%d" % (obj.object_id), + access_control_list=[ + iam.AccessControlRequest(group_name=group.display_name, + permission_level=iam.PermissionLevel.CAN_RUN) + ]) + + # cleanup + w.groups.delete(id=group.id) + + Set object permissions. + + Sets permissions on an object. Objects can inherit permissions from their parent objects or root + object. + + :param request_object_type: str + The type of the request object. Can be one of the following: authorization, clusters, + cluster-policies, directories, experiments, files, instance-pools, jobs, notebooks, pipelines, + registered-models, repos, serving-endpoints, or sql-warehouses. + :param request_object_id: str + The id of the request object. + :param access_control_list: List[:class:`AccessControlRequest`] (optional) + + :returns: :class:`ObjectPermissions` + + + .. py:method:: update(request_object_type: str, request_object_id: str [, access_control_list: Optional[List[AccessControlRequest]]]) -> ObjectPermissions + + Update object permissions. + + Updates the permissions on an object. Objects can inherit permissions from their parent objects or + root object. + + :param request_object_type: str + The type of the request object. Can be one of the following: authorization, clusters, + cluster-policies, directories, experiments, files, instance-pools, jobs, notebooks, pipelines, + registered-models, repos, serving-endpoints, or sql-warehouses. + :param request_object_id: str + The id of the request object. 
+ :param access_control_list: List[:class:`AccessControlRequest`] (optional) + + :returns: :class:`ObjectPermissions` + \ No newline at end of file diff --git a/docs/workspace/iam/service_principals.rst b/docs/workspace/iam/service_principals.rst new file mode 100644 index 000000000..41829fc87 --- /dev/null +++ b/docs/workspace/iam/service_principals.rst @@ -0,0 +1,241 @@ +``w.service_principals``: Service Principals +============================================ +.. currentmodule:: databricks.sdk.service.iam + +.. py:class:: ServicePrincipalsAPI + + Identities for use with jobs, automated tools, and systems such as scripts, apps, and CI/CD platforms. + Databricks recommends creating service principals to run production jobs or modify production data. If all + processes that act on production data run with service principals, interactive users do not need any + write, delete, or modify privileges in production. This eliminates the risk of a user overwriting + production data by accident. + + .. py:method:: create( [, active: Optional[bool], application_id: Optional[str], display_name: Optional[str], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], id: Optional[str], roles: Optional[List[ComplexValue]], schemas: Optional[List[ServicePrincipalSchema]]]) -> ServicePrincipal + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import iam + + w = WorkspaceClient() + + groups = w.groups.group_display_name_to_id_map(iam.ListGroupsRequest()) + + spn = w.service_principals.create(display_name=f'sdk-{time.time_ns()}', + groups=[iam.ComplexValue(value=groups["admins"])]) + + # cleanup + w.service_principals.delete(id=spn.id) + + Create a service principal. + + Creates a new service principal in the Databricks workspace. 
+ + :param active: bool (optional) + If this user is active + :param application_id: str (optional) + UUID relating to the service principal + :param display_name: str (optional) + String that represents a concatenation of given and family names. + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the service principal. See [assigning entitlements] for a full list of + supported values. + + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + :param groups: List[:class:`ComplexValue`] (optional) + :param id: str (optional) + Databricks service principal ID. + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`ServicePrincipalSchema`] (optional) + The schema of the List response. + + :returns: :class:`ServicePrincipal` + + + .. py:method:: delete(id: str) + + Delete a service principal. + + Delete a single service principal in the Databricks workspace. + + :param id: str + Unique ID for a service principal in the Databricks workspace. + + + + + .. py:method:: get(id: str) -> ServicePrincipal + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.service_principals.create(display_name=f'sdk-{time.time_ns()}') + + by_id = w.service_principals.get(id=created.id) + + # cleanup + w.service_principals.delete(id=created.id) + + Get service principal details. + + Gets the details for a single service principal defined in the Databricks workspace. + + :param id: str + Unique ID for a service principal in the Databricks workspace. + + :returns: :class:`ServicePrincipal` + + + ..
py:method:: list( [, attributes: Optional[str], count: Optional[int], excluded_attributes: Optional[str], filter: Optional[str], sort_by: Optional[str], sort_order: Optional[ListSortOrder], start_index: Optional[int]]) -> Iterator[ServicePrincipal] + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + sp_create = a.service_principals.create(active=True, display_name=f'sdk-{time.time_ns()}') + + sp = a.service_principals.get(id=sp_create.id) + + sp_list = a.service_principals.list(filter="displayName eq %s" % (sp.display_name)) + + # cleanup + a.service_principals.delete(id=sp_create.id) + + List service principals. + + Gets the set of service principals associated with a Databricks workspace. + + :param attributes: str (optional) + Comma-separated list of attributes to return in response. + :param count: int (optional) + Desired number of results per page. + :param excluded_attributes: str (optional) + Comma-separated list of attributes to exclude in response. + :param filter: str (optional) + Query by which the results have to be filtered. Supported operators are equals(`eq`), + contains(`co`), starts with(`sw`) and not equals(`ne`). Additionally, simple expressions can be + formed using logical operators - `and` and `or`. The [SCIM RFC] has more details but we currently + only support simple expressions. + + [SCIM RFC]: https://tools.ietf.org/html/rfc7644#section-3.4.2.2 + :param sort_by: str (optional) + Attribute to sort the results. + :param sort_order: :class:`ListSortOrder` (optional) + The order to sort the results. + :param start_index: int (optional) + Specifies the index of the first result. First item is number 1. + + :returns: Iterator over :class:`ServicePrincipal` + + + .. py:method:: patch(id: str [, operations: Optional[List[Patch]], schemas: Optional[List[PatchSchema]]]) + + + Usage: + + ..
code-block:: + + import time + + from databricks.sdk import AccountClient + from databricks.sdk.service import iam + + a = AccountClient() + + sp_create = a.service_principals.create(active=True, display_name=f'sdk-{time.time_ns()}') + + sp = a.service_principals.get(id=sp_create.id) + + a.service_principals.patch(id=sp.id, + operations=[iam.Patch(op=iam.PatchOp.REPLACE, path="active", value="false")], + schemas=[iam.PatchSchema.URN_IETF_PARAMS_SCIM_API_MESSAGES_2_0_PATCH_OP]) + + # cleanup + a.service_principals.delete(id=sp_create.id) + + Update service principal details. + + Partially updates the details of a single service principal in the Databricks workspace. + + :param id: str + Unique ID for a service principal in the Databricks workspace. + :param operations: List[:class:`Patch`] (optional) + :param schemas: List[:class:`PatchSchema`] (optional) + The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. + + + + + .. py:method:: update(id: str [, active: Optional[bool], application_id: Optional[str], display_name: Optional[str], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], roles: Optional[List[ComplexValue]], schemas: Optional[List[ServicePrincipalSchema]]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import iam + + w = WorkspaceClient() + + created = w.service_principals.create(display_name=f'sdk-{time.time_ns()}') + + w.service_principals.update(id=created.id, + display_name=f'sdk-{time.time_ns()}', + roles=[iam.ComplexValue(value="xyz")]) + + # cleanup + w.service_principals.delete(id=created.id) + + Replace service principal. + + Updates the details of a single service principal. + + This action replaces the existing service principal with the same name. + + :param id: str + Databricks service principal ID. 
+ :param active: bool (optional) + If this user is active + :param application_id: str (optional) + UUID relating to the service principal + :param display_name: str (optional) + String that represents a concatenation of given and family names. + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the service principal. See [assigning entitlements] for a full list of + supported values. + + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + :param groups: List[:class:`ComplexValue`] (optional) + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`ServicePrincipalSchema`] (optional) + The schema of the List response. + + + \ No newline at end of file diff --git a/docs/workspace/iam/users.rst b/docs/workspace/iam/users.rst new file mode 100644 index 000000000..2adaee935 --- /dev/null +++ b/docs/workspace/iam/users.rst @@ -0,0 +1,316 @@ +``w.users``: Users +================== +.. currentmodule:: databricks.sdk.service.iam + +.. py:class:: UsersAPI + + User identities recognized by Databricks and represented by email addresses. + + Databricks recommends using SCIM provisioning to sync users and groups automatically from your identity + provider to your Databricks workspace. SCIM streamlines onboarding a new employee or team by using your + identity provider to create users and groups in Databricks workspace and give them the proper level of + access. When a user leaves your organization or no longer needs access to Databricks workspace, admins can + terminate the user in your identity provider and that user’s account will also be removed from + Databricks workspace. This ensures a consistent offboarding process and prevents unauthorized users from + accessing sensitive data. + + .. 
py:method:: create( [, active: Optional[bool], display_name: Optional[str], emails: Optional[List[ComplexValue]], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], id: Optional[str], name: Optional[Name], roles: Optional[List[ComplexValue]], schemas: Optional[List[UserSchema]], user_name: Optional[str]]) -> User + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + user = a.users.create(display_name=f'sdk-{time.time_ns()}', user_name=f'sdk-{time.time_ns()}@example.com') + + # cleanup + a.users.delete(id=user.id) + + Create a new user. + + Creates a new user in the Databricks workspace. This new user will also be added to the Databricks + account. + + :param active: bool (optional) + If this user is active + :param display_name: str (optional) + String that represents a concatenation of given and family names. For example `John Smith`. This + field cannot be updated through the Workspace SCIM APIs when [identity federation is enabled]. Use + Account SCIM APIs to update `displayName`. + + [identity federation is enabled]: https://docs.databricks.com/administration-guide/users-groups/best-practices.html#enable-identity-federation + :param emails: List[:class:`ComplexValue`] (optional) + All the emails associated with the Databricks user. + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the user. See [assigning entitlements] for a full list of supported values. + + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + External ID is not currently supported. It is reserved for future use. + :param groups: List[:class:`ComplexValue`] (optional) + :param id: str (optional) + Databricks user ID. This is automatically set by Databricks. Any value provided by the client will + be ignored. 
+ :param name: :class:`Name` (optional) + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`UserSchema`] (optional) + The schema of the user. + :param user_name: str (optional) + Email address of the Databricks user. + + :returns: :class:`User` + + + .. py:method:: delete(id: str) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + other_owner = w.users.create(user_name=f'sdk-{time.time_ns()}@example.com') + + w.users.delete(id=other_owner.id) + + Delete a user. + + Deletes a user. Deleting a user from a Databricks workspace also removes objects associated with the + user. + + :param id: str + Unique ID for a user in the Databricks workspace. + + + + + .. py:method:: get(id: str [, attributes: Optional[str], count: Optional[int], excluded_attributes: Optional[str], filter: Optional[str], sort_by: Optional[str], sort_order: Optional[GetSortOrder], start_index: Optional[int]]) -> User + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import AccountClient + + a = AccountClient() + + user = a.users.create(display_name=f'sdk-{time.time_ns()}', user_name=f'sdk-{time.time_ns()}@example.com') + + by_id = a.users.get(id=user.id) + + # cleanup + a.users.delete(id=user.id) + + Get user details. + + Gets information for a specific user in Databricks workspace. + + :param id: str + Unique ID for a user in the Databricks workspace. + :param attributes: str (optional) + Comma-separated list of attributes to return in response. + :param count: int (optional) + Desired number of results per page. + :param excluded_attributes: str (optional) + Comma-separated list of attributes to exclude in response. + :param filter: str (optional) + Query by which the results have to be filtered. Supported operators are equals(`eq`), + contains(`co`), starts with(`sw`) and not equals(`ne`). 
Additionally, simple expressions can be + formed using logical operators - `and` and `or`. The [SCIM RFC] has more details but we currently + only support simple expressions. + + [SCIM RFC]: https://tools.ietf.org/html/rfc7644#section-3.4.2.2 + :param sort_by: str (optional) + Attribute to sort the results. Multi-part paths are supported. For example, `userName`, + `name.givenName`, and `emails`. + :param sort_order: :class:`GetSortOrder` (optional) + The order to sort the results. + :param start_index: int (optional) + Specifies the index of the first result. First item is number 1. + + :returns: :class:`User` + + + .. py:method:: get_permission_levels() -> GetPasswordPermissionLevelsResponse + + Get password permission levels. + + Gets the permission levels that a user can have on an object. + + :returns: :class:`GetPasswordPermissionLevelsResponse` + + + .. py:method:: get_permissions() -> PasswordPermissions + + Get password permissions. + + Gets the permissions of all passwords. Passwords can inherit permissions from their root object. + + :returns: :class:`PasswordPermissions` + + + .. py:method:: list( [, attributes: Optional[str], count: Optional[int], excluded_attributes: Optional[str], filter: Optional[str], sort_by: Optional[str], sort_order: Optional[ListSortOrder], start_index: Optional[int]]) -> Iterator[User] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import iam + + w = WorkspaceClient() + + all_users = w.users.list(attributes="id,userName", + sort_by="userName", + sort_order=iam.ListSortOrder.DESCENDING) + + List users. + + Gets details for all the users associated with a Databricks workspace. + + :param attributes: str (optional) + Comma-separated list of attributes to return in response. + :param count: int (optional) + Desired number of results per page. + :param excluded_attributes: str (optional) + Comma-separated list of attributes to exclude in response. 
+ :param filter: str (optional) + Query by which the results have to be filtered. Supported operators are equals(`eq`), + contains(`co`), starts with(`sw`) and not equals(`ne`). Additionally, simple expressions can be + formed using logical operators - `and` and `or`. The [SCIM RFC] has more details but we currently + only support simple expressions. + + [SCIM RFC]: https://tools.ietf.org/html/rfc7644#section-3.4.2.2 + :param sort_by: str (optional) + Attribute to sort the results. Multi-part paths are supported. For example, `userName`, + `name.givenName`, and `emails`. + :param sort_order: :class:`ListSortOrder` (optional) + The order to sort the results. + :param start_index: int (optional) + Specifies the index of the first result. First item is number 1. + + :returns: Iterator over :class:`User` + + + .. py:method:: patch(id: str [, operations: Optional[List[Patch]], schemas: Optional[List[PatchSchema]]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import iam + + w = WorkspaceClient() + + user = w.users.create(display_name=f'sdk-{time.time_ns()}', user_name=f'sdk-{time.time_ns()}@example.com') + + w.users.patch(id=user.id, + operations=[iam.Patch(op=iam.PatchOp.REPLACE, path="active", value="false")], + schemas=[iam.PatchSchema.URN_IETF_PARAMS_SCIM_API_MESSAGES_2_0_PATCH_OP]) + + Update user details. + + Partially updates a user resource by applying the supplied operations on specific user attributes. + + :param id: str + Unique ID for a user in the Databricks workspace. + :param operations: List[:class:`Patch`] (optional) + :param schemas: List[:class:`PatchSchema`] (optional) + The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. + + + + + .. py:method:: set_permissions( [, access_control_list: Optional[List[PasswordAccessControlRequest]]]) -> PasswordPermissions + + Set password permissions. + + Sets permissions on all passwords. 
Passwords can inherit permissions from their root object. + + :param access_control_list: List[:class:`PasswordAccessControlRequest`] (optional) + + :returns: :class:`PasswordPermissions` + + + .. py:method:: update(id: str [, active: Optional[bool], display_name: Optional[str], emails: Optional[List[ComplexValue]], entitlements: Optional[List[ComplexValue]], external_id: Optional[str], groups: Optional[List[ComplexValue]], name: Optional[Name], roles: Optional[List[ComplexValue]], schemas: Optional[List[UserSchema]], user_name: Optional[str]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + user = w.users.create(display_name=f'sdk-{time.time_ns()}', user_name=f'sdk-{time.time_ns()}@example.com') + + w.users.update(id=user.id, user_name=user.user_name, active=True) + + Replace a user. + + Replaces a user's information with the data supplied in request. + + :param id: str + Databricks user ID. This is automatically set by Databricks. Any value provided by the client will + be ignored. + :param active: bool (optional) + If this user is active + :param display_name: str (optional) + String that represents a concatenation of given and family names. For example `John Smith`. This + field cannot be updated through the Workspace SCIM APIs when [identity federation is enabled]. Use + Account SCIM APIs to update `displayName`. + + [identity federation is enabled]: https://docs.databricks.com/administration-guide/users-groups/best-practices.html#enable-identity-federation + :param emails: List[:class:`ComplexValue`] (optional) + All the emails associated with the Databricks user. + :param entitlements: List[:class:`ComplexValue`] (optional) + Entitlements assigned to the user. See [assigning entitlements] for a full list of supported values. 
+ + [assigning entitlements]: https://docs.databricks.com/administration-guide/users-groups/index.html#assigning-entitlements + :param external_id: str (optional) + External ID is not currently supported. It is reserved for future use. + :param groups: List[:class:`ComplexValue`] (optional) + :param name: :class:`Name` (optional) + :param roles: List[:class:`ComplexValue`] (optional) + Corresponds to AWS instance profile/arn role. + :param schemas: List[:class:`UserSchema`] (optional) + The schema of the user. + :param user_name: str (optional) + Email address of the Databricks user. + + + + + .. py:method:: update_permissions( [, access_control_list: Optional[List[PasswordAccessControlRequest]]]) -> PasswordPermissions + + Update password permissions. + + Updates the permissions on all passwords. Passwords can inherit permissions from their root object. + + :param access_control_list: List[:class:`PasswordAccessControlRequest`] (optional) + + :returns: :class:`PasswordPermissions` + \ No newline at end of file diff --git a/docs/workspace/index.rst b/docs/workspace/index.rst new file mode 100644 index 000000000..de76f7c5f --- /dev/null +++ b/docs/workspace/index.rst @@ -0,0 +1,23 @@ + +Workspace APIs +============== + +These APIs are available from WorkspaceClient + +.. toctree:: + :maxdepth: 1 + + workspace/index + compute/index + jobs/index + pipelines/index + files/index + ml/index + serving/index + iam/index + sql/index + catalog/index + sharing/index + settings/index + vectorsearch/index + dashboards/index \ No newline at end of file diff --git a/docs/workspace/jobs/index.rst b/docs/workspace/jobs/index.rst new file mode 100644 index 000000000..a8f242ea2 --- /dev/null +++ b/docs/workspace/jobs/index.rst @@ -0,0 +1,10 @@ + +Jobs +==== + +Schedule automated jobs on Databricks Workspaces + +.. 
toctree:: + :maxdepth: 1 + + jobs \ No newline at end of file diff --git a/docs/workspace/jobs/jobs.rst b/docs/workspace/jobs/jobs.rst new file mode 100644 index 000000000..54a42c979 --- /dev/null +++ b/docs/workspace/jobs/jobs.rst @@ -0,0 +1,1096 @@ +``w.jobs``: Jobs +================ +.. currentmodule:: databricks.sdk.service.jobs + +.. py:class:: JobsAPI + + The Jobs API allows you to create, edit, and delete jobs. + + You can use a Databricks job to run a data processing or data analysis task in a Databricks cluster with + scalable resources. Your job can consist of a single task or can be a large, multi-task workflow with + complex dependencies. Databricks manages the task orchestration, cluster management, monitoring, and error + reporting for all of your jobs. You can run your jobs immediately or periodically through an easy-to-use + scheduling system. You can implement job tasks using notebooks, JARs, Delta Live Tables pipelines, or + Python, Scala, Spark submit, and Java applications. + + You should never hard code secrets or store them in plain text. Use the [Secrets CLI] to manage secrets in + the [Databricks CLI]. Use the [Secrets utility] to reference secrets in notebooks and jobs. + + [Databricks CLI]: https://docs.databricks.com/dev-tools/cli/index.html + [Secrets CLI]: https://docs.databricks.com/dev-tools/cli/secrets-cli.html + [Secrets utility]: https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-secrets + + .. py:method:: cancel_all_runs( [, all_queued_runs: Optional[bool], job_id: Optional[int]]) + + + Usage: + + ..
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + created_job = w.jobs.create(name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.Task(description="test", + existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key="test", + timeout_seconds=0) + ]) + + w.jobs.cancel_all_runs(job_id=created_job.job_id) + + # cleanup + w.jobs.delete(job_id=created_job.job_id) + + Cancel all runs of a job. + + Cancels all active runs of a job. The runs are canceled asynchronously, so it doesn't prevent new runs + from being started. + + :param all_queued_runs: bool (optional) + Optional boolean parameter to cancel all queued runs. If no job_id is provided, all queued runs in + the workspace are canceled. + :param job_id: int (optional) + The canonical identifier of the job to cancel all runs of. + + + + + .. py:method:: cancel_run(run_id: int) -> Wait[Run] + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + created_job = w.jobs.create(name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.Task(description="test", + existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key="test", + timeout_seconds=0) + ]) + + run_now_response = w.jobs.run_now(job_id=created_job.job_id) + + cancelled_run = w.jobs.cancel_run(run_id=run_now_response.response.run_id).result() + + # cleanup + w.jobs.delete(job_id=created_job.job_id) + + Cancel a run. + + Cancels a job run or a task run. The run is canceled asynchronously, so it may still be running when + this request completes. + + :param run_id: int + This field is required. + + :returns: + Long-running operation waiter for :class:`Run`. + See :method:wait_get_run_job_terminated_or_skipped for more details. + + + .. py:method:: cancel_run_and_wait(run_id: int, timeout: datetime.timedelta = 0:20:00) -> Run + + + .. 
py:method:: create( [, access_control_list: Optional[List[iam.AccessControlRequest]], compute: Optional[List[JobCompute]], continuous: Optional[Continuous], deployment: Optional[JobDeployment], description: Optional[str], edit_mode: Optional[CreateJobEditMode], email_notifications: Optional[JobEmailNotifications], format: Optional[Format], git_source: Optional[GitSource], health: Optional[JobsHealthRules], job_clusters: Optional[List[JobCluster]], max_concurrent_runs: Optional[int], name: Optional[str], notification_settings: Optional[JobNotificationSettings], parameters: Optional[List[JobParameterDefinition]], queue: Optional[QueueSettings], run_as: Optional[JobRunAs], schedule: Optional[CronSchedule], tags: Optional[Dict[str, str]], tasks: Optional[List[Task]], timeout_seconds: Optional[int], trigger: Optional[TriggerSettings], webhook_notifications: Optional[WebhookNotifications]]) -> CreateResponse + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + created_job = w.jobs.create(name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.Task(description="test", + existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key="test", + timeout_seconds=0) + ]) + + # cleanup + w.jobs.delete(job_id=created_job.job_id) + + Create a new job. + + Create a new job. + + :param access_control_list: List[:class:`AccessControlRequest`] (optional) + List of permissions to set on the job. + :param compute: List[:class:`JobCompute`] (optional) + A list of compute requirements that can be referenced by tasks of this job. 
+ :param continuous: :class:`Continuous` (optional) + An optional continuous property for this job. The continuous property will ensure that there is + always one run executing. Only one of `schedule` and `continuous` can be used. + :param deployment: :class:`JobDeployment` (optional) + Deployment information for jobs managed by external sources. + :param description: str (optional) + An optional description for the job. The maximum length is 1024 characters in UTF-8 encoding. + :param edit_mode: :class:`CreateJobEditMode` (optional) + Edit mode of the job. + + * `UI_LOCKED`: The job is in a locked UI state and cannot be modified. * `EDITABLE`: The job is in + an editable state and can be modified. + :param email_notifications: :class:`JobEmailNotifications` (optional) + An optional set of email addresses that is notified when runs of this job begin or complete as well + as when this job is deleted. + :param format: :class:`Format` (optional) + Used to tell what is the format of the job. This field is ignored in Create/Update/Reset calls. When + using the Jobs API 2.1 this value is always set to `"MULTI_TASK"`. + :param git_source: :class:`GitSource` (optional) + An optional specification for a remote Git repository containing the source code used by tasks. + Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. + + If `git_source` is set, these tasks retrieve the file from the remote repository by default. + However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. + + Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are + used, `git_source` must be defined on the job. + :param health: :class:`JobsHealthRules` (optional) + An optional set of health rules that can be defined for this job. + :param job_clusters: List[:class:`JobCluster`] (optional) + A list of job cluster specifications that can be shared and reused by tasks of this job. 
Libraries + cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. + :param max_concurrent_runs: int (optional) + An optional maximum allowed number of concurrent runs of the job. + + Set this value if you want to be able to execute multiple runs of the same job concurrently. This is + useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs + to overlap with each other, or if you want to trigger multiple runs which differ by their input + parameters. + + This setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are + 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs. + However, from then on, new runs are skipped unless there are fewer than 3 active runs. + + This value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped. + :param name: str (optional) + An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding. + :param notification_settings: :class:`JobNotificationSettings` (optional) + Optional notification settings that are used when sending notifications to each of the + `email_notifications` and `webhook_notifications` for this job. + :param parameters: List[:class:`JobParameterDefinition`] (optional) + Job-level parameter definitions + :param queue: :class:`QueueSettings` (optional) + The queue settings of the job. + :param run_as: :class:`JobRunAs` (optional) + Write-only setting, available only in Create/Update/Reset and Submit calls. Specifies the user or + service principal that the job runs as. If not specified, the job runs as the user who created the + job. + + Only `user_name` or `service_principal_name` can be specified. If both are specified, an error is + thrown. + :param schedule: :class:`CronSchedule` (optional) + An optional periodic schedule for this job. 
The default behavior is that the job only runs when + triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. + :param tags: Dict[str,str] (optional) + A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs + clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added + to the job. + :param tasks: List[:class:`Task`] (optional) + A list of task specifications to be executed by this job. + :param timeout_seconds: int (optional) + An optional timeout applied to each run of this job. A value of `0` means no timeout. + :param trigger: :class:`TriggerSettings` (optional) + Trigger settings for the job. Can be used to trigger a run when new files arrive in an external + location. The default behavior is that the job runs only when triggered by clicking “Run Now” in + the Jobs UI or sending an API request to `runNow`. + :param webhook_notifications: :class:`WebhookNotifications` (optional) + A collection of system notification IDs to notify when runs of this job begin or complete. + + :returns: :class:`CreateResponse` + + + .. py:method:: delete(job_id: int) + + Delete a job. + + Deletes a job. + + :param job_id: int + The canonical identifier of the job to delete. This field is required. + + + + + .. py:method:: delete_run(run_id: int) + + Delete a job run. + + Deletes a non-active run. Returns an error if the run is active. + + :param run_id: int + The canonical identifier of the run to delete. + + + + + .. py:method:: export_run(run_id: int [, views_to_export: Optional[ViewsToExport]]) -> ExportRunOutput + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + created_job = w.jobs.create(name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.Task(description="test", + existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key="test", + timeout_seconds=0) + ]) + + run_by_id = w.jobs.run_now(job_id=created_job.job_id).result() + + exported_view = w.jobs.export_run(run_id=run_by_id.tasks[0].run_id, views_to_export="CODE") + + # cleanup + w.jobs.delete(job_id=created_job.job_id) + + Export and retrieve a job run. + + Export and retrieve the job run task. + + :param run_id: int + The canonical identifier for the run. This field is required. + :param views_to_export: :class:`ViewsToExport` (optional) + Which views to export (CODE, DASHBOARDS, or ALL). Defaults to CODE. + + :returns: :class:`ExportRunOutput` + + + .. py:method:: get(job_id: int) -> Job + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + run = w.jobs.submit(run_name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.SubmitTask(existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key=f'sdk-{time.time_ns()}') + ]).result() + + output = w.jobs.get_run_output(run_id=run.tasks[0].run_id) + + # cleanup + w.jobs.delete_run(run_id=run.run_id) + + Get a single job. 
+ + Retrieves the details for a single job. + + :param job_id: int + The canonical identifier of the job to retrieve information about. This field is required. + + :returns: :class:`Job` + + + .. py:method:: get_permission_levels(job_id: str) -> GetJobPermissionLevelsResponse + + Get job permission levels. + + Gets the permission levels that a user can have on an object. + + :param job_id: str + The job for which to get or manage permissions. + + :returns: :class:`GetJobPermissionLevelsResponse` + + + .. py:method:: get_permissions(job_id: str) -> JobPermissions + + Get job permissions. + + Gets the permissions of a job. Jobs can inherit permissions from their root object. + + :param job_id: str + The job for which to get or manage permissions. + + :returns: :class:`JobPermissions` + + + .. py:method:: get_run(run_id: int [, include_history: Optional[bool], include_resolved_values: Optional[bool]]) -> Run + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + run = w.jobs.submit(run_name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.SubmitTask(existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key=f'sdk-{time.time_ns()}') + ]).result() + + output = w.jobs.get_run_output(run_id=run.tasks[0].run_id) + + # cleanup + w.jobs.delete_run(run_id=run.run_id) + + Get a single job run. + + Retrieve the metadata of a run. + + :param run_id: int + The canonical identifier of the run for which to retrieve the metadata. This field is required. + :param include_history: bool (optional) + Whether to include the repair history in the response. 
+ :param include_resolved_values: bool (optional) + Whether to include resolved parameter values in the response. + + :returns: :class:`Run` + + + .. py:method:: get_run_output(run_id: int) -> RunOutput + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + run = w.jobs.submit(run_name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.SubmitTask(existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key=f'sdk-{time.time_ns()}') + ]).result() + + output = w.jobs.get_run_output(run_id=run.tasks[0].run_id) + + # cleanup + w.jobs.delete_run(run_id=run.run_id) + + Get the output for a single run. + + Retrieve the output and metadata of a single task run. When a notebook task returns a value through + the `dbutils.notebook.exit()` call, you can use this endpoint to retrieve that value. Databricks + restricts this API to returning the first 5 MB of the output. To return a larger result, you can store + job results in a cloud storage service. + + This endpoint validates that the __run_id__ parameter is valid and returns an HTTP status code 400 if + the __run_id__ parameter is invalid. Runs are automatically removed after 60 days. If you want to + reference them beyond 60 days, you must save old run results before they expire. + + :param run_id: int + The canonical identifier for the run. This field is required. + + :returns: :class:`RunOutput` + + + .. py:method:: list( [, expand_tasks: Optional[bool], limit: Optional[int], name: Optional[str], offset: Optional[int], page_token: Optional[str]]) -> Iterator[BaseJob] + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + created_job = w.jobs.create(name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.Task(description="test", + existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key="test", + timeout_seconds=0) + ]) + + run_list = w.jobs.list_runs(job_id=created_job.job_id) + + # cleanup + w.jobs.delete(job_id=created_job.job_id) + + List jobs. + + Retrieves a list of jobs. + + :param expand_tasks: bool (optional) + Whether to include task and cluster details in the response. + :param limit: int (optional) + The number of jobs to return. This value must be greater than 0 and less than or equal to 100. The + default value is 20. + :param name: str (optional) + A filter on the list based on the exact (case insensitive) job name. + :param offset: int (optional) + The offset of the first job to return, relative to the most recently created job. + + Deprecated since June 2023. Use `page_token` to iterate through the pages instead. + :param page_token: str (optional) + Use `next_page_token` or `prev_page_token` returned from the previous request to list the next or + previous page of jobs respectively. + + :returns: Iterator over :class:`BaseJob` + + + .. py:method:: list_runs( [, active_only: Optional[bool], completed_only: Optional[bool], expand_tasks: Optional[bool], job_id: Optional[int], limit: Optional[int], offset: Optional[int], page_token: Optional[str], run_type: Optional[ListRunsRunType], start_time_from: Optional[int], start_time_to: Optional[int]]) -> Iterator[BaseRun] + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + created_job = w.jobs.create(name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.Task(description="test", + existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key="test", + timeout_seconds=0) + ]) + + run_list = w.jobs.list_runs(job_id=created_job.job_id) + + # cleanup + w.jobs.delete(job_id=created_job.job_id) + + List job runs. + + List runs in descending order by start time. + + :param active_only: bool (optional) + If active_only is `true`, only active runs are included in the results; otherwise, lists both active + and completed runs. An active run is a run in the `QUEUED`, `PENDING`, `RUNNING`, or `TERMINATING` state. + This field cannot be `true` when completed_only is `true`. + :param completed_only: bool (optional) + If completed_only is `true`, only completed runs are included in the results; otherwise, lists both + active and completed runs. This field cannot be `true` when active_only is `true`. + :param expand_tasks: bool (optional) + Whether to include task and cluster details in the response. + :param job_id: int (optional) + The job for which to list runs. If omitted, the Jobs service lists runs from all jobs. + :param limit: int (optional) + The number of runs to return. This value must be greater than 0 and less than 25. The default value + is 20. If a request specifies a limit of 0, the service instead uses the maximum limit. + :param offset: int (optional) + The offset of the first run to return, relative to the most recent run. + + Deprecated since June 2023. Use `page_token` to iterate through the pages instead. 
+ :param page_token: str (optional) + Use `next_page_token` or `prev_page_token` returned from the previous request to list the next or + previous page of runs respectively. + :param run_type: :class:`ListRunsRunType` (optional) + The type of runs to return. For a description of run types, see :method:jobs/getRun. + :param start_time_from: int (optional) + Show runs that started _at or after_ this value. The value must be a UTC timestamp in milliseconds. + Can be combined with _start_time_to_ to filter by a time range. + :param start_time_to: int (optional) + Show runs that started _at or before_ this value. The value must be a UTC timestamp in milliseconds. + Can be combined with _start_time_from_ to filter by a time range. + + :returns: Iterator over :class:`BaseRun` + + + .. py:method:: repair_run(run_id: int [, dbt_commands: Optional[List[str]], jar_params: Optional[List[str]], job_parameters: Optional[Dict[str, str]], latest_repair_id: Optional[int], notebook_params: Optional[Dict[str, str]], pipeline_params: Optional[PipelineParams], python_named_params: Optional[Dict[str, str]], python_params: Optional[List[str]], rerun_all_failed_tasks: Optional[bool], rerun_dependent_tasks: Optional[bool], rerun_tasks: Optional[List[str]], spark_submit_params: Optional[List[str]], sql_params: Optional[Dict[str, str]]]) -> Wait[Run] + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + created_job = w.jobs.create(name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.Task(description="test", + existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key="test", + timeout_seconds=0) + ]) + + run_now_response = w.jobs.run_now(job_id=created_job.job_id) + + cancelled_run = w.jobs.cancel_run(run_id=run_now_response.response.run_id).result() + + repaired_run = w.jobs.repair_run(rerun_tasks=[cancelled_run.tasks[0].task_key], + run_id=run_now_response.response.run_id).result() + + # cleanup + w.jobs.delete(job_id=created_job.job_id) + + Repair a job run. + + Re-run one or more tasks. Tasks are re-run as part of the original job run. They use the current job + and task settings, and can be viewed in the history for the original job run. + + :param run_id: int + The job run ID of the run to repair. The run must not be in progress. + :param dbt_commands: List[str] (optional) + An array of commands to execute for jobs with the dbt task, for example `"dbt_commands": ["dbt + deps", "dbt seed", "dbt run"]` + :param jar_params: List[str] (optional) + A list of parameters for jobs with Spark JAR tasks, for example `"jar_params": ["john doe", "35"]`. + The parameters are used to invoke the main function of the main class specified in the Spark JAR + task. If not specified upon `run-now`, it defaults to an empty list. jar_params cannot be specified + in conjunction with notebook_params. The JSON representation of this field (for example + `{"jar_params":["john doe","35"]}`) cannot exceed 10,000 bytes. 
+ + Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing + information about job runs. + :param job_parameters: Dict[str,str] (optional) + Job-level parameters used in the run, for example `"param": "overriding_val"` + :param latest_repair_id: int (optional) + The ID of the latest repair. This parameter is not required when repairing a run for the first time, + but must be provided on subsequent requests to repair the same run. + :param notebook_params: Dict[str,str] (optional) + A map from keys to values for jobs with notebook task, for example `"notebook_params": {"name": + "john doe", "age": "35"}`. The map is passed to the notebook and is accessible through the + [dbutils.widgets.get] function. + + If not specified upon `run-now`, the triggered run uses the job’s base parameters. + + notebook_params cannot be specified in conjunction with jar_params. + + Use [Task parameter variables] to set parameters containing information about job runs. + + The JSON representation of this field (for example `{"notebook_params":{"name":"john + doe","age":"35"}}`) cannot exceed 10,000 bytes. + + [Task parameter variables]: https://docs.databricks.com/jobs.html#parameter-variables + [dbutils.widgets.get]: https://docs.databricks.com/dev-tools/databricks-utils.html + :param pipeline_params: :class:`PipelineParams` (optional) + :param python_named_params: Dict[str,str] (optional) + A map from keys to values for jobs with Python wheel task, for example `"python_named_params": + {"name": "task", "data": "dbfs:/path/to/data.json"}`. + :param python_params: List[str] (optional) + A list of parameters for jobs with Python tasks, for example `"python_params": ["john doe", "35"]`. + The parameters are passed to Python file as command-line parameters. If specified upon `run-now`, it + would overwrite the parameters specified in job setting. 
The JSON representation of this field (for + example `{"python_params":["john doe","35"]}`) cannot exceed 10,000 bytes. + + Use [Task parameter variables] to set parameters containing information about job runs. + + Important + + These parameters accept only Latin characters (ASCII character set). Using non-ASCII characters + returns an error. Examples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and + emojis. + + [Task parameter variables]: https://docs.databricks.com/jobs.html#parameter-variables + :param rerun_all_failed_tasks: bool (optional) + If true, repair all failed tasks. Only one of `rerun_tasks` or `rerun_all_failed_tasks` can be used. + :param rerun_dependent_tasks: bool (optional) + If true, repair all tasks that depend on the tasks in `rerun_tasks`, even if they were previously + successful. Can be also used in combination with `rerun_all_failed_tasks`. + :param rerun_tasks: List[str] (optional) + The task keys of the task runs to repair. + :param spark_submit_params: List[str] (optional) + A list of parameters for jobs with spark submit task, for example `"spark_submit_params": + ["--class", "org.apache.spark.examples.SparkPi"]`. The parameters are passed to spark-submit script + as command-line parameters. If specified upon `run-now`, it would overwrite the parameters specified + in job setting. The JSON representation of this field (for example `{"python_params":["john + doe","35"]}`) cannot exceed 10,000 bytes. + + Use [Task parameter variables] to set parameters containing information about job runs + + Important + + These parameters accept only Latin characters (ASCII character set). Using non-ASCII characters + returns an error. Examples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and + emojis. 
+ + [Task parameter variables]: https://docs.databricks.com/jobs.html#parameter-variables + :param sql_params: Dict[str,str] (optional) + A map from keys to values for jobs with SQL task, for example `"sql_params": {"name": "john doe", + "age": "35"}`. The SQL alert task does not support custom parameters. + + :returns: + Long-running operation waiter for :class:`Run`. + See :method:wait_get_run_job_terminated_or_skipped for more details. + + + .. py:method:: repair_run_and_wait(run_id: int [, dbt_commands: Optional[List[str]], jar_params: Optional[List[str]], job_parameters: Optional[Dict[str, str]], latest_repair_id: Optional[int], notebook_params: Optional[Dict[str, str]], pipeline_params: Optional[PipelineParams], python_named_params: Optional[Dict[str, str]], python_params: Optional[List[str]], rerun_all_failed_tasks: Optional[bool], rerun_dependent_tasks: Optional[bool], rerun_tasks: Optional[List[str]], spark_submit_params: Optional[List[str]], sql_params: Optional[Dict[str, str]], timeout: datetime.timedelta = 0:20:00]) -> Run + + + .. py:method:: reset(job_id: int, new_settings: JobSettings) + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + created_job = w.jobs.create(name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.Task(description="test", + existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key="test", + timeout_seconds=0) + ]) + + new_name = f'sdk-{time.time_ns()}' + + by_id = w.jobs.get(job_id=created_job.job_id) + + w.jobs.reset(job_id=by_id.job_id, new_settings=jobs.JobSettings(name=new_name, tasks=by_id.settings.tasks)) + + # cleanup + w.jobs.delete(job_id=created_job.job_id) + + Update all job settings (reset). + + Overwrite all settings for the given job. Use the [_Update_ endpoint](:method:jobs/update) to update + job settings partially. + + :param job_id: int + The canonical identifier of the job to reset. This field is required. + :param new_settings: :class:`JobSettings` + The new settings of the job. These settings completely replace the old settings. + + Changes to the field `JobBaseSettings.timeout_seconds` are applied to active runs. Changes to other + fields are applied to future runs only. + + + + + .. py:method:: run_now(job_id: int [, dbt_commands: Optional[List[str]], idempotency_token: Optional[str], jar_params: Optional[List[str]], job_parameters: Optional[Dict[str, str]], notebook_params: Optional[Dict[str, str]], pipeline_params: Optional[PipelineParams], python_named_params: Optional[Dict[str, str]], python_params: Optional[List[str]], queue: Optional[QueueSettings], spark_submit_params: Optional[List[str]], sql_params: Optional[Dict[str, str]]]) -> Wait[Run] + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + created_job = w.jobs.create(name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.Task(description="test", + existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key="test", + timeout_seconds=0) + ]) + + run_by_id = w.jobs.run_now(job_id=created_job.job_id).result() + + # cleanup + w.jobs.delete(job_id=created_job.job_id) + + Trigger a new job run. + + Run a job and return the `run_id` of the triggered run. + + :param job_id: int + The ID of the job to be executed + :param dbt_commands: List[str] (optional) + An array of commands to execute for jobs with the dbt task, for example `"dbt_commands": ["dbt + deps", "dbt seed", "dbt run"]` + :param idempotency_token: str (optional) + An optional token to guarantee the idempotency of job run requests. If a run with the provided token + already exists, the request does not create a new run but returns the ID of the existing run + instead. If a run with the provided token is deleted, an error is returned. + + If you specify the idempotency token, upon failure you can retry until the request succeeds. + Databricks guarantees that exactly one run is launched with that idempotency token. + + This token must have at most 64 characters. + + For more information, see [How to ensure idempotency for jobs]. + + [How to ensure idempotency for jobs]: https://kb.databricks.com/jobs/jobs-idempotency.html + :param jar_params: List[str] (optional) + A list of parameters for jobs with Spark JAR tasks, for example `"jar_params": ["john doe", "35"]`. 
+ The parameters are used to invoke the main function of the main class specified in the Spark JAR + task. If not specified upon `run-now`, it defaults to an empty list. jar_params cannot be specified + in conjunction with notebook_params. The JSON representation of this field (for example + `{"jar_params":["john doe","35"]}`) cannot exceed 10,000 bytes. + + Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing + information about job runs. + :param job_parameters: Dict[str,str] (optional) + Job-level parameters used in the run, for example `"param": "overriding_val"` + :param notebook_params: Dict[str,str] (optional) + A map from keys to values for jobs with notebook task, for example `"notebook_params": {"name": + "john doe", "age": "35"}`. The map is passed to the notebook and is accessible through the + [dbutils.widgets.get] function. + + If not specified upon `run-now`, the triggered run uses the job’s base parameters. + + notebook_params cannot be specified in conjunction with jar_params. + + Use [Task parameter variables] to set parameters containing information about job runs. + + The JSON representation of this field (for example `{"notebook_params":{"name":"john + doe","age":"35"}}`) cannot exceed 10,000 bytes. + + [Task parameter variables]: https://docs.databricks.com/jobs.html#parameter-variables + [dbutils.widgets.get]: https://docs.databricks.com/dev-tools/databricks-utils.html + :param pipeline_params: :class:`PipelineParams` (optional) + :param python_named_params: Dict[str,str] (optional) + A map from keys to values for jobs with Python wheel task, for example `"python_named_params": + {"name": "task", "data": "dbfs:/path/to/data.json"}`. + :param python_params: List[str] (optional) + A list of parameters for jobs with Python tasks, for example `"python_params": ["john doe", "35"]`. + The parameters are passed to Python file as command-line parameters. 
If specified upon `run-now`, it + would overwrite the parameters specified in job setting. The JSON representation of this field (for + example `{"python_params":["john doe","35"]}`) cannot exceed 10,000 bytes. + + Use [Task parameter variables] to set parameters containing information about job runs. + + Important + + These parameters accept only Latin characters (ASCII character set). Using non-ASCII characters + returns an error. Examples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and + emojis. + + [Task parameter variables]: https://docs.databricks.com/jobs.html#parameter-variables + :param queue: :class:`QueueSettings` (optional) + The queue settings of the run. + :param spark_submit_params: List[str] (optional) + A list of parameters for jobs with spark submit task, for example `"spark_submit_params": + ["--class", "org.apache.spark.examples.SparkPi"]`. The parameters are passed to spark-submit script + as command-line parameters. If specified upon `run-now`, it would overwrite the parameters specified + in job setting. The JSON representation of this field (for example `{"python_params":["john + doe","35"]}`) cannot exceed 10,000 bytes. + + Use [Task parameter variables] to set parameters containing information about job runs + + Important + + These parameters accept only Latin characters (ASCII character set). Using non-ASCII characters + returns an error. Examples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and + emojis. + + [Task parameter variables]: https://docs.databricks.com/jobs.html#parameter-variables + :param sql_params: Dict[str,str] (optional) + A map from keys to values for jobs with SQL task, for example `"sql_params": {"name": "john doe", + "age": "35"}`. The SQL alert task does not support custom parameters. + + :returns: + Long-running operation waiter for :class:`Run`. + See :method:wait_get_run_job_terminated_or_skipped for more details. + + + .. 
py:method:: run_now_and_wait(job_id: int [, dbt_commands: Optional[List[str]], idempotency_token: Optional[str], jar_params: Optional[List[str]], job_parameters: Optional[Dict[str, str]], notebook_params: Optional[Dict[str, str]], pipeline_params: Optional[PipelineParams], python_named_params: Optional[Dict[str, str]], python_params: Optional[List[str]], queue: Optional[QueueSettings], spark_submit_params: Optional[List[str]], sql_params: Optional[Dict[str, str]], timeout: datetime.timedelta = 0:20:00]) -> Run + + + .. py:method:: set_permissions(job_id: str [, access_control_list: Optional[List[JobAccessControlRequest]]]) -> JobPermissions + + Set job permissions. + + Sets permissions on a job. Jobs can inherit permissions from their root object. + + :param job_id: str + The job for which to get or manage permissions. + :param access_control_list: List[:class:`JobAccessControlRequest`] (optional) + + :returns: :class:`JobPermissions` + + + .. py:method:: submit( [, access_control_list: Optional[List[iam.AccessControlRequest]], email_notifications: Optional[JobEmailNotifications], git_source: Optional[GitSource], health: Optional[JobsHealthRules], idempotency_token: Optional[str], notification_settings: Optional[JobNotificationSettings], queue: Optional[QueueSettings], run_name: Optional[str], tasks: Optional[List[SubmitTask]], timeout_seconds: Optional[int], webhook_notifications: Optional[WebhookNotifications]]) -> Wait[Run] + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + run = w.jobs.submit(run_name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.SubmitTask(existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key=f'sdk-{time.time_ns()}') + ]).result() + + # cleanup + w.jobs.delete_run(run_id=run.run_id) + + Create and trigger a one-time run. + + Submit a one-time run. This endpoint allows you to submit a workload directly without creating a job. + Runs submitted using this endpoint don’t display in the UI. Use the `jobs/runs/get` API to check the + run state after the job is submitted. + + :param access_control_list: List[:class:`AccessControlRequest`] (optional) + List of permissions to set on the job. + :param email_notifications: :class:`JobEmailNotifications` (optional) + An optional set of email addresses notified when the run begins or completes. + :param git_source: :class:`GitSource` (optional) + An optional specification for a remote Git repository containing the source code used by tasks. + Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. + + If `git_source` is set, these tasks retrieve the file from the remote repository by default. + However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. + + Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are + used, `git_source` must be defined on the job. + :param health: :class:`JobsHealthRules` (optional) + An optional set of health rules that can be defined for this job. 
+ :param idempotency_token: str (optional) + An optional token that can be used to guarantee the idempotency of job run requests. If a run with + the provided token already exists, the request does not create a new run but returns the ID of the + existing run instead. If a run with the provided token is deleted, an error is returned. + + If you specify the idempotency token, upon failure you can retry until the request succeeds. + Databricks guarantees that exactly one run is launched with that idempotency token. + + This token must have at most 64 characters. + + For more information, see [How to ensure idempotency for jobs]. + + [How to ensure idempotency for jobs]: https://kb.databricks.com/jobs/jobs-idempotency.html + :param notification_settings: :class:`JobNotificationSettings` (optional) + Optional notification settings that are used when sending notifications to each of the + `email_notifications` and `webhook_notifications` for this run. + :param queue: :class:`QueueSettings` (optional) + The queue settings of the one-time run. + :param run_name: str (optional) + An optional name for the run. The default value is `Untitled`. + :param tasks: List[:class:`SubmitTask`] (optional) + :param timeout_seconds: int (optional) + An optional timeout applied to each run of this job. A value of `0` means no timeout. + :param webhook_notifications: :class:`WebhookNotifications` (optional) + A collection of system notification IDs to notify when the run begins or completes. + + :returns: + Long-running operation waiter for :class:`Run`. + See :method:wait_get_run_job_terminated_or_skipped for more details. + + + .. 
py:method:: submit_and_wait( [, access_control_list: Optional[List[iam.AccessControlRequest]], email_notifications: Optional[JobEmailNotifications], git_source: Optional[GitSource], health: Optional[JobsHealthRules], idempotency_token: Optional[str], notification_settings: Optional[JobNotificationSettings], queue: Optional[QueueSettings], run_name: Optional[str], tasks: Optional[List[SubmitTask]], timeout_seconds: Optional[int], webhook_notifications: Optional[WebhookNotifications], timeout: datetime.timedelta = 0:20:00]) -> Run + + + .. py:method:: update(job_id: int [, fields_to_remove: Optional[List[str]], new_settings: Optional[JobSettings]]) + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import jobs + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + cluster_id = w.clusters.ensure_cluster_is_running( + os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] + + new_name = f'sdk-{time.time_ns()}' + + created_job = w.jobs.create(name=f'sdk-{time.time_ns()}', + tasks=[ + jobs.Task(description="test", + existing_cluster_id=cluster_id, + notebook_task=jobs.NotebookTask(notebook_path=notebook_path), + task_key="test", + timeout_seconds=0) + ]) + + w.jobs.update(job_id=created_job.job_id, new_settings=jobs.JobSettings(name=new_name, max_concurrent_runs=5)) + + # cleanup + w.jobs.delete(job_id=created_job.job_id) + + Update job settings partially. + + Add, update, or remove specific settings of an existing job. Use the [_Reset_ + endpoint](:method:jobs/reset) to overwrite all job settings. + + :param job_id: int + The canonical identifier of the job to update. This field is required. + :param fields_to_remove: List[str] (optional) + Remove top-level fields in the job settings. Removing nested fields is not supported, except for + tasks and job clusters (`tasks/task_1`). This field is optional. 
+ :param new_settings: :class:`JobSettings` (optional) + The new settings for the job. + + Top-level fields specified in `new_settings` are completely replaced, except for arrays which are + merged. That is, new and existing entries are completely replaced based on the respective key + fields, i.e. `task_key` or `job_cluster_key`, while previous entries are kept. + + Partially updating nested fields is not supported. + + Changes to the field `JobSettings.timeout_seconds` are applied to active runs. Changes to other + fields are applied to future runs only. + + + + + .. py:method:: update_permissions(job_id: str [, access_control_list: Optional[List[JobAccessControlRequest]]]) -> JobPermissions + + Update job permissions. + + Updates the permissions on a job. Jobs can inherit permissions from their root object. + + :param job_id: str + The job for which to get or manage permissions. + :param access_control_list: List[:class:`JobAccessControlRequest`] (optional) + + :returns: :class:`JobPermissions` + + + .. py:method:: wait_get_run_job_terminated_or_skipped(run_id: int, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[Run], None]]) -> Run diff --git a/docs/workspace/ml/experiments.rst b/docs/workspace/ml/experiments.rst new file mode 100644 index 000000000..62e16fc24 --- /dev/null +++ b/docs/workspace/ml/experiments.rst @@ -0,0 +1,688 @@ +``w.experiments``: Experiments +============================== +.. currentmodule:: databricks.sdk.service.ml + +.. py:class:: ExperimentsAPI + + Experiments are the primary unit of organization in MLflow; all MLflow runs belong to an experiment. Each + experiment lets you visualize, search, and compare runs, as well as download run artifacts or metadata for + analysis in other tools. Experiments are maintained in a Databricks hosted MLflow tracking server. + + Experiments are located in the workspace file tree. 
You manage experiments using the same tools you use to + manage other workspace objects such as folders, notebooks, and libraries. + + .. py:method:: create_experiment(name: str [, artifact_location: Optional[str], tags: Optional[List[ExperimentTag]]]) -> CreateExperimentResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + experiment = w.experiments.create_experiment(name=f'sdk-{time.time_ns()}') + + # cleanup + w.experiments.delete_experiment(experiment_id=experiment.experiment_id) + + Create experiment. + + Creates an experiment with a name. Returns the ID of the newly created experiment. Validates that + another experiment with the same name does not already exist and fails if another experiment with the + same name already exists. + + Throws `RESOURCE_ALREADY_EXISTS` if a experiment with the given name exists. + + :param name: str + Experiment name. + :param artifact_location: str (optional) + Location where all artifacts for the experiment are stored. If not provided, the remote server will + select an appropriate default. + :param tags: List[:class:`ExperimentTag`] (optional) + A collection of tags to set on the experiment. Maximum tag size and number of tags per request + depends on the storage backend. All storage backends are guaranteed to support tag keys up to 250 + bytes in size and tag values up to 5000 bytes in size. All storage backends are also guaranteed to + support up to 20 tags per request. + + :returns: :class:`CreateExperimentResponse` + + + .. py:method:: create_run( [, experiment_id: Optional[str], start_time: Optional[int], tags: Optional[List[RunTag]], user_id: Optional[str]]) -> CreateRunResponse + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import ml + + w = WorkspaceClient() + + experiment = w.experiments.create_experiment(name=f'sdk-{time.time_ns()}') + + created = w.experiments.create_run(experiment_id=experiment.experiment_id, + tags=[ml.RunTag(key="foo", value="bar")]) + + # cleanup + w.experiments.delete_experiment(experiment_id=experiment.experiment_id) + w.experiments.delete_run(run_id=created.run.info.run_id) + + Create a run. + + Creates a new run within an experiment. A run is usually a single execution of a machine learning or + data ETL pipeline. MLflow uses runs to track the `mlflowParam`, `mlflowMetric` and `mlflowRunTag` + associated with a single execution. + + :param experiment_id: str (optional) + ID of the associated experiment. + :param start_time: int (optional) + Unix timestamp in milliseconds of when the run started. + :param tags: List[:class:`RunTag`] (optional) + Additional metadata for run. + :param user_id: str (optional) + ID of the user executing the run. This field is deprecated as of MLflow 1.0, and will be removed in + a future MLflow release. Use 'mlflow.user' tag instead. + + :returns: :class:`CreateRunResponse` + + + .. py:method:: delete_experiment(experiment_id: str) + + Delete an experiment. + + Marks an experiment and associated metadata, runs, metrics, params, and tags for deletion. If the + experiment uses FileStore, artifacts associated with experiment are also deleted. + + :param experiment_id: str + ID of the associated experiment. + + + + + .. py:method:: delete_run(run_id: str) + + Delete a run. + + Marks a run for deletion. + + :param run_id: str + ID of the run to delete. + + + + + .. py:method:: delete_runs(experiment_id: str, max_timestamp_millis: int [, max_runs: Optional[int]]) -> DeleteRunsResponse + + Delete runs by creation time. + + Bulk delete runs in an experiment that were created prior to or at the specified timestamp. 
Deletes at + most max_runs per request. + + :param experiment_id: str + The ID of the experiment containing the runs to delete. + :param max_timestamp_millis: int + The maximum creation timestamp in milliseconds since the UNIX epoch for deleting runs. Only runs + created prior to or at this timestamp are deleted. + :param max_runs: int (optional) + An optional positive integer indicating the maximum number of runs to delete. The maximum allowed + value for max_runs is 10000. + + :returns: :class:`DeleteRunsResponse` + + + .. py:method:: delete_tag(run_id: str, key: str) + + Delete a tag. + + Deletes a tag on a run. Tags are run metadata that can be updated during a run and after a run + completes. + + :param run_id: str + ID of the run that the tag was logged under. Must be provided. + :param key: str + Name of the tag. Maximum size is 255 bytes. Must be provided. + + + + + .. py:method:: get_by_name(experiment_name: str) -> GetExperimentResponse + + Get metadata. + + Gets metadata for an experiment. + + This endpoint will return deleted experiments, but prefers the active experiment if an active and + deleted experiment share the same name. If multiple deleted experiments share the same name, the API + will return one of them. + + Throws `RESOURCE_DOES_NOT_EXIST` if no experiment with the specified name exists. + + :param experiment_name: str + Name of the associated experiment. + + :returns: :class:`GetExperimentResponse` + + + .. py:method:: get_experiment(experiment_id: str) -> GetExperimentResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + experiment = w.experiments.create_experiment(name=f'sdk-{time.time_ns()}') + + _ = w.experiments.get_experiment(experiment_id=experiment.experiment_id) + + # cleanup + w.experiments.delete_experiment(experiment_id=experiment.experiment_id) + + Get an experiment. + + Gets metadata for an experiment. This method works on deleted experiments. 
+ + :param experiment_id: str + ID of the associated experiment. + + :returns: :class:`GetExperimentResponse` + + + .. py:method:: get_history(metric_key: str [, max_results: Optional[int], page_token: Optional[str], run_id: Optional[str], run_uuid: Optional[str]]) -> Iterator[Metric] + + Get history of a given metric within a run. + + Gets a list of all values for the specified metric for a given run. + + :param metric_key: str + Name of the metric. + :param max_results: int (optional) + Maximum number of Metric records to return per paginated request. Default is set to 25,000. If set + higher than 25,000, a request Exception will be raised. + :param page_token: str (optional) + Token indicating the page of metric histories to fetch. + :param run_id: str (optional) + ID of the run from which to fetch metric values. Must be provided. + :param run_uuid: str (optional) + [Deprecated, use run_id instead] ID of the run from which to fetch metric values. This field will be + removed in a future MLflow version. + + :returns: Iterator over :class:`Metric` + + + .. py:method:: get_permission_levels(experiment_id: str) -> GetExperimentPermissionLevelsResponse + + Get experiment permission levels. + + Gets the permission levels that a user can have on an object. + + :param experiment_id: str + The experiment for which to get or manage permissions. + + :returns: :class:`GetExperimentPermissionLevelsResponse` + + + .. py:method:: get_permissions(experiment_id: str) -> ExperimentPermissions + + Get experiment permissions. + + Gets the permissions of an experiment. Experiments can inherit permissions from their root object. + + :param experiment_id: str + The experiment for which to get or manage permissions. + + :returns: :class:`ExperimentPermissions` + + + .. py:method:: get_run(run_id: str [, run_uuid: Optional[str]]) -> GetRunResponse + + Get a run. + + Gets the metadata, metrics, params, and tags for a run. 
In the case where multiple metrics with the + same key are logged for a run, return only the value with the latest timestamp. + + If there are multiple values with the latest timestamp, return the maximum of these values. + + :param run_id: str + ID of the run to fetch. Must be provided. + :param run_uuid: str (optional) + [Deprecated, use run_id instead] ID of the run to fetch. This field will be removed in a future + MLflow version. + + :returns: :class:`GetRunResponse` + + + .. py:method:: list_artifacts( [, page_token: Optional[str], path: Optional[str], run_id: Optional[str], run_uuid: Optional[str]]) -> Iterator[FileInfo] + + Get all artifacts. + + List artifacts for a run. Takes an optional `artifact_path` prefix. If it is specified, the response + contains only artifacts with the specified prefix.", + + :param page_token: str (optional) + Token indicating the page of artifact results to fetch + :param path: str (optional) + Filter artifacts matching this path (a relative path from the root artifact directory). + :param run_id: str (optional) + ID of the run whose artifacts to list. Must be provided. + :param run_uuid: str (optional) + [Deprecated, use run_id instead] ID of the run whose artifacts to list. This field will be removed + in a future MLflow version. + + :returns: Iterator over :class:`FileInfo` + + + .. py:method:: list_experiments( [, max_results: Optional[int], page_token: Optional[str], view_type: Optional[str]]) -> Iterator[Experiment] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import ml + + w = WorkspaceClient() + + all = w.experiments.list_experiments(ml.ListExperimentsRequest()) + + List experiments. + + Gets a list of all experiments. + + :param max_results: int (optional) + Maximum number of experiments desired. If `max_results` is unspecified, return all experiments. If + `max_results` is too large, it'll be automatically capped at 1000. 
Callers of this endpoint are + encouraged to pass max_results explicitly and leverage page_token to iterate through experiments. + :param page_token: str (optional) + Token indicating the page of experiments to fetch + :param view_type: str (optional) + Qualifier for type of experiments to be returned. If unspecified, return only active experiments. + + :returns: Iterator over :class:`Experiment` + + + .. py:method:: log_batch( [, metrics: Optional[List[Metric]], params: Optional[List[Param]], run_id: Optional[str], tags: Optional[List[RunTag]]]) + + Log a batch. + + Logs a batch of metrics, params, and tags for a run. If any data failed to be persisted, the server + will respond with an error (non-200 status code). + + In case of error (due to internal server error or an invalid request), partial data may be written. + + You can write metrics, params, and tags in interleaving fashion, but within a given entity type are + guaranteed to follow the order specified in the request body. + + The overwrite behavior for metrics, params, and tags is as follows: + + * Metrics: metric values are never overwritten. Logging a metric (key, value, timestamp) appends to + the set of values for the metric with the provided key. + + * Tags: tag values can be overwritten by successive writes to the same tag key. That is, if multiple + tag values with the same key are provided in the same API request, the last-provided tag value is + written. Logging the same tag (key, value) is permitted. Specifically, logging a tag is idempotent. + + * Parameters: once written, param values cannot be changed (attempting to overwrite a param value will + result in an error). However, logging the same param (key, value) is permitted. Specifically, logging + a param is idempotent. 
**Request Limits** + + A single JSON-serialized API request may be up to 1 MB in size and contain: + + * No more than 1000 metrics, params, and tags in total + * Up to 1000 metrics + * Up to 100 params + * Up to 100 tags
+ :param value: float + Double value of the metric being logged. + :param timestamp: int + Unix timestamp in milliseconds at the time metric was logged. + :param run_id: str (optional) + ID of the run under which to log the metric. Must be provided. + :param run_uuid: str (optional) + [Deprecated, use run_id instead] ID of the run under which to log the metric. This field will be + removed in a future MLflow version. + :param step: int (optional) + Step at which to log the metric + + + + + .. py:method:: log_model( [, model_json: Optional[str], run_id: Optional[str]]) + + Log a model. + + **NOTE:** Experimental: This API may change or be removed in a future release without warning. + + :param model_json: str (optional) + MLmodel file in json format. + :param run_id: str (optional) + ID of the run to log under + + + + + .. py:method:: log_param(key: str, value: str [, run_id: Optional[str], run_uuid: Optional[str]]) + + Log a param. + + Logs a param used for a run. A param is a key-value pair (string key, string value). Examples include + hyperparameters used for ML model training and constant dates and values used in an ETL pipeline. A + param can be logged only once for a run. + + :param key: str + Name of the param. Maximum size is 255 bytes. + :param value: str + String value of the param being logged. Maximum size is 500 bytes. + :param run_id: str (optional) + ID of the run under which to log the param. Must be provided. + :param run_uuid: str (optional) + [Deprecated, use run_id instead] ID of the run under which to log the param. This field will be + removed in a future MLflow version. + + + + + .. py:method:: restore_experiment(experiment_id: str) + + Restores an experiment. + + Restore an experiment marked for deletion. This also restores associated metadata, runs, metrics, + params, and tags. If experiment uses FileStore, underlying artifacts associated with experiment are + also restored. 
Throws `RESOURCE_DOES_NOT_EXIST` if the experiment was never created or was permanently deleted.
+ :param page_token: str (optional) + Token indicating the page of experiments to fetch + :param view_type: :class:`SearchExperimentsViewType` (optional) + Qualifier for type of experiments to be returned. If unspecified, return only active experiments. + + :returns: Iterator over :class:`Experiment` + + + .. py:method:: search_runs( [, experiment_ids: Optional[List[str]], filter: Optional[str], max_results: Optional[int], order_by: Optional[List[str]], page_token: Optional[str], run_view_type: Optional[SearchRunsRunViewType]]) -> Iterator[Run] + + Search for runs. + + Searches for runs that satisfy expressions. + + Search expressions can use `mlflowMetric` and `mlflowParam` keys.", + + :param experiment_ids: List[str] (optional) + List of experiment IDs to search over. + :param filter: str (optional) + A filter expression over params, metrics, and tags, that allows returning a subset of runs. The + syntax is a subset of SQL that supports ANDing together binary operations between a param, metric, + or tag and a constant. + + Example: `metrics.rmse < 1 and params.model_class = 'LogisticRegression'` + + You can select columns with special characters (hyphen, space, period, etc.) by using double quotes: + `metrics."model class" = 'LinearRegression' and tags."user-name" = 'Tomas'` + + Supported operators are `=`, `!=`, `>`, `>=`, `<`, and `<=`. + :param max_results: int (optional) + Maximum number of runs desired. Max threshold is 50000 + :param order_by: List[str] (optional) + List of columns to be ordered by, including attributes, params, metrics, and tags with an optional + "DESC" or "ASC" annotation, where "ASC" is the default. Example: ["params.input DESC", + "metrics.alpha ASC", "metrics.rmse"] Tiebreaks are done by start_time DESC followed by run_id for + runs with the same start time (and this is the default ordering criterion if order_by is not + provided). + :param page_token: str (optional) + Token for the current page of runs. 
+ :param run_view_type: :class:`SearchRunsRunViewType` (optional) + Whether to display only active, only deleted, or all runs. Defaults to only active runs. + + :returns: Iterator over :class:`Run` + + + .. py:method:: set_experiment_tag(experiment_id: str, key: str, value: str) + + Set a tag. + + Sets a tag on an experiment. Experiment tags are metadata that can be updated. + + :param experiment_id: str + ID of the experiment under which to log the tag. Must be provided. + :param key: str + Name of the tag. Maximum size depends on storage backend. All storage backends are guaranteed to + support key values up to 250 bytes in size. + :param value: str + String value of the tag being logged. Maximum size depends on storage backend. All storage backends + are guaranteed to support key values up to 5000 bytes in size. + + + + + .. py:method:: set_permissions(experiment_id: str [, access_control_list: Optional[List[ExperimentAccessControlRequest]]]) -> ExperimentPermissions + + Set experiment permissions. + + Sets permissions on an experiment. Experiments can inherit permissions from their root object. + + :param experiment_id: str + The experiment for which to get or manage permissions. + :param access_control_list: List[:class:`ExperimentAccessControlRequest`] (optional) + + :returns: :class:`ExperimentPermissions` + + + .. py:method:: set_tag(key: str, value: str [, run_id: Optional[str], run_uuid: Optional[str]]) + + Set a tag. + + Sets a tag on a run. Tags are run metadata that can be updated during a run and after a run completes. + + :param key: str + Name of the tag. Maximum size depends on storage backend. All storage backends are guaranteed to + support key values up to 250 bytes in size. + :param value: str + String value of the tag being logged. Maximum size depends on storage backend. All storage backends + are guaranteed to support key values up to 5000 bytes in size. + :param run_id: str (optional) + ID of the run under which to log the tag. 
Must be provided. + :param run_uuid: str (optional) + [Deprecated, use run_id instead] ID of the run under which to log the tag. This field will be + removed in a future MLflow version. + + + + + .. py:method:: update_experiment(experiment_id: str [, new_name: Optional[str]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + experiment = w.experiments.create_experiment(name=f'sdk-{time.time_ns()}') + + w.experiments.update_experiment(new_name=f'sdk-{time.time_ns()}', experiment_id=experiment.experiment_id) + + # cleanup + w.experiments.delete_experiment(experiment_id=experiment.experiment_id) + + Update an experiment. + + Updates experiment metadata. + + :param experiment_id: str + ID of the associated experiment. + :param new_name: str (optional) + If provided, the experiment's name is changed to the new name. The new name must be unique. + + + + + .. py:method:: update_permissions(experiment_id: str [, access_control_list: Optional[List[ExperimentAccessControlRequest]]]) -> ExperimentPermissions + + Update experiment permissions. + + Updates the permissions on an experiment. Experiments can inherit permissions from their root object. + + :param experiment_id: str + The experiment for which to get or manage permissions. + :param access_control_list: List[:class:`ExperimentAccessControlRequest`] (optional) + + :returns: :class:`ExperimentPermissions` + + + .. py:method:: update_run( [, end_time: Optional[int], run_id: Optional[str], run_uuid: Optional[str], status: Optional[UpdateRunStatus]]) -> UpdateRunResponse + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import ml + + w = WorkspaceClient() + + experiment = w.experiments.create_experiment(name=f'sdk-{time.time_ns()}') + + created = w.experiments.create_run(experiment_id=experiment.experiment_id, + tags=[ml.RunTag(key="foo", value="bar")]) + + _ = w.experiments.update_run(run_id=created.run.info.run_id, status=ml.UpdateRunStatus.KILLED) + + # cleanup + w.experiments.delete_experiment(experiment_id=experiment.experiment_id) + w.experiments.delete_run(run_id=created.run.info.run_id) + + Update a run. + + Updates run metadata. + + :param end_time: int (optional) + Unix timestamp in milliseconds of when the run ended. + :param run_id: str (optional) + ID of the run to update. Must be provided. + :param run_uuid: str (optional) + [Deprecated, use run_id instead] ID of the run to update. This field will be removed in a future + MLflow version. + :param status: :class:`UpdateRunStatus` (optional) + Updated status of the run. + + :returns: :class:`UpdateRunResponse` + \ No newline at end of file diff --git a/docs/workspace/ml/index.rst b/docs/workspace/ml/index.rst new file mode 100644 index 000000000..1a713eb57 --- /dev/null +++ b/docs/workspace/ml/index.rst @@ -0,0 +1,11 @@ + +Machine Learning +================ + +Create and manage experiments, features, and other machine learning artifacts + +.. toctree:: + :maxdepth: 1 + + experiments + model_registry \ No newline at end of file diff --git a/docs/workspace/ml/model_registry.rst b/docs/workspace/ml/model_registry.rst new file mode 100644 index 000000000..8ac52916f --- /dev/null +++ b/docs/workspace/ml/model_registry.rst @@ -0,0 +1,913 @@ +``w.model_registry``: Model Registry +==================================== +.. currentmodule:: databricks.sdk.service.ml + +.. py:class:: ModelRegistryAPI + + Note: This API reference documents APIs for the Workspace Model Registry.
Databricks recommends using + [Models in Unity Catalog](/api/workspace/registeredmodels) instead. Models in Unity Catalog provides + centralized model governance, cross-workspace access, lineage, and deployment. Workspace Model Registry + will be deprecated in the future. + + The Workspace Model Registry is a centralized model repository and a UI and set of APIs that enable you to + manage the full lifecycle of MLflow Models. + + .. py:method:: approve_transition_request(name: str, version: str, stage: Stage, archive_existing_versions: bool [, comment: Optional[str]]) -> ApproveTransitionRequestResponse + + Approve transition request. + + Approves a model version stage transition request. + + :param name: str + Name of the model. + :param version: str + Version of the model. + :param stage: :class:`Stage` + Target stage of the transition. Valid values are: + + * `None`: The initial stage of a model version. + + * `Staging`: Staging or pre-production stage. + + * `Production`: Production stage. + + * `Archived`: Archived stage. + :param archive_existing_versions: bool + Specifies whether to archive all current model versions in the target stage. + :param comment: str (optional) + User-provided comment on the action. + + :returns: :class:`ApproveTransitionRequestResponse` + + + .. py:method:: create_comment(name: str, version: str, comment: str) -> CreateCommentResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + model = w.model_registry.create_model(name=f'sdk-{time.time_ns()}') + + mv = w.model_registry.create_model_version(name=model.registered_model.name, source="dbfs:/tmp") + + created = w.model_registry.create_comment(comment=f'sdk-{time.time_ns()}', + name=mv.model_version.name, + version=mv.model_version.version) + + # cleanup + w.model_registry.delete_comment(id=created.comment.id) + + Post a comment. + + Posts a comment on a model version. 
A comment can be submitted either by a user or programmatically to + display relevant information about the model. For example, test results or deployment errors. + + :param name: str + Name of the model. + :param version: str + Version of the model. + :param comment: str + User-provided comment on the action. + + :returns: :class:`CreateCommentResponse` + + + .. py:method:: create_model(name: str [, description: Optional[str], tags: Optional[List[ModelTag]]]) -> CreateModelResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + model = w.model_registry.create_model(name=f'sdk-{time.time_ns()}') + + Create a model. + + Creates a new registered model with the name specified in the request body. + + Throws `RESOURCE_ALREADY_EXISTS` if a registered model with the given name exists. + + :param name: str + Register models under this name + :param description: str (optional) + Optional description for registered model. + :param tags: List[:class:`ModelTag`] (optional) + Additional metadata for registered model. + + :returns: :class:`CreateModelResponse` + + + .. py:method:: create_model_version(name: str, source: str [, description: Optional[str], run_id: Optional[str], run_link: Optional[str], tags: Optional[List[ModelVersionTag]]]) -> CreateModelVersionResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + model = w.model_registry.create_model(name=f'sdk-{time.time_ns()}') + + mv = w.model_registry.create_model_version(name=model.registered_model.name, source="dbfs:/tmp") + + Create a model version. + + Creates a model version. + + :param name: str + Register model under this name + :param source: str + URI indicating the location of the model artifacts. + :param description: str (optional) + Optional description for model version. 
+ :param run_id: str (optional) + MLflow run ID for correlation, if `source` was generated by an experiment run in MLflow tracking + server + :param run_link: str (optional) + MLflow run link - this is the exact link of the run that generated this model version, potentially + hosted at another instance of MLflow. + :param tags: List[:class:`ModelVersionTag`] (optional) + Additional metadata for model version. + + :returns: :class:`CreateModelVersionResponse` + + + .. py:method:: create_transition_request(name: str, version: str, stage: Stage [, comment: Optional[str]]) -> CreateTransitionRequestResponse + + Make a transition request. + + Creates a model version stage transition request. + + :param name: str + Name of the model. + :param version: str + Version of the model. + :param stage: :class:`Stage` + Target stage of the transition. Valid values are: + + * `None`: The initial stage of a model version. + + * `Staging`: Staging or pre-production stage. + + * `Production`: Production stage. + + * `Archived`: Archived stage. + :param comment: str (optional) + User-provided comment on the action. + + :returns: :class:`CreateTransitionRequestResponse` + + + .. py:method:: create_webhook(events: List[RegistryWebhookEvent] [, description: Optional[str], http_url_spec: Optional[HttpUrlSpec], job_spec: Optional[JobSpec], model_name: Optional[str], status: Optional[RegistryWebhookStatus]]) -> CreateWebhookResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import ml + + w = WorkspaceClient() + + created = w.model_registry.create_webhook(description=f'sdk-{time.time_ns()}', + events=[ml.RegistryWebhookEvent.MODEL_VERSION_CREATED], + http_url_spec=ml.HttpUrlSpec(url=w.config.host)) + + # cleanup + w.model_registry.delete_webhook(id=created.webhook.id) + + Create a webhook. + + **NOTE**: This endpoint is in Public Preview. + + Creates a registry webhook. 
+ + :param events: List[:class:`RegistryWebhookEvent`] + Events that can trigger a registry webhook: * `MODEL_VERSION_CREATED`: A new model version was + created for the associated model. + + * `MODEL_VERSION_TRANSITIONED_STAGE`: A model version’s stage was changed. + + * `TRANSITION_REQUEST_CREATED`: A user requested a model version’s stage be transitioned. + + * `COMMENT_CREATED`: A user wrote a comment on a registered model. + + * `REGISTERED_MODEL_CREATED`: A new registered model was created. This event type can only be + specified for a registry-wide webhook, which can be created by not specifying a model name in the + create request. + + * `MODEL_VERSION_TAG_SET`: A user set a tag on the model version. + + * `MODEL_VERSION_TRANSITIONED_TO_STAGING`: A model version was transitioned to staging. + + * `MODEL_VERSION_TRANSITIONED_TO_PRODUCTION`: A model version was transitioned to production. + + * `MODEL_VERSION_TRANSITIONED_TO_ARCHIVED`: A model version was archived. + + * `TRANSITION_REQUEST_TO_STAGING_CREATED`: A user requested a model version be transitioned to + staging. + + * `TRANSITION_REQUEST_TO_PRODUCTION_CREATED`: A user requested a model version be transitioned to + production. + + * `TRANSITION_REQUEST_TO_ARCHIVED_CREATED`: A user requested a model version be archived. + :param description: str (optional) + User-specified description for the webhook. + :param http_url_spec: :class:`HttpUrlSpec` (optional) + :param job_spec: :class:`JobSpec` (optional) + :param model_name: str (optional) + Name of the model whose events would trigger this webhook. + :param status: :class:`RegistryWebhookStatus` (optional) + Enable or disable triggering the webhook, or put the webhook into test mode. The default is + `ACTIVE`: * `ACTIVE`: Webhook is triggered when an associated event happens. + + * `DISABLED`: Webhook is not triggered. + + * `TEST_MODE`: Webhook can be triggered through the test endpoint, but is not triggered on a real + event. 
+ + :returns: :class:`CreateWebhookResponse` + + + .. py:method:: delete_comment(id: str) + + Delete a comment. + + Deletes a comment on a model version. + + :param id: str + + + + + .. py:method:: delete_model(name: str) + + Delete a model. + + Deletes a registered model. + + :param name: str + Registered model unique name identifier. + + + + + .. py:method:: delete_model_tag(name: str, key: str) + + Delete a model tag. + + Deletes the tag for a registered model. + + :param name: str + Name of the registered model that the tag was logged under. + :param key: str + Name of the tag. The name must be an exact match; wild-card deletion is not supported. Maximum size + is 250 bytes. + + + + + .. py:method:: delete_model_version(name: str, version: str) + + Delete a model version. + + Deletes a model version. + + :param name: str + Name of the registered model + :param version: str + Model version number + + + + + .. py:method:: delete_model_version_tag(name: str, version: str, key: str) + + Delete a model version tag. + + Deletes a model version tag. + + :param name: str + Name of the registered model that the tag was logged under. + :param version: str + Model version number that the tag was logged under. + :param key: str + Name of the tag. The name must be an exact match; wild-card deletion is not supported. Maximum size + is 250 bytes. + + + + + .. py:method:: delete_transition_request(name: str, version: str, stage: DeleteTransitionRequestStage, creator: str [, comment: Optional[str]]) + + Delete a transition request. + + Cancels a model version stage transition request. + + :param name: str + Name of the model. + :param version: str + Version of the model. + :param stage: :class:`DeleteTransitionRequestStage` + Target stage of the transition request. Valid values are: + + * `None`: The initial stage of a model version. + + * `Staging`: Staging or pre-production stage. + + * `Production`: Production stage. + + * `Archived`: Archived stage. 
+ :param creator: str + Username of the user who created this request. Of the transition requests matching the specified + details, only the one transition created by this user will be deleted. + :param comment: str (optional) + User-provided comment on the action. + + + + + .. py:method:: delete_webhook( [, id: Optional[str]]) + + Delete a webhook. + + **NOTE:** This endpoint is in Public Preview. + + Deletes a registry webhook. + + :param id: str (optional) + Webhook ID required to delete a registry webhook. + + + + + .. py:method:: get_latest_versions(name: str [, stages: Optional[List[str]]]) -> Iterator[ModelVersion] + + Get the latest version. + + Gets the latest version of a registered model. + + :param name: str + Registered model unique name identifier. + :param stages: List[str] (optional) + List of stages. + + :returns: Iterator over :class:`ModelVersion` + + + .. py:method:: get_model(name: str) -> GetModelResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.model_registry.create_model(name=f'sdk-{time.time_ns()}') + + model = w.model_registry.get_model(name=created.registered_model.name) + + Get model. + + Get the details of a model. This is a Databricks workspace version of the [MLflow endpoint] that also + returns the model's Databricks workspace ID and the permission level of the requesting user on the + model. + + [MLflow endpoint]: https://www.mlflow.org/docs/latest/rest-api.html#get-registeredmodel + + :param name: str + Registered model unique name identifier. + + :returns: :class:`GetModelResponse` + + + .. py:method:: get_model_version(name: str, version: str) -> GetModelVersionResponse + + Get a model version. + + Get a model version. + + :param name: str + Name of the registered model + :param version: str + Model version number + + :returns: :class:`GetModelVersionResponse` + + + .. 
py:method:: get_model_version_download_uri(name: str, version: str) -> GetModelVersionDownloadUriResponse + + Get a model version URI. + + Gets a URI to download the model version. + + :param name: str + Name of the registered model + :param version: str + Model version number + + :returns: :class:`GetModelVersionDownloadUriResponse` + + + .. py:method:: get_permission_levels(registered_model_id: str) -> GetRegisteredModelPermissionLevelsResponse + + Get registered model permission levels. + + Gets the permission levels that a user can have on an object. + + :param registered_model_id: str + The registered model for which to get or manage permissions. + + :returns: :class:`GetRegisteredModelPermissionLevelsResponse` + + + .. py:method:: get_permissions(registered_model_id: str) -> RegisteredModelPermissions + + Get registered model permissions. + + Gets the permissions of a registered model. Registered models can inherit permissions from their root + object. + + :param registered_model_id: str + The registered model for which to get or manage permissions. + + :returns: :class:`RegisteredModelPermissions` + + + .. py:method:: list_models( [, max_results: Optional[int], page_token: Optional[str]]) -> Iterator[Model] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import ml + + w = WorkspaceClient() + + all = w.model_registry.list_models(ml.ListModelsRequest()) + + List models. + + Lists all available registered models, up to the limit specified in __max_results__. + + :param max_results: int (optional) + Maximum number of registered models desired. Max threshold is 1000. + :param page_token: str (optional) + Pagination token to go to the next page based on a previous query. + + :returns: Iterator over :class:`Model` + + + .. py:method:: list_transition_requests(name: str, version: str) -> Iterator[Activity] + + List transition requests. 
+ + Gets a list of all open stage transition requests for the model version. + + :param name: str + Name of the model. + :param version: str + Version of the model. + + :returns: Iterator over :class:`Activity` + + + .. py:method:: list_webhooks( [, events: Optional[List[RegistryWebhookEvent]], model_name: Optional[str], page_token: Optional[str]]) -> Iterator[RegistryWebhook] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import ml + + w = WorkspaceClient() + + all = w.model_registry.list_webhooks(ml.ListWebhooksRequest()) + + List registry webhooks. + + **NOTE:** This endpoint is in Public Preview. + + Lists all registry webhooks. + + :param events: List[:class:`RegistryWebhookEvent`] (optional) + If `events` is specified, any webhook with one or more of the specified trigger events is included + in the output. If `events` is not specified, webhooks of all event types are included in the output. + :param model_name: str (optional) + If not specified, all webhooks associated with the specified events are listed, regardless of their + associated model. + :param page_token: str (optional) + Token indicating the page of artifact results to fetch + + :returns: Iterator over :class:`RegistryWebhook` + + + .. py:method:: reject_transition_request(name: str, version: str, stage: Stage [, comment: Optional[str]]) -> RejectTransitionRequestResponse + + Reject a transition request. + + Rejects a model version stage transition request. + + :param name: str + Name of the model. + :param version: str + Version of the model. + :param stage: :class:`Stage` + Target stage of the transition. Valid values are: + + * `None`: The initial stage of a model version. + + * `Staging`: Staging or pre-production stage. + + * `Production`: Production stage. + + * `Archived`: Archived stage. + :param comment: str (optional) + User-provided comment on the action. + + :returns: :class:`RejectTransitionRequestResponse` + + + .. 
py:method:: rename_model(name: str [, new_name: Optional[str]]) -> RenameModelResponse + + Rename a model. + + Renames a registered model. + + :param name: str + Registered model unique name identifier. + :param new_name: str (optional) + If provided, updates the name for this `registered_model`. + + :returns: :class:`RenameModelResponse` + + + .. py:method:: search_model_versions( [, filter: Optional[str], max_results: Optional[int], order_by: Optional[List[str]], page_token: Optional[str]]) -> Iterator[ModelVersion] + + Searches model versions. + + Searches for specific model versions based on the supplied __filter__. + + :param filter: str (optional) + String filter condition, like "name='my-model-name'". Must be a single boolean condition, with + string values wrapped in single quotes. + :param max_results: int (optional) + Maximum number of models desired. Max threshold is 10K. + :param order_by: List[str] (optional) + List of columns to be ordered by including model name, version, stage with an optional "DESC" or + "ASC" annotation, where "ASC" is the default. Tiebreaks are done by latest stage transition + timestamp, followed by name ASC, followed by version DESC. + :param page_token: str (optional) + Pagination token to go to next page based on previous search query. + + :returns: Iterator over :class:`ModelVersion` + + + .. py:method:: search_models( [, filter: Optional[str], max_results: Optional[int], order_by: Optional[List[str]], page_token: Optional[str]]) -> Iterator[Model] + + Search models. + + Search for registered models based on the specified __filter__. + + :param filter: str (optional) + String filter condition, like "name LIKE 'my-model-name'". Interpreted in the backend automatically + as "name LIKE '%my-model-name%'". Single boolean condition, with string values wrapped in single + quotes. + :param max_results: int (optional) + Maximum number of models desired. Default is 100. Max threshold is 1000. 
+ :param order_by: List[str] (optional) + List of columns for ordering search results, which can include model name and last updated timestamp + with an optional "DESC" or "ASC" annotation, where "ASC" is the default. Tiebreaks are done by model + name ASC. + :param page_token: str (optional) + Pagination token to go to the next page based on a previous search query. + + :returns: Iterator over :class:`Model` + + + .. py:method:: set_model_tag(name: str, key: str, value: str) + + Set a tag. + + Sets a tag on a registered model. + + :param name: str + Unique name of the model. + :param key: str + Name of the tag. Maximum size depends on storage backend. If a tag with this name already exists, + its preexisting value will be replaced by the specified `value`. All storage backends are guaranteed + to support key values up to 250 bytes in size. + :param value: str + String value of the tag being logged. Maximum size depends on storage backend. All storage backends + are guaranteed to support key values up to 5000 bytes in size. + + + + + .. py:method:: set_model_version_tag(name: str, version: str, key: str, value: str) + + Set a version tag. + + Sets a model version tag. + + :param name: str + Unique name of the model. + :param version: str + Model version number. + :param key: str + Name of the tag. Maximum size depends on storage backend. If a tag with this name already exists, + its preexisting value will be replaced by the specified `value`. All storage backends are guaranteed + to support key values up to 250 bytes in size. + :param value: str + String value of the tag being logged. Maximum size depends on storage backend. All storage backends + are guaranteed to support key values up to 5000 bytes in size. + + + + + .. py:method:: set_permissions(registered_model_id: str [, access_control_list: Optional[List[RegisteredModelAccessControlRequest]]]) -> RegisteredModelPermissions + + Set registered model permissions. + + Sets permissions on a registered model. 
Registered models can inherit permissions from their root + object. + + :param registered_model_id: str + The registered model for which to get or manage permissions. + :param access_control_list: List[:class:`RegisteredModelAccessControlRequest`] (optional) + + :returns: :class:`RegisteredModelPermissions` + + + .. py:method:: test_registry_webhook(id: str [, event: Optional[RegistryWebhookEvent]]) -> TestRegistryWebhookResponse + + Test a webhook. + + **NOTE:** This endpoint is in Public Preview. + + Tests a registry webhook. + + :param id: str + Webhook ID + :param event: :class:`RegistryWebhookEvent` (optional) + If `event` is specified, the test trigger uses the specified event. If `event` is not specified, the + test trigger uses a randomly chosen event associated with the webhook. + + :returns: :class:`TestRegistryWebhookResponse` + + + .. py:method:: transition_stage(name: str, version: str, stage: Stage, archive_existing_versions: bool [, comment: Optional[str]]) -> TransitionStageResponse + + Transition a stage. + + Transition a model version's stage. This is a Databricks workspace version of the [MLflow endpoint] + that also accepts a comment associated with the transition to be recorded. + + [MLflow endpoint]: https://www.mlflow.org/docs/latest/rest-api.html#transition-modelversion-stage + + :param name: str + Name of the model. + :param version: str + Version of the model. + :param stage: :class:`Stage` + Target stage of the transition. Valid values are: + + * `None`: The initial stage of a model version. + + * `Staging`: Staging or pre-production stage. + + * `Production`: Production stage. + + * `Archived`: Archived stage. + :param archive_existing_versions: bool + Specifies whether to archive all current model versions in the target stage. + :param comment: str (optional) + User-provided comment on the action. + + :returns: :class:`TransitionStageResponse` + + + ..
py:method:: update_comment(id: str, comment: str) -> UpdateCommentResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + model = w.model_registry.create_model(name=f'sdk-{time.time_ns()}') + + mv = w.model_registry.create_model_version(name=model.registered_model.name, source="dbfs:/tmp") + + created = w.model_registry.create_comment(comment=f'sdk-{time.time_ns()}', + name=mv.model_version.name, + version=mv.model_version.version) + + _ = w.model_registry.update_comment(comment=f'sdk-{time.time_ns()}', id=created.comment.id) + + # cleanup + w.model_registry.delete_comment(id=created.comment.id) + + Update a comment. + + Post an edit to a comment on a model version. + + :param id: str + Unique identifier of an activity + :param comment: str + User-provided comment on the action. + + :returns: :class:`UpdateCommentResponse` + + + .. py:method:: update_model(name: str [, description: Optional[str]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + model = w.model_registry.create_model(name=f'sdk-{time.time_ns()}') + + created = w.model_registry.create_model_version(name=model.registered_model.name, source="dbfs:/tmp") + + w.model_registry.update_model_version(description=f'sdk-{time.time_ns()}', + name=created.model_version.name, + version=created.model_version.version) + + Update model. + + Updates a registered model. + + :param name: str + Registered model unique name identifier. + :param description: str (optional) + If provided, updates the description for this `registered_model`. + + + + + .. py:method:: update_model_version(name: str, version: str [, description: Optional[str]]) + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + model = w.model_registry.create_model(name=f'sdk-{time.time_ns()}') + + created = w.model_registry.create_model_version(name=model.registered_model.name, source="dbfs:/tmp") + + w.model_registry.update_model_version(description=f'sdk-{time.time_ns()}', + name=created.model_version.name, + version=created.model_version.version) + + Update model version. + + Updates the model version. + + :param name: str + Name of the registered model + :param version: str + Model version number + :param description: str (optional) + If provided, updates the description for this model version. + + + + + .. py:method:: update_permissions(registered_model_id: str [, access_control_list: Optional[List[RegisteredModelAccessControlRequest]]]) -> RegisteredModelPermissions + + Update registered model permissions. + + Updates the permissions on a registered model. Registered models can inherit permissions from their + root object. + + :param registered_model_id: str + The registered model for which to get or manage permissions. + :param access_control_list: List[:class:`RegisteredModelAccessControlRequest`] (optional) + + :returns: :class:`RegisteredModelPermissions` + + + .. py:method:: update_webhook(id: str [, description: Optional[str], events: Optional[List[RegistryWebhookEvent]], http_url_spec: Optional[HttpUrlSpec], job_spec: Optional[JobSpec], status: Optional[RegistryWebhookStatus]]) + + + Usage: + + ..
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import ml + + w = WorkspaceClient() + + created = w.model_registry.create_webhook(description=f'sdk-{time.time_ns()}', + events=[ml.RegistryWebhookEvent.MODEL_VERSION_CREATED], + http_url_spec=ml.HttpUrlSpec(url=w.config.host)) + + w.model_registry.update_webhook(id=created.webhook.id, description=f'sdk-{time.time_ns()}') + + # cleanup + w.model_registry.delete_webhook(id=created.webhook.id) + + Update a webhook. + + **NOTE:** This endpoint is in Public Preview. + + Updates a registry webhook. + + :param id: str + Webhook ID + :param description: str (optional) + User-specified description for the webhook. + :param events: List[:class:`RegistryWebhookEvent`] (optional) + Events that can trigger a registry webhook: * `MODEL_VERSION_CREATED`: A new model version was + created for the associated model. + + * `MODEL_VERSION_TRANSITIONED_STAGE`: A model version’s stage was changed. + + * `TRANSITION_REQUEST_CREATED`: A user requested a model version’s stage be transitioned. + + * `COMMENT_CREATED`: A user wrote a comment on a registered model. + + * `REGISTERED_MODEL_CREATED`: A new registered model was created. This event type can only be + specified for a registry-wide webhook, which can be created by not specifying a model name in the + create request. + + * `MODEL_VERSION_TAG_SET`: A user set a tag on the model version. + + * `MODEL_VERSION_TRANSITIONED_TO_STAGING`: A model version was transitioned to staging. + + * `MODEL_VERSION_TRANSITIONED_TO_PRODUCTION`: A model version was transitioned to production. + + * `MODEL_VERSION_TRANSITIONED_TO_ARCHIVED`: A model version was archived. + + * `TRANSITION_REQUEST_TO_STAGING_CREATED`: A user requested a model version be transitioned to + staging. + + * `TRANSITION_REQUEST_TO_PRODUCTION_CREATED`: A user requested a model version be transitioned to + production. 
+ + * `TRANSITION_REQUEST_TO_ARCHIVED_CREATED`: A user requested a model version be archived. + :param http_url_spec: :class:`HttpUrlSpec` (optional) + :param job_spec: :class:`JobSpec` (optional) + :param status: :class:`RegistryWebhookStatus` (optional) + Enable or disable triggering the webhook, or put the webhook into test mode. The default is + `ACTIVE`: * `ACTIVE`: Webhook is triggered when an associated event happens. + + * `DISABLED`: Webhook is not triggered. + + * `TEST_MODE`: Webhook can be triggered through the test endpoint, but is not triggered on a real + event. + + + \ No newline at end of file diff --git a/docs/workspace/pipelines/index.rst b/docs/workspace/pipelines/index.rst new file mode 100644 index 000000000..83aaafe99 --- /dev/null +++ b/docs/workspace/pipelines/index.rst @@ -0,0 +1,10 @@ + +Delta Live Tables +================= + +Manage pipelines, runs, and other Delta Live Table resources + +.. toctree:: + :maxdepth: 1 + + pipelines \ No newline at end of file diff --git a/docs/workspace/pipelines/pipelines.rst b/docs/workspace/pipelines/pipelines.rst new file mode 100644 index 000000000..29872c900 --- /dev/null +++ b/docs/workspace/pipelines/pipelines.rst @@ -0,0 +1,497 @@ +``w.pipelines``: Pipelines +========================== +.. currentmodule:: databricks.sdk.service.pipelines + +.. py:class:: PipelinesAPI + + The Delta Live Tables API allows you to create, edit, delete, start, and view details about pipelines. + + Delta Live Tables is a framework for building reliable, maintainable, and testable data processing + pipelines. You define the transformations to perform on your data, and Delta Live Tables manages task + orchestration, cluster management, monitoring, data quality, and error handling. + + Instead of defining your data pipelines using a series of separate Apache Spark tasks, Delta Live Tables + manages how your data is transformed based on a target schema you define for each processing step. 
You can + also enforce data quality with Delta Live Tables expectations. Expectations allow you to define expected + data quality and specify how to handle records that fail those expectations. + + .. py:method:: create( [, allow_duplicate_names: Optional[bool], catalog: Optional[str], channel: Optional[str], clusters: Optional[List[PipelineCluster]], configuration: Optional[Dict[str, str]], continuous: Optional[bool], development: Optional[bool], dry_run: Optional[bool], edition: Optional[str], filters: Optional[Filters], id: Optional[str], libraries: Optional[List[PipelineLibrary]], name: Optional[str], notifications: Optional[List[Notifications]], photon: Optional[bool], serverless: Optional[bool], storage: Optional[str], target: Optional[str], trigger: Optional[PipelineTrigger]]) -> CreatePipelineResponse + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import pipelines + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + created = w.pipelines.create( + continuous=False, + name=f'sdk-{time.time_ns()}', + libraries=[pipelines.PipelineLibrary(notebook=pipelines.NotebookLibrary(path=notebook_path))], + clusters=[ + pipelines.PipelineCluster(instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + label="default", + num_workers=1, + custom_tags={ + "cluster_type": "default", + }) + ]) + + # cleanup + w.pipelines.delete(pipeline_id=created.pipeline_id) + + Create a pipeline. + + Creates a new data processing pipeline based on the requested configuration. If successful, this + method returns the ID of the new pipeline. + + :param allow_duplicate_names: bool (optional) + If false, deployment will fail if name conflicts with that of another pipeline. + :param catalog: str (optional) + A catalog in Unity Catalog to publish data from this pipeline to. 
If `target` is specified, tables + in this pipeline are published to a `target` schema inside `catalog` (for example, + `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog. + :param channel: str (optional) + DLT Release Channel that specifies which version to use. + :param clusters: List[:class:`PipelineCluster`] (optional) + Cluster settings for this pipeline deployment. + :param configuration: Dict[str,str] (optional) + String-String configuration for this pipeline execution. + :param continuous: bool (optional) + Whether the pipeline is continuous or triggered. This replaces `trigger`. + :param development: bool (optional) + Whether the pipeline is in Development mode. Defaults to false. + :param dry_run: bool (optional) + :param edition: str (optional) + Pipeline product edition. + :param filters: :class:`Filters` (optional) + Filters on which Pipeline packages to include in the deployed graph. + :param id: str (optional) + Unique identifier for this pipeline. + :param libraries: List[:class:`PipelineLibrary`] (optional) + Libraries or code needed by this deployment. + :param name: str (optional) + Friendly identifier for this pipeline. + :param notifications: List[:class:`Notifications`] (optional) + List of notification settings for this pipeline. + :param photon: bool (optional) + Whether Photon is enabled for this pipeline. + :param serverless: bool (optional) + Whether serverless compute is enabled for this pipeline. + :param storage: str (optional) + DBFS root directory for storing checkpoints and tables. + :param target: str (optional) + Target schema (database) to add tables in this pipeline to. If not specified, no data is published + to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`. + :param trigger: :class:`PipelineTrigger` (optional) + Which pipeline trigger to use. Deprecated: Use `continuous` instead. + + :returns: :class:`CreatePipelineResponse` + + + .. 
py:method:: delete(pipeline_id: str) + + Delete a pipeline. + + Deletes a pipeline. + + :param pipeline_id: str + + + + + .. py:method:: get(pipeline_id: str) -> GetPipelineResponse + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import pipelines + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + created = w.pipelines.create( + continuous=False, + name=f'sdk-{time.time_ns()}', + libraries=[pipelines.PipelineLibrary(notebook=pipelines.NotebookLibrary(path=notebook_path))], + clusters=[ + pipelines.PipelineCluster(instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + label="default", + num_workers=1, + custom_tags={ + "cluster_type": "default", + }) + ]) + + by_id = w.pipelines.get(pipeline_id=created.pipeline_id) + + # cleanup + w.pipelines.delete(pipeline_id=created.pipeline_id) + + Get a pipeline. + + :param pipeline_id: str + + :returns: :class:`GetPipelineResponse` + + + .. py:method:: get_permission_levels(pipeline_id: str) -> GetPipelinePermissionLevelsResponse + + Get pipeline permission levels. + + Gets the permission levels that a user can have on an object. + + :param pipeline_id: str + The pipeline for which to get or manage permissions. + + :returns: :class:`GetPipelinePermissionLevelsResponse` + + + .. py:method:: get_permissions(pipeline_id: str) -> PipelinePermissions + + Get pipeline permissions. + + Gets the permissions of a pipeline. Pipelines can inherit permissions from their root object. + + :param pipeline_id: str + The pipeline for which to get or manage permissions. + + :returns: :class:`PipelinePermissions` + + + .. py:method:: get_update(pipeline_id: str, update_id: str) -> GetUpdateResponse + + Get a pipeline update. + + Gets an update from an active pipeline. + + :param pipeline_id: str + The ID of the pipeline. + :param update_id: str + The ID of the update. 
+ + :returns: :class:`GetUpdateResponse` + + + .. py:method:: list_pipeline_events(pipeline_id: str [, filter: Optional[str], max_results: Optional[int], order_by: Optional[List[str]], page_token: Optional[str]]) -> Iterator[PipelineEvent] + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import pipelines + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + created = w.pipelines.create( + continuous=False, + name=f'sdk-{time.time_ns()}', + libraries=[pipelines.PipelineLibrary(notebook=pipelines.NotebookLibrary(path=notebook_path))], + clusters=[ + pipelines.PipelineCluster(instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + label="default", + num_workers=1, + custom_tags={ + "cluster_type": "default", + }) + ]) + + events = w.pipelines.list_pipeline_events(pipeline_id=created.pipeline_id) + + # cleanup + w.pipelines.delete(pipeline_id=created.pipeline_id) + + List pipeline events. + + Retrieves events for a pipeline. + + :param pipeline_id: str + :param filter: str (optional) + Criteria to select a subset of results, expressed using a SQL-like syntax. The supported filters + are: 1. level='INFO' (or WARN or ERROR) 2. level in ('INFO', 'WARN') 3. id='[event-id]' 4. timestamp + > 'TIMESTAMP' (or >=,<,<=,=) + + Composite expressions are supported, for example: level in ('ERROR', 'WARN') AND timestamp> + '2021-07-22T06:37:33.083Z' + :param max_results: int (optional) + Max number of entries to return in a single page. The system may return fewer than max_results + events in a response, even if there are more events available. + :param order_by: List[str] (optional) + A string indicating a sort order by timestamp for the results, for example, ["timestamp asc"]. The + sort order can be ascending or descending. By default, events are returned in descending order by + timestamp. 
+ :param page_token: str (optional) + Page token returned by previous call. This field is mutually exclusive with all fields in this + request except max_results. An error is returned if any fields other than max_results are set when + this field is set. + + :returns: Iterator over :class:`PipelineEvent` + + + .. py:method:: list_pipelines( [, filter: Optional[str], max_results: Optional[int], order_by: Optional[List[str]], page_token: Optional[str]]) -> Iterator[PipelineStateInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import pipelines + + w = WorkspaceClient() + + all = w.pipelines.list_pipelines(pipelines.ListPipelinesRequest()) + + List pipelines. + + Lists pipelines defined in the Delta Live Tables system. + + :param filter: str (optional) + Select a subset of results based on the specified criteria. The supported filters are: + + * `notebook='<path>'` to select pipelines that reference the provided notebook path. * `name LIKE + '[pattern]'` to select pipelines with a name that matches pattern. Wildcards are supported, for + example: `name LIKE '%shopping%'` + + Composite filters are not supported. This field is optional. + :param max_results: int (optional) + The maximum number of entries to return in a single page. The system may return fewer than + max_results events in a response, even if there are more events available. This field is optional. + The default value is 25. The maximum value is 100. An error is returned if the value of max_results + is greater than 100. + :param order_by: List[str] (optional) + A list of strings specifying the order of results. Supported order_by fields are id and name. The + default is id asc. This field is optional. + :param page_token: str (optional) + Page token returned by previous call + + :returns: Iterator over :class:`PipelineStateInfo` + + + .. 
py:method:: list_updates(pipeline_id: str [, max_results: Optional[int], page_token: Optional[str], until_update_id: Optional[str]]) -> ListUpdatesResponse + + List pipeline updates. + + List updates for an active pipeline. + + :param pipeline_id: str + The pipeline to return updates for. + :param max_results: int (optional) + Max number of entries to return in a single page. + :param page_token: str (optional) + Page token returned by previous call + :param until_update_id: str (optional) + If present, returns updates until and including this update_id. + + :returns: :class:`ListUpdatesResponse` + + + .. py:method:: reset(pipeline_id: str) -> Wait[GetPipelineResponse] + + Reset a pipeline. + + Resets a pipeline. + + :param pipeline_id: str + + :returns: + Long-running operation waiter for :class:`GetPipelineResponse`. + See :method:wait_get_pipeline_running for more details. + + + .. py:method:: reset_and_wait(pipeline_id: str, timeout: datetime.timedelta = 0:20:00) -> GetPipelineResponse + + + .. py:method:: set_permissions(pipeline_id: str [, access_control_list: Optional[List[PipelineAccessControlRequest]]]) -> PipelinePermissions + + Set pipeline permissions. + + Sets permissions on a pipeline. Pipelines can inherit permissions from their root object. + + :param pipeline_id: str + The pipeline for which to get or manage permissions. + :param access_control_list: List[:class:`PipelineAccessControlRequest`] (optional) + + :returns: :class:`PipelinePermissions` + + + .. py:method:: start_update(pipeline_id: str [, cause: Optional[StartUpdateCause], full_refresh: Optional[bool], full_refresh_selection: Optional[List[str]], refresh_selection: Optional[List[str]], validate_only: Optional[bool]]) -> StartUpdateResponse + + Start a pipeline. + + Starts a new update for the pipeline. If there is already an active update for the pipeline, the + request will fail and the active update will remain running. 
+ + :param pipeline_id: str + :param cause: :class:`StartUpdateCause` (optional) + :param full_refresh: bool (optional) + If true, this update will reset all tables before running. + :param full_refresh_selection: List[str] (optional) + A list of tables to update with fullRefresh. If both refresh_selection and full_refresh_selection + are empty, this is a full graph update. Full Refresh on a table means that the states of the table + will be reset before the refresh. + :param refresh_selection: List[str] (optional) + A list of tables to update without fullRefresh. If both refresh_selection and full_refresh_selection + are empty, this is a full graph update. Full Refresh on a table means that the states of the table + will be reset before the refresh. + :param validate_only: bool (optional) + If true, this update only validates the correctness of pipeline source code but does not materialize + or publish any datasets. + + :returns: :class:`StartUpdateResponse` + + + .. py:method:: stop(pipeline_id: str) -> Wait[GetPipelineResponse] + + Stop a pipeline. + + Stops the pipeline by canceling the active update. If there is no active update for the pipeline, this + request is a no-op. + + :param pipeline_id: str + + :returns: + Long-running operation waiter for :class:`GetPipelineResponse`. + See :method:wait_get_pipeline_idle for more details. + + + .. py:method:: stop_and_wait(pipeline_id: str, timeout: datetime.timedelta = 0:20:00) -> GetPipelineResponse + + + .. 
py:method:: update(pipeline_id: str [, allow_duplicate_names: Optional[bool], catalog: Optional[str], channel: Optional[str], clusters: Optional[List[PipelineCluster]], configuration: Optional[Dict[str, str]], continuous: Optional[bool], development: Optional[bool], edition: Optional[str], expected_last_modified: Optional[int], filters: Optional[Filters], id: Optional[str], libraries: Optional[List[PipelineLibrary]], name: Optional[str], notifications: Optional[List[Notifications]], photon: Optional[bool], serverless: Optional[bool], storage: Optional[str], target: Optional[str], trigger: Optional[PipelineTrigger]]) + + + Usage: + + .. code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import pipelines + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + created = w.pipelines.create( + continuous=False, + name=f'sdk-{time.time_ns()}', + libraries=[pipelines.PipelineLibrary(notebook=pipelines.NotebookLibrary(path=notebook_path))], + clusters=[ + pipelines.PipelineCluster(instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + label="default", + num_workers=1, + custom_tags={ + "cluster_type": "default", + }) + ]) + + w.pipelines.update( + pipeline_id=created.pipeline_id, + name=f'sdk-{time.time_ns()}', + libraries=[pipelines.PipelineLibrary(notebook=pipelines.NotebookLibrary(path=notebook_path))], + clusters=[ + pipelines.PipelineCluster(instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], + label="default", + num_workers=1, + custom_tags={ + "cluster_type": "default", + }) + ]) + + # cleanup + w.pipelines.delete(pipeline_id=created.pipeline_id) + + Edit a pipeline. + + Updates a pipeline with the supplied configuration. + + :param pipeline_id: str + Unique identifier for this pipeline. + :param allow_duplicate_names: bool (optional) + If false, deployment will fail if name has changed and conflicts with the name of another pipeline. 
+ :param catalog: str (optional) + A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables + in this pipeline are published to a `target` schema inside `catalog` (for example, + `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog. + :param channel: str (optional) + DLT Release Channel that specifies which version to use. + :param clusters: List[:class:`PipelineCluster`] (optional) + Cluster settings for this pipeline deployment. + :param configuration: Dict[str,str] (optional) + String-String configuration for this pipeline execution. + :param continuous: bool (optional) + Whether the pipeline is continuous or triggered. This replaces `trigger`. + :param development: bool (optional) + Whether the pipeline is in Development mode. Defaults to false. + :param edition: str (optional) + Pipeline product edition. + :param expected_last_modified: int (optional) + If present, the last-modified time of the pipeline settings before the edit. If the settings were + modified after that time, then the request will fail with a conflict. + :param filters: :class:`Filters` (optional) + Filters on which Pipeline packages to include in the deployed graph. + :param id: str (optional) + Unique identifier for this pipeline. + :param libraries: List[:class:`PipelineLibrary`] (optional) + Libraries or code needed by this deployment. + :param name: str (optional) + Friendly identifier for this pipeline. + :param notifications: List[:class:`Notifications`] (optional) + List of notification settings for this pipeline. + :param photon: bool (optional) + Whether Photon is enabled for this pipeline. + :param serverless: bool (optional) + Whether serverless compute is enabled for this pipeline. + :param storage: str (optional) + DBFS root directory for storing checkpoints and tables. + :param target: str (optional) + Target schema (database) to add tables in this pipeline to. 
If not specified, no data is published + to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`. + :param trigger: :class:`PipelineTrigger` (optional) + Which pipeline trigger to use. Deprecated: Use `continuous` instead. + + + + + .. py:method:: update_permissions(pipeline_id: str [, access_control_list: Optional[List[PipelineAccessControlRequest]]]) -> PipelinePermissions + + Update pipeline permissions. + + Updates the permissions on a pipeline. Pipelines can inherit permissions from their root object. + + :param pipeline_id: str + The pipeline for which to get or manage permissions. + :param access_control_list: List[:class:`PipelineAccessControlRequest`] (optional) + + :returns: :class:`PipelinePermissions` + + + .. py:method:: wait_get_pipeline_idle(pipeline_id: str, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[GetPipelineResponse], None]]) -> GetPipelineResponse + + + .. py:method:: wait_get_pipeline_running(pipeline_id: str, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[GetPipelineResponse], None]]) -> GetPipelineResponse diff --git a/docs/workspace/serving/apps.rst b/docs/workspace/serving/apps.rst new file mode 100644 index 000000000..bd2f6bed2 --- /dev/null +++ b/docs/workspace/serving/apps.rst @@ -0,0 +1,81 @@ +``w.apps``: Databricks Apps +=========================== +.. currentmodule:: databricks.sdk.service.serving + +.. py:class:: AppsAPI + + Lakehouse Apps run directly on a customer’s Databricks instance, integrate with their data, use and + extend Databricks services, and enable users to interact through single sign-on. + + .. py:method:: create(manifest: AppManifest [, resources: Optional[Any]]) -> DeploymentStatus + + Create and deploy an application. + + Creates and deploys an application. 
+ + :param manifest: :class:`AppManifest` + Manifest that specifies the application requirements + :param resources: Any (optional) + Information passed at app deployment time to fulfill app dependencies + + :returns: :class:`DeploymentStatus` + + + .. py:method:: delete_app(name: str) -> DeleteAppResponse + + Delete an application. + + Delete an application definition + + :param name: str + The name of an application. This field is required. + + :returns: :class:`DeleteAppResponse` + + + .. py:method:: get_app(name: str) -> GetAppResponse + + Get definition for an application. + + Get an application definition + + :param name: str + The name of an application. This field is required. + + :returns: :class:`GetAppResponse` + + + .. py:method:: get_app_deployment_status(deployment_id: str [, include_app_log: Optional[str]]) -> DeploymentStatus + + Get deployment status for an application. + + Get deployment status for an application + + :param deployment_id: str + The deployment id for an application. This field is required. + :param include_app_log: str (optional) + Boolean flag to include application logs + + :returns: :class:`DeploymentStatus` + + + .. py:method:: get_apps() -> ListAppsResponse + + List all applications. + + List all available applications + + :returns: :class:`ListAppsResponse` + + + .. py:method:: get_events(name: str) -> ListAppEventsResponse + + Get deployment events for an application. + + Get deployment events for an application + + :param name: str + The name of an application. This field is required. + + :returns: :class:`ListAppEventsResponse` + \ No newline at end of file diff --git a/docs/workspace/serving/index.rst b/docs/workspace/serving/index.rst new file mode 100644 index 000000000..ce3d216ff --- /dev/null +++ b/docs/workspace/serving/index.rst @@ -0,0 +1,11 @@ + +Real-time Serving +================= + +Use real-time inference for machine learning + +.. 
toctree:: + :maxdepth: 1 + + apps + serving_endpoints \ No newline at end of file diff --git a/docs/workspace/serving/serving_endpoints.rst b/docs/workspace/serving/serving_endpoints.rst new file mode 100644 index 000000000..8972b0877 --- /dev/null +++ b/docs/workspace/serving/serving_endpoints.rst @@ -0,0 +1,276 @@ +``w.serving_endpoints``: Serving endpoints +========================================== +.. currentmodule:: databricks.sdk.service.serving + +.. py:class:: ServingEndpointsAPI + + The Serving Endpoints API allows you to create, update, and delete model serving endpoints. + + You can use a serving endpoint to serve models from the Databricks Model Registry or from Unity Catalog. + Endpoints expose the underlying models as scalable REST API endpoints using serverless compute. This means + the endpoints and associated compute resources are fully managed by Databricks and will not appear in your + cloud account. A serving endpoint can consist of one or more MLflow models from the Databricks Model + Registry, called served entities. A serving endpoint can have at most ten served entities. You can + configure traffic settings to define how requests should be routed to your served entities behind an + endpoint. Additionally, you can configure the scale of resources that should be applied to each served + entity. + + .. py:method:: build_logs(name: str, served_model_name: str) -> BuildLogsResponse + + Retrieve the logs associated with building the model's environment for a given serving endpoint's + served model. + + Retrieves the build logs associated with the provided served model. + + :param name: str + The name of the serving endpoint that the served model belongs to. This field is required. + :param served_model_name: str + The name of the served model that build logs will be retrieved for. This field is required. + + :returns: :class:`BuildLogsResponse` + + + .. 
py:method:: create(name: str, config: EndpointCoreConfigInput [, rate_limits: Optional[List[RateLimit]], tags: Optional[List[EndpointTag]]]) -> Wait[ServingEndpointDetailed] + + Create a new serving endpoint. + + :param name: str + The name of the serving endpoint. This field is required and must be unique across a Databricks + workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. + :param config: :class:`EndpointCoreConfigInput` + The core config of the serving endpoint. + :param rate_limits: List[:class:`RateLimit`] (optional) + Rate limits to be applied to the serving endpoint. NOTE: only external and foundation model + endpoints are supported as of now. + :param tags: List[:class:`EndpointTag`] (optional) + Tags to be attached to the serving endpoint and automatically propagated to billing logs. + + :returns: + Long-running operation waiter for :class:`ServingEndpointDetailed`. + See :method:wait_get_serving_endpoint_not_updating for more details. + + + .. py:method:: create_and_wait(name: str, config: EndpointCoreConfigInput [, rate_limits: Optional[List[RateLimit]], tags: Optional[List[EndpointTag]], timeout: datetime.timedelta = 0:20:00]) -> ServingEndpointDetailed + + + .. py:method:: delete(name: str) + + Delete a serving endpoint. + + :param name: str + The name of the serving endpoint. This field is required. + + + + + .. py:method:: export_metrics(name: str) + + Retrieve the metrics associated with a serving endpoint. + + Retrieves the metrics associated with the provided serving endpoint in either Prometheus or + OpenMetrics exposition format. + + :param name: str + The name of the serving endpoint to retrieve metrics for. This field is required. + + + + + .. py:method:: get(name: str) -> ServingEndpointDetailed + + Get a single serving endpoint. + + Retrieves the details for a single serving endpoint. + + :param name: str + The name of the serving endpoint. This field is required. 
+ + :returns: :class:`ServingEndpointDetailed` + + + .. py:method:: get_permission_levels(serving_endpoint_id: str) -> GetServingEndpointPermissionLevelsResponse + + Get serving endpoint permission levels. + + Gets the permission levels that a user can have on an object. + + :param serving_endpoint_id: str + The serving endpoint for which to get or manage permissions. + + :returns: :class:`GetServingEndpointPermissionLevelsResponse` + + + .. py:method:: get_permissions(serving_endpoint_id: str) -> ServingEndpointPermissions + + Get serving endpoint permissions. + + Gets the permissions of a serving endpoint. Serving endpoints can inherit permissions from their root + object. + + :param serving_endpoint_id: str + The serving endpoint for which to get or manage permissions. + + :returns: :class:`ServingEndpointPermissions` + + + .. py:method:: list() -> Iterator[ServingEndpoint] + + Retrieve all serving endpoints. + + :returns: Iterator over :class:`ServingEndpoint` + + + .. py:method:: logs(name: str, served_model_name: str) -> ServerLogsResponse + + Retrieve the most recent log lines associated with a given serving endpoint's served model. + + Retrieves the service logs associated with the provided served model. + + :param name: str + The name of the serving endpoint that the served model belongs to. This field is required. + :param served_model_name: str + The name of the served model that logs will be retrieved for. This field is required. + + :returns: :class:`ServerLogsResponse` + + + .. py:method:: patch(name: str [, add_tags: Optional[List[EndpointTag]], delete_tags: Optional[List[str]]]) -> Iterator[EndpointTag] + + Patch the tags of a serving endpoint. + + Used to batch add and delete tags from a serving endpoint with a single API call. + + :param name: str + The name of the serving endpoint whose tags to patch. This field is required. 
+ :param add_tags: List[:class:`EndpointTag`] (optional) + List of endpoint tags to add + :param delete_tags: List[str] (optional) + List of tag keys to delete + + :returns: Iterator over :class:`EndpointTag` + + + .. py:method:: put(name: str [, rate_limits: Optional[List[RateLimit]]]) -> PutResponse + + Update the rate limits of a serving endpoint. + + Used to update the rate limits of a serving endpoint. NOTE: only external and foundation model + endpoints are supported as of now. + + :param name: str + The name of the serving endpoint whose rate limits are being updated. This field is required. + :param rate_limits: List[:class:`RateLimit`] (optional) + The list of endpoint rate limits. + + :returns: :class:`PutResponse` + + + .. py:method:: query(name: str [, dataframe_records: Optional[List[Any]], dataframe_split: Optional[DataframeSplitInput], extra_params: Optional[Dict[str, str]], input: Optional[Any], inputs: Optional[Any], instances: Optional[List[Any]], max_tokens: Optional[int], messages: Optional[List[ChatMessage]], n: Optional[int], prompt: Optional[Any], stop: Optional[List[str]], stream: Optional[bool], temperature: Optional[float]]) -> QueryEndpointResponse + + Query a serving endpoint with provided model input. + + :param name: str + The name of the serving endpoint. This field is required. + :param dataframe_records: List[Any] (optional) + Pandas Dataframe input in the records orientation. + :param dataframe_split: :class:`DataframeSplitInput` (optional) + Pandas Dataframe input in the split orientation. + :param extra_params: Dict[str,str] (optional) + The extra parameters field used ONLY for __completions, chat,__ and __embeddings external & + foundation model__ serving endpoints. This is a map of strings and should only be used with other + external/foundation model query fields. 
+ :param input: Any (optional) + The input string (or array of strings) field used ONLY for __embeddings external & foundation + model__ serving endpoints and is the only field (along with extra_params if needed) used by + embeddings queries. + :param inputs: Any (optional) + Tensor-based input in columnar format. + :param instances: List[Any] (optional) + Tensor-based input in row format. + :param max_tokens: int (optional) + The max tokens field used ONLY for __completions__ and __chat external & foundation model__ serving + endpoints. This is an integer and should only be used with other chat/completions query fields. + :param messages: List[:class:`ChatMessage`] (optional) + The messages field used ONLY for __chat external & foundation model__ serving endpoints. This is a + map of strings and should only be used with other chat query fields. + :param n: int (optional) + The n (number of candidates) field used ONLY for __completions__ and __chat external & foundation + model__ serving endpoints. This is an integer between 1 and 5 with a default of 1 and should only be + used with other chat/completions query fields. + :param prompt: Any (optional) + The prompt string (or array of strings) field used ONLY for __completions external & foundation + model__ serving endpoints and should only be used with other completions query fields. + :param stop: List[str] (optional) + The stop sequences field used ONLY for __completions__ and __chat external & foundation model__ + serving endpoints. This is a list of strings and should only be used with other chat/completions + query fields. + :param stream: bool (optional) + The stream field used ONLY for __completions__ and __chat external & foundation model__ serving + endpoints. This is a boolean defaulting to false and should only be used with other chat/completions + query fields. 
+ :param temperature: float (optional) + The temperature field used ONLY for __completions__ and __chat external & foundation model__ serving + endpoints. This is a float between 0.0 and 2.0 with a default of 1.0 and should only be used with + other chat/completions query fields. + + :returns: :class:`QueryEndpointResponse` + + + .. py:method:: set_permissions(serving_endpoint_id: str [, access_control_list: Optional[List[ServingEndpointAccessControlRequest]]]) -> ServingEndpointPermissions + + Set serving endpoint permissions. + + Sets permissions on a serving endpoint. Serving endpoints can inherit permissions from their root + object. + + :param serving_endpoint_id: str + The serving endpoint for which to get or manage permissions. + :param access_control_list: List[:class:`ServingEndpointAccessControlRequest`] (optional) + + :returns: :class:`ServingEndpointPermissions` + + + .. py:method:: update_config(name: str [, auto_capture_config: Optional[AutoCaptureConfigInput], served_entities: Optional[List[ServedEntityInput]], served_models: Optional[List[ServedModelInput]], traffic_config: Optional[TrafficConfig]]) -> Wait[ServingEndpointDetailed] + + Update a serving endpoint with a new config. + + Updates any combination of the serving endpoint's served entities, the compute configuration of those + served entities, and the endpoint's traffic config. An endpoint that already has an update in progress + can not be updated until the current update completes or fails. + + :param name: str + The name of the serving endpoint to update. This field is required. + :param auto_capture_config: :class:`AutoCaptureConfigInput` (optional) + Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. + :param served_entities: List[:class:`ServedEntityInput`] (optional) + A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served + entities. 
+ :param served_models: List[:class:`ServedModelInput`] (optional) + (Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A + serving endpoint can have up to 10 served models. + :param traffic_config: :class:`TrafficConfig` (optional) + The traffic config defining how invocations to the serving endpoint should be routed. + + :returns: + Long-running operation waiter for :class:`ServingEndpointDetailed`. + See :method:wait_get_serving_endpoint_not_updating for more details. + + + .. py:method:: update_config_and_wait(name: str [, auto_capture_config: Optional[AutoCaptureConfigInput], served_entities: Optional[List[ServedEntityInput]], served_models: Optional[List[ServedModelInput]], traffic_config: Optional[TrafficConfig], timeout: datetime.timedelta = 0:20:00]) -> ServingEndpointDetailed + + + .. py:method:: update_permissions(serving_endpoint_id: str [, access_control_list: Optional[List[ServingEndpointAccessControlRequest]]]) -> ServingEndpointPermissions + + Update serving endpoint permissions. + + Updates the permissions on a serving endpoint. Serving endpoints can inherit permissions from their + root object. + + :param serving_endpoint_id: str + The serving endpoint for which to get or manage permissions. + :param access_control_list: List[:class:`ServingEndpointAccessControlRequest`] (optional) + + :returns: :class:`ServingEndpointPermissions` + + + .. py:method:: wait_get_serving_endpoint_not_updating(name: str, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[ServingEndpointDetailed], None]]) -> ServingEndpointDetailed diff --git a/docs/workspace/settings/credentials_manager.rst b/docs/workspace/settings/credentials_manager.rst new file mode 100644 index 000000000..1767ba34e --- /dev/null +++ b/docs/workspace/settings/credentials_manager.rst @@ -0,0 +1,23 @@ +``w.credentials_manager``: Credentials Manager +============================================== +.. 
currentmodule:: databricks.sdk.service.settings + +.. py:class:: CredentialsManagerAPI + + Credentials manager interacts with Identity Providers to perform token exchanges using stored + credentials and refresh tokens. + + .. py:method:: exchange_token(partition_id: PartitionId, token_type: List[TokenType], scopes: List[str]) -> ExchangeTokenResponse + + Exchange token. + + Exchange tokens with an Identity Provider to get a new access token. It allows specifying scopes to + determine token permissions. + + :param partition_id: :class:`PartitionId` + :param token_type: List[:class:`TokenType`] + :param scopes: List[str] + Array of scopes for the token request. + + :returns: :class:`ExchangeTokenResponse` + \ No newline at end of file diff --git a/docs/workspace/settings/index.rst b/docs/workspace/settings/index.rst new file mode 100644 index 000000000..a524f671d --- /dev/null +++ b/docs/workspace/settings/index.rst @@ -0,0 +1,15 @@ + +Settings +======== + +Manage security settings for Accounts and Workspaces + +.. toctree:: + :maxdepth: 1 + + credentials_manager + ip_access_lists + settings + token_management + tokens + workspace_conf \ No newline at end of file diff --git a/docs/workspace/settings/ip_access_lists.rst b/docs/workspace/settings/ip_access_lists.rst new file mode 100644 index 000000000..a265c5943 --- /dev/null +++ b/docs/workspace/settings/ip_access_lists.rst @@ -0,0 +1,229 @@ +``w.ip_access_lists``: IP Access Lists +====================================== +.. currentmodule:: databricks.sdk.service.settings + +.. py:class:: IpAccessListsAPI + + IP Access List enables admins to configure IP access lists. + + IP access lists affect web application access and REST API access to this workspace only. If the feature + is disabled for a workspace, all access is allowed for this workspace. There is support for allow lists + (inclusion) and block lists (exclusion). + + When a connection is attempted: 1.
**First, all block lists are checked.** If the connection IP address + matches any block list, the connection is rejected. 2. **If the connection was not rejected by block + lists**, the IP address is compared with the allow lists. + + If there is at least one allow list for the workspace, the connection is allowed only if the IP address + matches an allow list. If there are no allow lists for the workspace, all IP addresses are allowed. + + For all allow lists and block lists combined, the workspace supports a maximum of 1000 IP/CIDR values, + where one CIDR counts as a single value. + + After changes to the IP access list feature, it can take a few minutes for changes to take effect. + + .. py:method:: create(label: str, list_type: ListType [, ip_addresses: Optional[List[str]]]) -> CreateIpAccessListResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import settings + + w = WorkspaceClient() + + created = w.ip_access_lists.create(label=f'sdk-{time.time_ns()}', + ip_addresses=["1.0.0.0/16"], + list_type=settings.ListType.BLOCK) + + # cleanup + w.ip_access_lists.delete(ip_access_list_id=created.ip_access_list.list_id) + + Create access list. + + Creates an IP access list for this workspace. + + A list can be an allow list or a block list. See the top of this file for a description of how the + server treats allow lists and block lists at runtime. + + When creating or updating an IP access list: + + * For all allow lists and block lists combined, the API supports a maximum of 1000 IP/CIDR values, + where one CIDR counts as a single value. Attempts to exceed that number return error 400 with + `error_code` value `QUOTA_EXCEEDED`. * If the new list would block the calling user's current IP, + error 400 is returned with `error_code` value `INVALID_STATE`. + + It can take a few minutes for the changes to take effect. 
**Note**: Your new IP access list has no + effect until you enable the feature. See :method:workspaceconf/setStatus + + :param label: str + Label for the IP access list. This **cannot** be empty. + :param list_type: :class:`ListType` + Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. IP addresses in the block list are excluded even if they are included in an allow list. + :param ip_addresses: List[str] (optional) + + :returns: :class:`CreateIpAccessListResponse` + + + .. py:method:: delete(ip_access_list_id: str) + + Delete access list. + + Deletes an IP access list, specified by its list ID. + + :param ip_access_list_id: str + The ID for the corresponding IP access list + + + + + .. py:method:: get(ip_access_list_id: str) -> FetchIpAccessListResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import settings + + w = WorkspaceClient() + + created = w.ip_access_lists.create(label=f'sdk-{time.time_ns()}', + ip_addresses=["1.0.0.0/16"], + list_type=settings.ListType.BLOCK) + + by_id = w.ip_access_lists.get(ip_access_list_id=created.ip_access_list.list_id) + + # cleanup + w.ip_access_lists.delete(ip_access_list_id=created.ip_access_list.list_id) + + Get access list. + + Gets an IP access list, specified by its list ID. + + :param ip_access_list_id: str + The ID for the corresponding IP access list + + :returns: :class:`FetchIpAccessListResponse` + + + .. py:method:: list() -> Iterator[IpAccessListInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.ip_access_lists.list() + + Get access lists. + + Gets all IP access lists for the specified workspace. + + :returns: Iterator over :class:`IpAccessListInfo` + + + .. 
py:method:: replace(ip_access_list_id: str, label: str, list_type: ListType, enabled: bool [, ip_addresses: Optional[List[str]]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import settings + + w = WorkspaceClient() + + created = w.ip_access_lists.create(label=f'sdk-{time.time_ns()}', + ip_addresses=["1.0.0.0/16"], + list_type=settings.ListType.BLOCK) + + w.ip_access_lists.replace(ip_access_list_id=created.ip_access_list.list_id, + label=f'sdk-{time.time_ns()}', + ip_addresses=["1.0.0.0/24"], + list_type=settings.ListType.BLOCK, + enabled=False) + + # cleanup + w.ip_access_lists.delete(ip_access_list_id=created.ip_access_list.list_id) + + Replace access list. + + Replaces an IP access list, specified by its ID. + + A list can include allow lists and block lists. See the top of this file for a description of how the + server treats allow lists and block lists at run time. When replacing an IP access list: * For all + allow lists and block lists combined, the API supports a maximum of 1000 IP/CIDR values, where one + CIDR counts as a single value. Attempts to exceed that number return error 400 with `error_code` value + `QUOTA_EXCEEDED`. * If the resulting list would block the calling user's current IP, error 400 is + returned with `error_code` value `INVALID_STATE`. It can take a few minutes for the changes to take + effect. Note that your resulting IP access list has no effect until you enable the feature. See + :method:workspaceconf/setStatus. + + :param ip_access_list_id: str + The ID for the corresponding IP access list + :param label: str + Label for the IP access list. This **cannot** be empty. + :param list_type: :class:`ListType` + Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. 
IP addresses in the block list are excluded even if they are included in an allow list. + :param enabled: bool + Specifies whether this IP access list is enabled. + :param ip_addresses: List[str] (optional) + + + + + .. py:method:: update(ip_access_list_id: str [, enabled: Optional[bool], ip_addresses: Optional[List[str]], label: Optional[str], list_type: Optional[ListType]]) + + Update access list. + + Updates an existing IP access list, specified by its ID. + + A list can include allow lists and block lists. See the top of this file for a description of how the + server treats allow lists and block lists at run time. + + When updating an IP access list: + + * For all allow lists and block lists combined, the API supports a maximum of 1000 IP/CIDR values, + where one CIDR counts as a single value. Attempts to exceed that number return error 400 with + `error_code` value `QUOTA_EXCEEDED`. * If the updated list would block the calling user's current IP, + error 400 is returned with `error_code` value `INVALID_STATE`. + + It can take a few minutes for the changes to take effect. Note that your resulting IP access list has + no effect until you enable the feature. See :method:workspaceconf/setStatus. + + :param ip_access_list_id: str + The ID for the corresponding IP access list + :param enabled: bool (optional) + Specifies whether this IP access list is enabled. + :param ip_addresses: List[str] (optional) + :param label: str (optional) + Label for the IP access list. This **cannot** be empty. + :param list_type: :class:`ListType` (optional) + Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. IP addresses in the block list are excluded even if they are included in an allow list. 
+ + + \ No newline at end of file diff --git a/docs/workspace/settings/settings.rst b/docs/workspace/settings/settings.rst new file mode 100644 index 000000000..0395213d3 --- /dev/null +++ b/docs/workspace/settings/settings.rst @@ -0,0 +1,83 @@ +``w.settings``: Default Namespace +================================= +.. currentmodule:: databricks.sdk.service.settings + +.. py:class:: SettingsAPI + + The default namespace setting API allows users to configure the default namespace for a Databricks + workspace. + + Through this API, users can retrieve, set, or modify the default namespace used when queries do not + reference a fully qualified three-level name. For example, if you use the API to set 'retail_prod' as the + default catalog, then a query 'SELECT * FROM myTable' would reference the object + 'retail_prod.default.myTable' (the schema 'default' is always assumed). + + This setting requires a restart of clusters and SQL warehouses to take effect. Additionally, the default + namespace only applies when using Unity Catalog-enabled compute. + + .. py:method:: delete_default_workspace_namespace(etag: str) -> DeleteDefaultWorkspaceNamespaceResponse + + Delete the default namespace setting. + + Deletes the default namespace setting for the workspace. A fresh etag needs to be provided in DELETE + requests (as a query parameter). The etag can be retrieved by making a GET request before the DELETE + request. If the setting is updated/deleted concurrently, DELETE will fail with 409 and the request + will need to be retried by using the fresh etag in the 409 response. + + :param etag: str + etag used for versioning. The response is at least as fresh as the eTag provided. This is used for + optimistic concurrency control as a way to help prevent simultaneous writes of a setting overwriting + each other. It is strongly suggested that systems make use of the etag in the read -> delete pattern + to perform setting deletions in order to avoid race conditions. 
That is, get an etag from a GET + request, and pass it with the DELETE request to identify the rule set version you are deleting. + + :returns: :class:`DeleteDefaultWorkspaceNamespaceResponse` + + + .. py:method:: read_default_workspace_namespace(etag: str) -> DefaultNamespaceSetting + + Get the default namespace setting. + + Gets the default namespace setting. + + :param etag: str + etag used for versioning. The response is at least as fresh as the eTag provided. This is used for + optimistic concurrency control as a way to help prevent simultaneous writes of a setting overwriting + each other. It is strongly suggested that systems make use of the etag in the read -> delete pattern + to perform setting deletions in order to avoid race conditions. That is, get an etag from a GET + request, and pass it with the DELETE request to identify the rule set version you are deleting. + + :returns: :class:`DefaultNamespaceSetting` + + + .. py:method:: update_default_workspace_namespace( [, allow_missing: Optional[bool], field_mask: Optional[str], setting: Optional[DefaultNamespaceSetting]]) -> DefaultNamespaceSetting + + Update the default namespace setting. + + Updates the default namespace setting for the workspace. A fresh etag needs to be provided in PATCH + requests (as part of the setting field). The etag can be retrieved by making a GET request before the + PATCH request. Note that if the setting does not exist, GET will return a NOT_FOUND error and the etag + will be present in the error response, which should be set in the PATCH request. If the setting is + updated concurrently, PATCH will fail with 409 and the request will need to be retried by using the + fresh etag in the 409 response. + + :param allow_missing: bool (optional) + This should always be set to true for Settings API. Added for AIP compliance. + :param field_mask: str (optional) + Field mask is required to be passed into the PATCH request. 
Field mask specifies which fields of the + setting payload will be updated. For example, for Default Namespace setting, the field mask is + supposed to contain fields from the DefaultNamespaceSetting.namespace schema. + + The field mask needs to be supplied as a single string. To specify multiple fields in the field mask, + use comma as the separator (no space). + :param setting: :class:`DefaultNamespaceSetting` (optional) + This represents the setting configuration for the default namespace in the Databricks workspace. + Setting the default catalog for the workspace determines the catalog that is used when queries do + not reference a fully qualified 3 level name. For example, if the default catalog is set to + 'retail_prod' then a query 'SELECT * FROM myTable' would reference the object + 'retail_prod.default.myTable' (the schema 'default' is always assumed). This setting requires a + restart of clusters and SQL warehouses to take effect. Additionally, the default namespace only + applies when using Unity Catalog-enabled compute. + + :returns: :class:`DefaultNamespaceSetting` + \ No newline at end of file diff --git a/docs/workspace/settings/token_management.rst b/docs/workspace/settings/token_management.rst new file mode 100644 index 000000000..cf1860419 --- /dev/null +++ b/docs/workspace/settings/token_management.rst @@ -0,0 +1,162 @@ +``w.token_management``: Token management +======================================== +.. currentmodule:: databricks.sdk.service.settings + +.. py:class:: TokenManagementAPI + + Enables administrators to get all tokens and delete tokens for other users. Admins can either get every + token, get a specific token by ID, or get all tokens for a particular user. + + .. py:method:: create_obo_token(application_id: str, lifetime_seconds: int [, comment: Optional[str]]) -> CreateOboTokenResponse + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import iam + + w = WorkspaceClient() + + groups = w.groups.group_display_name_to_id_map(iam.ListGroupsRequest()) + + spn = w.service_principals.create(display_name=f'sdk-{time.time_ns()}', + groups=[iam.ComplexValue(value=groups["admins"])]) + + obo = w.token_management.create_obo_token(application_id=spn.application_id, lifetime_seconds=60) + + # cleanup + w.service_principals.delete(id=spn.id) + w.token_management.delete(token_id=obo.token_info.token_id) + + Create on-behalf token. + + Creates a token on behalf of a service principal. + + :param application_id: str + Application ID of the service principal. + :param lifetime_seconds: int + The number of seconds before the token expires. + :param comment: str (optional) + Comment that describes the purpose of the token. + + :returns: :class:`CreateOboTokenResponse` + + + .. py:method:: delete(token_id: str) + + Delete a token. + + Deletes a token, specified by its ID. + + :param token_id: str + The ID of the token to get. + + + + + .. py:method:: get(token_id: str) -> TokenInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import iam + + w = WorkspaceClient() + + groups = w.groups.group_display_name_to_id_map(iam.ListGroupsRequest()) + + spn = w.service_principals.create(display_name=f'sdk-{time.time_ns()}', + groups=[iam.ComplexValue(value=groups["admins"])]) + + obo = w.token_management.create_obo_token(application_id=spn.application_id, lifetime_seconds=60) + + by_id = w.token_management.get(token_id=obo.token_info.token_id) + + # cleanup + w.service_principals.delete(id=spn.id) + w.token_management.delete(token_id=obo.token_info.token_id) + + Get token info. + + Gets information about a token, specified by its ID. + + :param token_id: str + The ID of the token to get. + + :returns: :class:`TokenInfo` + + + .. 
py:method:: get_permission_levels() -> GetTokenPermissionLevelsResponse + + Get token permission levels. + + Gets the permission levels that a user can have on an object. + + :returns: :class:`GetTokenPermissionLevelsResponse` + + + .. py:method:: get_permissions() -> TokenPermissions + + Get token permissions. + + Gets the permissions of all tokens. Tokens can inherit permissions from their root object. + + :returns: :class:`TokenPermissions` + + + .. py:method:: list( [, created_by_id: Optional[str], created_by_username: Optional[str]]) -> Iterator[TokenInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import settings + + w = WorkspaceClient() + + all = w.token_management.list(settings.ListTokenManagementRequest()) + + List all tokens. + + Lists all tokens associated with the specified workspace or user. + + :param created_by_id: str (optional) + User ID of the user that created the token. + :param created_by_username: str (optional) + Username of the user that created the token. + + :returns: Iterator over :class:`TokenInfo` + + + .. py:method:: set_permissions( [, access_control_list: Optional[List[TokenAccessControlRequest]]]) -> TokenPermissions + + Set token permissions. + + Sets permissions on all tokens. Tokens can inherit permissions from their root object. + + :param access_control_list: List[:class:`TokenAccessControlRequest`] (optional) + + :returns: :class:`TokenPermissions` + + + .. py:method:: update_permissions( [, access_control_list: Optional[List[TokenAccessControlRequest]]]) -> TokenPermissions + + Update token permissions. + + Updates the permissions on all tokens. Tokens can inherit permissions from their root object. 
+ + :param access_control_list: List[:class:`TokenAccessControlRequest`] (optional) + + :returns: :class:`TokenPermissions` + \ No newline at end of file diff --git a/docs/workspace/settings/tokens.rst b/docs/workspace/settings/tokens.rst new file mode 100644 index 000000000..899db00d1 --- /dev/null +++ b/docs/workspace/settings/tokens.rst @@ -0,0 +1,76 @@ +``w.tokens``: Token +=================== +.. currentmodule:: databricks.sdk.service.settings + +.. py:class:: TokensAPI + + The Token API allows you to create, list, and revoke tokens that can be used to authenticate and access + Databricks REST APIs. + + .. py:method:: create( [, comment: Optional[str], lifetime_seconds: Optional[int]]) -> CreateTokenResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + token = w.tokens.create(comment=f'sdk-{time.time_ns()}', lifetime_seconds=300) + + # cleanup + w.tokens.delete(token_id=token.token_info.token_id) + + Create a user token. + + Creates and returns a token for a user. If this call is made through token authentication, it creates + a token with the same client ID as the authenticated token. If the user's token quota is exceeded, + this call returns an error **QUOTA_EXCEEDED**. + + :param comment: str (optional) + Optional description to attach to the token. + :param lifetime_seconds: int (optional) + The lifetime of the token, in seconds. + + If the lifetime is not specified, this token remains valid indefinitely. + + :returns: :class:`CreateTokenResponse` + + + .. py:method:: delete(token_id: str) + + Revoke token. + + Revokes an access token. + + If a token with the specified ID is not valid, this call returns an error **RESOURCE_DOES_NOT_EXIST**. + + :param token_id: str + The ID of the token to be revoked. + + + + + .. py:method:: list() -> Iterator[PublicTokenInfo] + + + Usage: + + .. 
code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.tokens.list() + + List tokens. + + Lists all the valid tokens for a user-workspace pair. + + :returns: Iterator over :class:`PublicTokenInfo` + \ No newline at end of file diff --git a/docs/workspace/settings/workspace_conf.rst b/docs/workspace/settings/workspace_conf.rst new file mode 100644 index 000000000..892819383 --- /dev/null +++ b/docs/workspace/settings/workspace_conf.rst @@ -0,0 +1,39 @@ +``w.workspace_conf``: Workspace Conf +==================================== +.. currentmodule:: databricks.sdk.service.settings + +.. py:class:: WorkspaceConfAPI + + This API allows updating known workspace settings for advanced users. + + .. py:method:: get_status(keys: str) -> WorkspaceConf + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + conf = w.workspace_conf.get_status(keys="enableWorkspaceFilesystem") + + Check configuration status. + + Gets the configuration status for a workspace. + + :param keys: str + + :returns: Dict[str,str] + + + .. py:method:: set_status() + + Enable/disable features. + + Sets the configuration status for a workspace, including enabling or disabling it. + + + + \ No newline at end of file diff --git a/docs/workspace/sharing/clean_rooms.rst b/docs/workspace/sharing/clean_rooms.rst new file mode 100644 index 000000000..827b39f0d --- /dev/null +++ b/docs/workspace/sharing/clean_rooms.rst @@ -0,0 +1,103 @@ +``w.clean_rooms``: Clean Rooms +============================== +.. currentmodule:: databricks.sdk.service.sharing + +.. py:class:: CleanRoomsAPI + + A clean room is a secure, privacy-protecting environment where two or more parties can share sensitive + enterprise data, including customer data, for measurements, insights, activation and other use cases. + + To create clean rooms, you must be a metastore admin or a user with the **CREATE_CLEAN_ROOM** privilege. + + .. 
py:method:: create(name: str, remote_detailed_info: CentralCleanRoomInfo [, comment: Optional[str]]) -> CleanRoomInfo + + Create a clean room. + + Creates a new clean room with specified collaborators. The caller must be a metastore admin or have the + **CREATE_CLEAN_ROOM** privilege on the metastore. + + :param name: str + Name of the clean room. + :param remote_detailed_info: :class:`CentralCleanRoomInfo` + Central clean room details. + :param comment: str (optional) + User-provided free-form text description. + + :returns: :class:`CleanRoomInfo` + + + .. py:method:: delete(name_arg: str) + + Delete a clean room. + + Deletes a data object clean room from the metastore. The caller must be an owner of the clean room. + + :param name_arg: str + The name of the clean room. + + + + + .. py:method:: get(name_arg: str [, include_remote_details: Optional[bool]]) -> CleanRoomInfo + + Get a clean room. + + Gets a data object clean room from the metastore. The caller must be a metastore admin or the owner of + the clean room. + + :param name_arg: str + The name of the clean room. + :param include_remote_details: bool (optional) + Whether to include remote details (central) on the clean room. + + :returns: :class:`CleanRoomInfo` + + + .. py:method:: list( [, max_results: Optional[int], page_token: Optional[str]]) -> Iterator[CleanRoomInfo] + + List clean rooms. + + Gets an array of data object clean rooms from the metastore. The caller must be a metastore admin or + the owner of the clean room. There is no guarantee of a specific ordering of the elements in the + array. + + :param max_results: int (optional) + Maximum number of clean rooms to return. If not set, all the clean rooms are returned (not + recommended). 
- when set to a value greater than 0, the page length is the minimum of this value and + a server configured value; - when set to 0, the page length is set to a server configured value + (recommended); - when set to a value less than 0, an invalid parameter error is returned; + :param page_token: str (optional) + Opaque pagination token to go to next page based on previous query. + + :returns: Iterator over :class:`CleanRoomInfo` + + + .. py:method:: update(name_arg: str [, catalog_updates: Optional[List[CleanRoomCatalogUpdate]], comment: Optional[str], owner: Optional[str]]) -> CleanRoomInfo + + Update a clean room. + + Updates the clean room with the changes and data objects in the request. The caller must be the owner + of the clean room or a metastore admin. + + When the caller is a metastore admin, only the __owner__ field can be updated. + + In the case that the clean room name is changed **updateCleanRoom** requires that the caller is both + the clean room owner and a metastore admin. + + For each table that is added through this method, the clean room owner must also have **SELECT** + privilege on the table. The privilege must be maintained indefinitely for recipients to be able to + access the table. Typically, you should use a group as the clean room owner. + + Table removals through **update** do not require additional privileges. + + :param name_arg: str + The name of the clean room. + :param catalog_updates: List[:class:`CleanRoomCatalogUpdate`] (optional) + Array of shared data object updates. + :param comment: str (optional) + User-provided free-form text description. + :param owner: str (optional) + Username of current owner of clean room. 
+ + :returns: :class:`CleanRoomInfo` + \ No newline at end of file diff --git a/docs/workspace/sharing/index.rst b/docs/workspace/sharing/index.rst new file mode 100644 index 000000000..e012eb548 --- /dev/null +++ b/docs/workspace/sharing/index.rst @@ -0,0 +1,14 @@ + +Delta Sharing +============= + +Configure data sharing with Unity Catalog for providers, recipients, and shares + +.. toctree:: + :maxdepth: 1 + + clean_rooms + providers + recipient_activation + recipients + shares \ No newline at end of file diff --git a/docs/workspace/sharing/providers.rst b/docs/workspace/sharing/providers.rst new file mode 100644 index 000000000..1382b5a92 --- /dev/null +++ b/docs/workspace/sharing/providers.rst @@ -0,0 +1,214 @@ +``w.providers``: Providers +========================== +.. currentmodule:: databricks.sdk.service.sharing + +.. py:class:: ProvidersAPI + + A data provider is an object representing the organization in the real world who shares the data. A + provider contains shares which further contain the shared data. + + .. py:method:: create(name: str, authentication_type: AuthenticationType [, comment: Optional[str], recipient_profile_str: Optional[str]]) -> ProviderInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + public_share_recipient = """{ + "shareCredentialsVersion":1, + "bearerToken":"dapiabcdefghijklmonpqrstuvwxyz", + "endpoint":"https://sharing.delta.io/delta-sharing/" + } + """ + + created = w.providers.create(name=f'sdk-{time.time_ns()}', recipient_profile_str=public_share_recipient) + + # cleanup + w.providers.delete(name=created.name) + + Create an auth provider. + + Creates a new authentication provider minimally based on a name and authentication type. The caller + must be an admin on the metastore. + + :param name: str + The name of the Provider. + :param authentication_type: :class:`AuthenticationType` + The delta sharing authentication type. 
+ :param comment: str (optional) + Description about the provider. + :param recipient_profile_str: str (optional) + This field is required when the __authentication_type__ is **TOKEN** or not provided. + + :returns: :class:`ProviderInfo` + + + .. py:method:: delete(name: str) + + Delete a provider. + + Deletes an authentication provider, if the caller is a metastore admin or is the owner of the + provider. + + :param name: str + Name of the provider. + + + + + .. py:method:: get(name: str) -> ProviderInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + public_share_recipient = """{ + "shareCredentialsVersion":1, + "bearerToken":"dapiabcdefghijklmonpqrstuvwxyz", + "endpoint":"https://sharing.delta.io/delta-sharing/" + } + """ + + created = w.providers.create(name=f'sdk-{time.time_ns()}', recipient_profile_str=public_share_recipient) + + _ = w.providers.get(name=created.name) + + # cleanup + w.providers.delete(name=created.name) + + Get a provider. + + Gets a specific authentication provider. The caller must supply the name of the provider, and must + either be a metastore admin or the owner of the provider. + + :param name: str + Name of the provider. + + :returns: :class:`ProviderInfo` + + + .. py:method:: list( [, data_provider_global_metastore_id: Optional[str]]) -> Iterator[ProviderInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import sharing + + w = WorkspaceClient() + + all = w.providers.list(sharing.ListProvidersRequest()) + + List providers. + + Gets an array of available authentication providers. The caller must either be a metastore admin or + the owner of the providers. Providers not owned by the caller are not included in the response. There + is no guarantee of a specific ordering of the elements in the array. 
+ + :param data_provider_global_metastore_id: str (optional) + If not provided, all providers will be returned. If no providers exist with this ID, no results will + be returned. + + :returns: Iterator over :class:`ProviderInfo` + + + .. py:method:: list_shares(name: str) -> Iterator[ProviderShare] + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + public_share_recipient = """{ + "shareCredentialsVersion":1, + "bearerToken":"dapiabcdefghijklmonpqrstuvwxyz", + "endpoint":"https://sharing.delta.io/delta-sharing/" + } + """ + + created = w.providers.create(name=f'sdk-{time.time_ns()}', recipient_profile_str=public_share_recipient) + + shares = w.providers.list_shares(name=created.name) + + # cleanup + w.providers.delete(name=created.name) + + List shares by Provider. + + Gets an array of a specified provider's shares within the metastore where: + + * the caller is a metastore admin, or * the caller is the owner. + + :param name: str + Name of the provider in which to list shares. + + :returns: Iterator over :class:`ProviderShare` + + + .. py:method:: update(name: str [, comment: Optional[str], new_name: Optional[str], owner: Optional[str], recipient_profile_str: Optional[str]]) -> ProviderInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + public_share_recipient = """{ + "shareCredentialsVersion":1, + "bearerToken":"dapiabcdefghijklmonpqrstuvwxyz", + "endpoint":"https://sharing.delta.io/delta-sharing/" + } + """ + + created = w.providers.create(name=f'sdk-{time.time_ns()}', recipient_profile_str=public_share_recipient) + + _ = w.providers.update(name=created.name, comment="Comment for update") + + # cleanup + w.providers.delete(name=created.name) + + Update a provider. + + Updates the information for an authentication provider, if the caller is a metastore admin or is the + owner of the provider. 
If the update changes the provider name, the caller must be both a metastore + admin and the owner of the provider. + + :param name: str + Name of the provider. + :param comment: str (optional) + Description about the provider. + :param new_name: str (optional) + New name for the provider. + :param owner: str (optional) + Username of Provider owner. + :param recipient_profile_str: str (optional) + This field is required when the __authentication_type__ is **TOKEN** or not provided. + + :returns: :class:`ProviderInfo` + \ No newline at end of file diff --git a/docs/workspace/sharing/recipient_activation.rst b/docs/workspace/sharing/recipient_activation.rst new file mode 100644 index 000000000..2c214d9c0 --- /dev/null +++ b/docs/workspace/sharing/recipient_activation.rst @@ -0,0 +1,37 @@ +``w.recipient_activation``: Recipient Activation +================================================ +.. currentmodule:: databricks.sdk.service.sharing + +.. py:class:: RecipientActivationAPI + + The Recipient Activation API is only applicable in the open sharing model where the recipient object has + the authentication type of `TOKEN`. The data recipient follows the activation link shared by the data + provider to download the credential file that includes the access token. The recipient will then use the + credential file to establish a secure connection with the provider to receive the shared data. + + Note that you can download the credential file only once. Recipients should treat the downloaded + credential as a secret and must not share it outside of their organization. + + .. py:method:: get_activation_url_info(activation_url: str) + + Get a share activation URL. + + Gets an activation URL for a share. + + :param activation_url: str + The one time activation url. It also accepts activation token. + + + + + .. py:method:: retrieve_token(activation_url: str) -> RetrieveTokenResponse + + Get an access token. + + Retrieve access token with an activation url. 
This is a public API without any authentication. + + :param activation_url: str + The one time activation url. It also accepts activation token. + + :returns: :class:`RetrieveTokenResponse` + \ No newline at end of file diff --git a/docs/workspace/sharing/recipients.rst b/docs/workspace/sharing/recipients.rst new file mode 100644 index 000000000..86a004d36 --- /dev/null +++ b/docs/workspace/sharing/recipients.rst @@ -0,0 +1,247 @@ +``w.recipients``: Recipients +============================ +.. currentmodule:: databricks.sdk.service.sharing + +.. py:class:: RecipientsAPI + + A recipient is an object you create using :method:recipients/create to represent an organization which you + want to allow access to shares. How sharing works differs depending on whether or not your recipient + has access to a Databricks workspace that is enabled for Unity Catalog: + + - For recipients with access to a Databricks workspace that is enabled for Unity Catalog, you can create a + recipient object along with a unique sharing identifier you get from the recipient. The sharing identifier + is the key identifier that enables the secure connection. This sharing mode is called + **Databricks-to-Databricks sharing**. + + - For recipients without access to a Databricks workspace that is enabled for Unity Catalog, when you + create a recipient object, Databricks generates an activation link you can send to the recipient. The + recipient follows the activation link to download the credential file, and then uses the credential file + to establish a secure connection to receive the shared data. This sharing mode is called **open sharing**. + + .. py:method:: create(name: str, authentication_type: AuthenticationType [, comment: Optional[str], data_recipient_global_metastore_id: Optional[str], ip_access_list: Optional[IpAccessList], owner: Optional[str], properties_kvpairs: Optional[SecurablePropertiesKvPairs], sharing_code: Optional[str]]) -> RecipientInfo + + + Usage: + + ..
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.recipients.create(name=f'sdk-{time.time_ns()}') + + # cleanup + w.recipients.delete(name=created.name) + + Create a share recipient. + + Creates a new recipient with the delta sharing authentication type in the metastore. The caller must + be a metastore admin or have the **CREATE_RECIPIENT** privilege on the metastore. + + :param name: str + Name of Recipient. + :param authentication_type: :class:`AuthenticationType` + The delta sharing authentication type. + :param comment: str (optional) + Description about the recipient. + :param data_recipient_global_metastore_id: str (optional) + The global Unity Catalog metastore id provided by the data recipient. This field is required when + the __authentication_type__ is **DATABRICKS**. The identifier is of format + __cloud__:__region__:__metastore-uuid__. + :param ip_access_list: :class:`IpAccessList` (optional) + IP Access List + :param owner: str (optional) + Username of the recipient owner. + :param properties_kvpairs: :class:`SecurablePropertiesKvPairs` (optional) + Recipient properties as map of string key-value pairs. + :param sharing_code: str (optional) + The one-time sharing code provided by the data recipient. This field is required when the + __authentication_type__ is **DATABRICKS**. + + :returns: :class:`RecipientInfo` + + + .. py:method:: delete(name: str) + + Delete a share recipient. + + Deletes the specified recipient from the metastore. The caller must be the owner of the recipient. + + :param name: str + Name of the recipient. + + + + + .. py:method:: get(name: str) -> RecipientInfo + + + Usage: + + ..
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.recipients.create(name=f'sdk-{time.time_ns()}') + + _ = w.recipients.get(name=created.name) + + # cleanup + w.recipients.delete(name=created.name) + + Get a share recipient. + + Gets a share recipient from the metastore if: + + * the caller is the owner of the share recipient, or: * is a metastore admin + + :param name: str + Name of the recipient. + + :returns: :class:`RecipientInfo` + + + .. py:method:: list( [, data_recipient_global_metastore_id: Optional[str]]) -> Iterator[RecipientInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import sharing + + w = WorkspaceClient() + + all = w.recipients.list(sharing.ListRecipientsRequest()) + + List share recipients. + + Gets an array of all share recipients within the current metastore where: + + * the caller is a metastore admin, or * the caller is the owner. There is no guarantee of a specific + ordering of the elements in the array. + + :param data_recipient_global_metastore_id: str (optional) + If not provided, all recipients will be returned. If no recipients exist with this ID, no results + will be returned. + + :returns: Iterator over :class:`RecipientInfo` + + + .. py:method:: rotate_token(name: str, existing_token_expire_in_seconds: int) -> RecipientInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.recipients.create(name=f'sdk-{time.time_ns()}') + + recipient_info = w.recipients.rotate_token(name=created.name, existing_token_expire_in_seconds=0) + + # cleanup + w.recipients.delete(name=created.name) + + Rotate a token. + + Refreshes the specified recipient's delta sharing authentication token with the provided token info. + The caller must be the owner of the recipient. + + :param name: str + The name of the recipient. 
+ :param existing_token_expire_in_seconds: int + The expiration time of the bearer token in ISO 8601 format. This will set the expiration_time of + existing token only to a smaller timestamp, it cannot extend the expiration_time. Use 0 to expire + the existing token immediately, negative number will return an error. + + :returns: :class:`RecipientInfo` + + + .. py:method:: share_permissions(name: str) -> GetRecipientSharePermissionsResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.recipients.create(name=f'sdk-{time.time_ns()}') + + share_permissions = w.recipients.share_permissions(name=created.name) + + # cleanup + w.recipients.delete(name=created.name) + + Get recipient share permissions. + + Gets the share permissions for the specified Recipient. The caller must be a metastore admin or the + owner of the Recipient. + + :param name: str + The name of the Recipient. + + :returns: :class:`GetRecipientSharePermissionsResponse` + + + .. py:method:: update(name: str [, comment: Optional[str], ip_access_list: Optional[IpAccessList], new_name: Optional[str], owner: Optional[str], properties_kvpairs: Optional[SecurablePropertiesKvPairs]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.recipients.create(name=f'sdk-{time.time_ns()}') + + w.recipients.update(name=created.name, comment=f'sdk-{time.time_ns()}') + + # cleanup + w.recipients.delete(name=created.name) + + Update a share recipient. + + Updates an existing recipient in the metastore. The caller must be a metastore admin or the owner of + the recipient. If the recipient name will be updated, the user must be both a metastore admin and the + owner of the recipient. + + :param name: str + Name of the recipient. + :param comment: str (optional) + Description about the recipient. 
+ :param ip_access_list: :class:`IpAccessList` (optional) + IP Access List + :param new_name: str (optional) + New name for the recipient. + :param owner: str (optional) + Username of the recipient owner. + :param properties_kvpairs: :class:`SecurablePropertiesKvPairs` (optional) + Recipient properties as map of string key-value pairs. When provided in update request, the + specified properties will override the existing properties. To add and remove properties, one would + need to perform a read-modify-write. + + + \ No newline at end of file diff --git a/docs/workspace/sharing/shares.rst b/docs/workspace/sharing/shares.rst new file mode 100644 index 000000000..63c9b1ebe --- /dev/null +++ b/docs/workspace/sharing/shares.rst @@ -0,0 +1,211 @@ +``w.shares``: Shares +==================== +.. currentmodule:: databricks.sdk.service.sharing + +.. py:class:: SharesAPI + + A share is a container instantiated with :method:shares/create. Once created you can iteratively register + a collection of existing data assets defined within the metastore using :method:shares/update. You can + register data assets under their original name, qualified by their original schema, or provide alternate + exposed names. + + .. py:method:: create(name: str [, comment: Optional[str]]) -> ShareInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created_share = w.shares.create(name=f'sdk-{time.time_ns()}') + + # cleanup + w.shares.delete(name=created_share.name) + + Create a share. + + Creates a new share for data objects. Data objects can be added after creation with **update**. The + caller must be a metastore admin or have the **CREATE_SHARE** privilege on the metastore. + + :param name: str + Name of the share. + :param comment: str (optional) + User-provided free-form text description. + + :returns: :class:`ShareInfo` + + + .. py:method:: delete(name: str) + + Delete a share. 
+ + Deletes a data object share from the metastore. The caller must be an owner of the share. + + :param name: str + The name of the share. + + + + + .. py:method:: get(name: str [, include_shared_data: Optional[bool]]) -> ShareInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created_share = w.shares.create(name=f'sdk-{time.time_ns()}') + + _ = w.shares.get(name=created_share.name) + + # cleanup + w.shares.delete(name=created_share.name) + + Get a share. + + Gets a data object share from the metastore. The caller must be a metastore admin or the owner of the + share. + + :param name: str + The name of the share. + :param include_shared_data: bool (optional) + Query for data to include in the share. + + :returns: :class:`ShareInfo` + + + .. py:method:: list() -> Iterator[ShareInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.shares.list() + + List shares. + + Gets an array of data object shares from the metastore. The caller must be a metastore admin or the + owner of the share. There is no guarantee of a specific ordering of the elements in the array. + + :returns: Iterator over :class:`ShareInfo` + + + .. py:method:: share_permissions(name: str) -> catalog.PermissionsList + + Get permissions. + + Gets the permissions for a data share from the metastore. The caller must be a metastore admin or the + owner of the share. + + :param name: str + The name of the share. + + :returns: :class:`PermissionsList` + + + .. py:method:: update(name: str [, comment: Optional[str], new_name: Optional[str], owner: Optional[str], updates: Optional[List[SharedDataObjectUpdate]]]) -> ShareInfo + + + Usage: + + .. 
code-block:: + + import os + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import sharing + + w = WorkspaceClient() + + table_name = f'sdk-{time.time_ns()}' + + created_catalog = w.catalogs.create(name=f'sdk-{time.time_ns()}') + + created_schema = w.schemas.create(name=f'sdk-{time.time_ns()}', catalog_name=created_catalog.name) + + _ = w.statement_execution.execute(warehouse_id=os.environ["TEST_DEFAULT_WAREHOUSE_ID"], + catalog=created_catalog.name, + schema=created_schema.name, + statement="CREATE TABLE %s AS SELECT 2+2 as four" % (table_name)).result() + + table_full_name = "%s.%s.%s" % (created_catalog.name, created_schema.name, table_name) + + created_share = w.shares.create(name=f'sdk-{time.time_ns()}') + + _ = w.shares.update(name=created_share.name, + updates=[ + sharing.SharedDataObjectUpdate(action=sharing.SharedDataObjectUpdateAction.ADD, + data_object=sharing.SharedDataObject( + name=table_full_name, data_object_type="TABLE")) + ]) + + # cleanup + w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=created_catalog.name, force=True) + w.tables.delete(full_name=table_full_name) + w.shares.delete(name=created_share.name) + + Update a share. + + Updates the share with the changes and data objects in the request. The caller must be the owner of + the share or a metastore admin. + + When the caller is a metastore admin, only the __owner__ field can be updated. + + In the case that the share name is changed, **updateShare** requires that the caller is both the share + owner and a metastore admin. + + For each table that is added through this method, the share owner must also have **SELECT** privilege + on the table. This privilege must be maintained indefinitely for recipients to be able to access the + table. Typically, you should use a group as the share owner. + + Table removals through **update** do not require additional privileges. + + :param name: str + The name of the share. 
+ :param comment: str (optional) + User-provided free-form text description. + :param new_name: str (optional) + New name for the share. + :param owner: str (optional) + Username of current owner of share. + :param updates: List[:class:`SharedDataObjectUpdate`] (optional) + Array of shared data object updates. + + :returns: :class:`ShareInfo` + + + .. py:method:: update_permissions(name: str [, changes: Optional[List[catalog.PermissionsChange]]]) + + Update permissions. + + Updates the permissions for a data share in the metastore. The caller must be a metastore admin or an + owner of the share. + + For new recipient grants, the user must also be the owner of the recipients. Recipient revocations do + not require additional privileges. + + :param name: str + The name of the share. + :param changes: List[:class:`PermissionsChange`] (optional) + Array of permission changes. + + + \ No newline at end of file diff --git a/docs/workspace/sql/alerts.rst b/docs/workspace/sql/alerts.rst new file mode 100644 index 000000000..49a518bda --- /dev/null +++ b/docs/workspace/sql/alerts.rst @@ -0,0 +1,183 @@ +``w.alerts``: Alerts +==================== +.. currentmodule:: databricks.sdk.service.sql + +.. py:class:: AlertsAPI + + The alerts API can be used to perform CRUD operations on alerts. An alert is a Databricks SQL object that + periodically runs a query, evaluates a condition of its result, and notifies one or more users and/or + notification destinations if the condition was met. Alerts can be scheduled using the `sql_task` type of + the Jobs API, e.g. :method:jobs/create. + + .. py:method:: create(name: str, options: AlertOptions, query_id: str [, parent: Optional[str], rearm: Optional[int]]) -> Alert + + + Usage: + + ..
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import sql + + w = WorkspaceClient() + + srcs = w.data_sources.list() + + query = w.queries.create(name=f'sdk-{time.time_ns()}', + data_source_id=srcs[0].id, + description="test query from Go SDK", + query="SELECT 1") + + alert = w.alerts.create(options=sql.AlertOptions(column="1", op="==", value="1"), + name=f'sdk-{time.time_ns()}', + query_id=query.id) + + # cleanup + w.queries.delete(query_id=query.id) + w.alerts.delete(alert_id=alert.id) + + Create an alert. + + Creates an alert. An alert is a Databricks SQL object that periodically runs a query, evaluates a + condition of its result, and notifies users or notification destinations if the condition was met. + + :param name: str + Name of the alert. + :param options: :class:`AlertOptions` + Alert configuration options. + :param query_id: str + Query ID. + :param parent: str (optional) + The identifier of the workspace folder containing the object. + :param rearm: int (optional) + Number of seconds after being triggered before the alert rearms itself and can be triggered again. + If `null`, alert will never be triggered again. + + :returns: :class:`Alert` + + + .. py:method:: delete(alert_id: str) + + Delete an alert. + + Deletes an alert. Deleted alerts are no longer accessible and cannot be restored. **Note:** Unlike + queries and dashboards, alerts cannot be moved to the trash. + + :param alert_id: str + + + + + .. py:method:: get(alert_id: str) -> Alert + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import sql + + w = WorkspaceClient() + + srcs = w.data_sources.list() + + query = w.queries.create(name=f'sdk-{time.time_ns()}', + data_source_id=srcs[0].id, + description="test query from Go SDK", + query="SELECT 1") + + alert = w.alerts.create(options=sql.AlertOptions(column="1", op="==", value="1"), + name=f'sdk-{time.time_ns()}', + query_id=query.id) + + by_id = w.alerts.get(alert_id=alert.id) + + # cleanup + w.queries.delete(query_id=query.id) + w.alerts.delete(alert_id=alert.id) + + Get an alert. + + Gets an alert. + + :param alert_id: str + + :returns: :class:`Alert` + + + .. py:method:: list() -> Iterator[Alert] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + all = w.alerts.list() + + Get alerts. + + Gets a list of alerts. + + :returns: Iterator over :class:`Alert` + + + .. py:method:: update(alert_id: str, name: str, options: AlertOptions, query_id: str [, rearm: Optional[int]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import sql + + w = WorkspaceClient() + + srcs = w.data_sources.list() + + query = w.queries.create(name=f'sdk-{time.time_ns()}', + data_source_id=srcs[0].id, + description="test query from Go SDK", + query="SELECT 1") + + alert = w.alerts.create(options=sql.AlertOptions(column="1", op="==", value="1"), + name=f'sdk-{time.time_ns()}', + query_id=query.id) + + w.alerts.update(options=sql.AlertOptions(column="1", op="==", value="1"), + alert_id=alert.id, + name=f'sdk-{time.time_ns()}', + query_id=query.id) + + # cleanup + w.queries.delete(query_id=query.id) + w.alerts.delete(alert_id=alert.id) + + Update an alert. + + Updates an alert. + + :param alert_id: str + :param name: str + Name of the alert. + :param options: :class:`AlertOptions` + Alert configuration options. 
+ :param query_id: str + Query ID. + :param rearm: int (optional) + Number of seconds after being triggered before the alert rearms itself and can be triggered again. + If `null`, alert will never be triggered again. + + + \ No newline at end of file diff --git a/docs/workspace/sql/dashboard_widgets.rst b/docs/workspace/sql/dashboard_widgets.rst new file mode 100644 index 000000000..d4bbcde1d --- /dev/null +++ b/docs/workspace/sql/dashboard_widgets.rst @@ -0,0 +1,56 @@ +``w.dashboard_widgets``: Dashboard Widgets +========================================== +.. currentmodule:: databricks.sdk.service.sql + +.. py:class:: DashboardWidgetsAPI + + This is an evolving API that facilitates the addition and removal of widgets from existing dashboards + within the Databricks Workspace. Data structures may change over time. + + .. py:method:: create(dashboard_id: str, options: WidgetOptions, width: int [, text: Optional[str], visualization_id: Optional[str]]) -> Widget + + Add widget to a dashboard. + + :param dashboard_id: str + Dashboard ID returned by :method:dashboards/create. + :param options: :class:`WidgetOptions` + :param width: int + Width of a widget + :param text: str (optional) + If this is a textbox widget, the application displays this text. This field is ignored if the widget + contains a visualization in the `visualization` field. + :param visualization_id: str (optional) + Query Visualization ID returned by :method:queryvisualizations/create. + + :returns: :class:`Widget` + + + .. py:method:: delete(id: str) + + Remove widget. + + :param id: str + Widget ID returned by :method:dashboardwidgets/create + + + + + .. py:method:: update(id: str, dashboard_id: str, options: WidgetOptions, width: int [, text: Optional[str], visualization_id: Optional[str]]) -> Widget + + Update existing widget. + + :param id: str + Widget ID returned by :method:dashboardwidgets/create + :param dashboard_id: str + Dashboard ID returned by :method:dashboards/create.
+ :param options: :class:`WidgetOptions` + :param width: int + Width of a widget + :param text: str (optional) + If this is a textbox widget, the application displays this text. This field is ignored if the widget + contains a visualization in the `visualization` field. + :param visualization_id: str (optional) + Query Visualization ID returned by :method:queryvisualizations/create. + + :returns: :class:`Widget` + \ No newline at end of file diff --git a/docs/workspace/sql/dashboards.rst b/docs/workspace/sql/dashboards.rst new file mode 100644 index 000000000..29cafb7cc --- /dev/null +++ b/docs/workspace/sql/dashboards.rst @@ -0,0 +1,184 @@ +``w.dashboards``: Dashboards +============================ +.. currentmodule:: databricks.sdk.service.sql + +.. py:class:: DashboardsAPI + + In general, there is little need to modify dashboards using the API. However, it can be useful to use + dashboard objects to look up a collection of related query IDs. The API can also be used to duplicate + multiple dashboards at once since you can get a dashboard definition with a GET request and then POST it + to create a new one. Dashboards can be scheduled using the `sql_task` type of the Jobs API, e.g. + :method:jobs/create. + + .. py:method:: create(name: str [, dashboard_filters_enabled: Optional[bool], is_favorite: Optional[bool], parent: Optional[str], run_as_role: Optional[RunAsRole], tags: Optional[List[str]]]) -> Dashboard + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.dashboards.create(name=f'sdk-{time.time_ns()}') + + # cleanup + w.dashboards.delete(dashboard_id=created.id) + + Create a dashboard object. + + :param name: str + The title of this dashboard that appears in list views and at the top of the dashboard page.
+ :param dashboard_filters_enabled: bool (optional) + Indicates whether the dashboard filters are enabled + :param is_favorite: bool (optional) + Indicates whether this dashboard object should appear in the current user's favorites list. + :param parent: str (optional) + The identifier of the workspace folder containing the object. + :param run_as_role: :class:`RunAsRole` (optional) + Sets the **Run as** role for the object. Must be set to one of `"viewer"` (signifying "run as + viewer" behavior) or `"owner"` (signifying "run as owner" behavior) + :param tags: List[str] (optional) + + :returns: :class:`Dashboard` + + + .. py:method:: delete(dashboard_id: str) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.dashboards.create(name=f'sdk-{time.time_ns()}') + + w.dashboards.delete(dashboard_id=created.id) + + # cleanup + w.dashboards.delete(dashboard_id=created.id) + + Remove a dashboard. + + Moves a dashboard to the trash. Trashed dashboards do not appear in list views or searches, and cannot + be shared. + + :param dashboard_id: str + + + + + .. py:method:: get(dashboard_id: str) -> Dashboard + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.dashboards.create(name=f'sdk-{time.time_ns()}') + + by_id = w.dashboards.get(dashboard_id=created.id) + + # cleanup + w.dashboards.delete(dashboard_id=created.id) + + Retrieve a definition. + + Returns a JSON representation of a dashboard object, including its visualization and query objects. + + :param dashboard_id: str + + :returns: :class:`Dashboard` + + + .. py:method:: list( [, order: Optional[ListOrder], page: Optional[int], page_size: Optional[int], q: Optional[str]]) -> Iterator[Dashboard] + + + Usage: + + .. 
code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import sql + + w = WorkspaceClient() + + all = w.dashboards.list(sql.ListDashboardsRequest()) + + Get dashboard objects. + + Fetch a paginated list of dashboard objects. + + :param order: :class:`ListOrder` (optional) + Name of dashboard attribute to order by. + :param page: int (optional) + Page number to retrieve. + :param page_size: int (optional) + Number of dashboards to return per page. + :param q: str (optional) + Full text search term. + + :returns: Iterator over :class:`Dashboard` + + + .. py:method:: restore(dashboard_id: str) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.dashboards.create(name=f'sdk-{time.time_ns()}') + + w.dashboards.restore(dashboard_id=created.id) + + # cleanup + w.dashboards.delete(dashboard_id=created.id) + + Restore a dashboard. + + A restored dashboard appears in list views and searches and can be shared. + + :param dashboard_id: str + + + + + .. py:method:: update(dashboard_id: str [, name: Optional[str], run_as_role: Optional[RunAsRole]]) -> Dashboard + + Change a dashboard definition. + + Modify this dashboard definition. This operation only affects attributes of the dashboard object. It + does not add, modify, or remove widgets. + + **Note**: You cannot undo this operation. + + :param dashboard_id: str + :param name: str (optional) + The title of this dashboard that appears in list views and at the top of the dashboard page. + :param run_as_role: :class:`RunAsRole` (optional) + Sets the **Run as** role for the object. 
Must be set to one of `"viewer"` (signifying "run as + viewer" behavior) or `"owner"` (signifying "run as owner" behavior) + + :returns: :class:`Dashboard` + \ No newline at end of file diff --git a/docs/workspace/sql/data_sources.rst b/docs/workspace/sql/data_sources.rst new file mode 100644 index 000000000..5cf1ed526 --- /dev/null +++ b/docs/workspace/sql/data_sources.rst @@ -0,0 +1,35 @@ +``w.data_sources``: Data Sources +================================ +.. currentmodule:: databricks.sdk.service.sql + +.. py:class:: DataSourcesAPI + + This API is provided to assist you in making new query objects. When creating a query object, you may + optionally specify a `data_source_id` for the SQL warehouse against which it will run. If you don't + already know the `data_source_id` for your desired SQL warehouse, this API will help you find it. + + This API does not support searches. It returns the full list of SQL warehouses in your workspace. We + advise you to use any text editor, REST client, or `grep` to search the response from this API for the + name of your SQL warehouse as it appears in Databricks SQL. + + .. py:method:: list() -> Iterator[DataSource] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + srcs = w.data_sources.list() + + Get a list of SQL warehouses. + + Retrieves a full list of SQL warehouses available in this workspace. All fields that appear in this + API response are enumerated for clarity. However, you need only a SQL warehouse's `id` to create new + queries against it. + + :returns: Iterator over :class:`DataSource` + \ No newline at end of file diff --git a/docs/workspace/sql/dbsql_permissions.rst b/docs/workspace/sql/dbsql_permissions.rst new file mode 100644 index 000000000..07aa4f00f --- /dev/null +++ b/docs/workspace/sql/dbsql_permissions.rst @@ -0,0 +1,63 @@ +``w.dbsql_permissions``: ACL / Permissions +========================================== +.. 
currentmodule:: databricks.sdk.service.sql + +.. py:class:: DbsqlPermissionsAPI + + The SQL Permissions API is similar to the endpoints of the :method:permissions/set. However, this exposes + only one endpoint, which gets the Access Control List for a given object. You cannot modify any + permissions using this API. + + There are three levels of permission: + + - `CAN_VIEW`: Allows read-only access + + - `CAN_RUN`: Allows read access and run access (superset of `CAN_VIEW`) + + - `CAN_MANAGE`: Allows all actions: read, run, edit, delete, modify permissions (superset of `CAN_RUN`) + + .. py:method:: get(object_type: ObjectTypePlural, object_id: str) -> GetResponse + + Get object ACL. + + Gets a JSON representation of the access control list (ACL) for a specified object. + + :param object_type: :class:`ObjectTypePlural` + The type of object permissions to check. + :param object_id: str + Object ID. An ACL is returned for the object with this UUID. + + :returns: :class:`GetResponse` + + + .. py:method:: set(object_type: ObjectTypePlural, object_id: str [, access_control_list: Optional[List[AccessControl]]]) -> SetResponse + + Set object ACL. + + Sets the access control list (ACL) for a specified object. This operation will completely rewrite the + ACL. + + :param object_type: :class:`ObjectTypePlural` + The type of object permission to set. + :param object_id: str + Object ID. The ACL for the object with this UUID is overwritten by this request's POST content. + :param access_control_list: List[:class:`AccessControl`] (optional) + + :returns: :class:`SetResponse` + + + .. py:method:: transfer_ownership(object_type: OwnableObjectType, object_id: TransferOwnershipObjectId [, new_owner: Optional[str]]) -> Success + + Transfer object ownership. + + Transfers ownership of a dashboard, query, or alert to an active user. Requires an admin API key. + + :param object_type: :class:`OwnableObjectType` + The type of object on which to change ownership.
+ :param object_id: :class:`TransferOwnershipObjectId` + The ID of the object on which to change ownership. + :param new_owner: str (optional) + Email address for the new owner, who must exist in the workspace. + + :returns: :class:`Success` + \ No newline at end of file diff --git a/docs/workspace/sql/index.rst b/docs/workspace/sql/index.rst new file mode 100644 index 000000000..397de5c72 --- /dev/null +++ b/docs/workspace/sql/index.rst @@ -0,0 +1,19 @@ + +Databricks SQL +============== + +Manage Databricks SQL assets, including warehouses, dashboards, queries and query history, and alerts + +.. toctree:: + :maxdepth: 1 + + alerts + dashboard_widgets + dashboards + data_sources + dbsql_permissions + queries + query_history + query_visualizations + statement_execution + warehouses \ No newline at end of file diff --git a/docs/workspace/sql/queries.rst b/docs/workspace/sql/queries.rst new file mode 100644 index 000000000..32803f6d3 --- /dev/null +++ b/docs/workspace/sql/queries.rst @@ -0,0 +1,214 @@ +``w.queries``: Queries / Results +================================ +.. currentmodule:: databricks.sdk.service.sql + +.. py:class:: QueriesAPI + + These endpoints are used for CRUD operations on query definitions. Query definitions include the target + SQL warehouse, query text, name, description, tags, parameters, and visualizations. Queries can be + scheduled using the `sql_task` type of the Jobs API, e.g. :method:jobs/create. + + .. py:method:: create( [, data_source_id: Optional[str], description: Optional[str], name: Optional[str], options: Optional[Any], parent: Optional[str], query: Optional[str], run_as_role: Optional[RunAsRole]]) -> Query + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + srcs = w.data_sources.list() + + query = w.queries.create(name=f'sdk-{time.time_ns()}', + data_source_id=srcs[0].id, + description="test query from Go SDK", + query="SHOW TABLES") + + # cleanup + w.queries.delete(query_id=query.id) + + Create a new query definition. + + Creates a new query definition. Queries created with this endpoint belong to the authenticated user + making the request. + + The `data_source_id` field specifies the ID of the SQL warehouse to run this query against. You can + use the Data Sources API to see a complete list of available SQL warehouses. Or you can copy the + `data_source_id` from an existing query. + + **Note**: You cannot add a visualization until you create the query. + + :param data_source_id: str (optional) + Data source ID maps to the ID of the data source used by the resource and is distinct from the + warehouse ID. [Learn more]. + + [Learn more]: https://docs.databricks.com/api/workspace/datasources/list + :param description: str (optional) + General description that conveys additional information about this query such as usage notes. + :param name: str (optional) + The title of this query that appears in list views, widget headings, and on the query page. + :param options: Any (optional) + Exclusively used for storing a list of parameter definitions. A parameter is an object with `title`, + `name`, `type`, and `value` properties. The `value` field here is the default value. It can be + overridden at runtime. + :param parent: str (optional) + The identifier of the workspace folder containing the object. + :param query: str (optional) + The text of the query to be run. + :param run_as_role: :class:`RunAsRole` (optional) + Sets the **Run as** role for the object. Must be set to one of `"viewer"` (signifying "run as + viewer" behavior) or `"owner"` (signifying "run as owner" behavior) + + :returns: :class:`Query` + + + .. 
py:method:: delete(query_id: str) + + Delete a query. + + Moves a query to the trash. Trashed queries immediately disappear from searches and list views, and + they cannot be used for alerts. The trash is deleted after 30 days. + + :param query_id: str + + + + + .. py:method:: get(query_id: str) -> Query + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + srcs = w.data_sources.list() + + query = w.queries.create(name=f'sdk-{time.time_ns()}', + data_source_id=srcs[0].id, + description="test query from Go SDK", + query="SHOW TABLES") + + by_id = w.queries.get(query_id=query.id) + + # cleanup + w.queries.delete(query_id=query.id) + + Get a query definition. + + Retrieve a query object definition along with contextual permissions information about the currently + authenticated user. + + :param query_id: str + + :returns: :class:`Query` + + + .. py:method:: list( [, order: Optional[str], page: Optional[int], page_size: Optional[int], q: Optional[str]]) -> Iterator[Query] + + Get a list of queries. + + Gets a list of queries. Optionally, this list can be filtered by a search term. + + :param order: str (optional) + Name of query attribute to order by. Default sort order is ascending. Append a dash (`-`) to order + descending instead. + + - `name`: The name of the query. + + - `created_at`: The timestamp the query was created. + + - `runtime`: The time it took to run this query. This is blank for parameterized queries. A blank + value is treated as the highest value for sorting. + + - `executed_at`: The timestamp when the query was last run. + + - `created_by`: The user name of the user that created the query. + :param page: int (optional) + Page number to retrieve. + :param page_size: int (optional) + Number of queries to return per page. + :param q: str (optional) + Full text search term + + :returns: Iterator over :class:`Query` + + + .. py:method:: restore(query_id: str) + + Restore a query. 
+ + Restore a query that has been moved to the trash. A restored query appears in list views and searches. + You can use restored queries for alerts. + + :param query_id: str + + + + + .. py:method:: update(query_id: str [, data_source_id: Optional[str], description: Optional[str], name: Optional[str], options: Optional[Any], query: Optional[str], run_as_role: Optional[RunAsRole]]) -> Query + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + srcs = w.data_sources.list() + + query = w.queries.create(name=f'sdk-{time.time_ns()}', + data_source_id=srcs[0].id, + description="test query from Go SDK", + query="SHOW TABLES") + + updated = w.queries.update(query_id=query.id, + name=f'sdk-{time.time_ns()}', + data_source_id=srcs[0].id, + description="UPDATED: test query from Go SDK", + query="SELECT 2+2") + + # cleanup + w.queries.delete(query_id=query.id) + + Change a query definition. + + Modify this query definition. + + **Note**: You cannot undo this operation. + + :param query_id: str + :param data_source_id: str (optional) + Data source ID maps to the ID of the data source used by the resource and is distinct from the + warehouse ID. [Learn more]. + + [Learn more]: https://docs.databricks.com/api/workspace/datasources/list + :param description: str (optional) + General description that conveys additional information about this query such as usage notes. + :param name: str (optional) + The title of this query that appears in list views, widget headings, and on the query page. + :param options: Any (optional) + Exclusively used for storing a list of parameter definitions. A parameter is an object with `title`, + `name`, `type`, and `value` properties. The `value` field here is the default value. It can be + overridden at runtime. + :param query: str (optional) + The text of the query to be run. + :param run_as_role: :class:`RunAsRole` (optional) + Sets the **Run as** role for the object. 
Must be set to one of `"viewer"` (signifying "run as + viewer" behavior) or `"owner"` (signifying "run as owner" behavior) + + :returns: :class:`Query` + \ No newline at end of file diff --git a/docs/workspace/sql/query_history.rst b/docs/workspace/sql/query_history.rst new file mode 100644 index 000000000..c4c6ee9ae --- /dev/null +++ b/docs/workspace/sql/query_history.rst @@ -0,0 +1,40 @@ +``w.query_history``: Query History +================================== +.. currentmodule:: databricks.sdk.service.sql + +.. py:class:: QueryHistoryAPI + + Access the history of queries through SQL warehouses. + + .. py:method:: list( [, filter_by: Optional[QueryFilter], include_metrics: Optional[bool], max_results: Optional[int], page_token: Optional[str]]) -> Iterator[QueryInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import sql + + w = WorkspaceClient() + + _ = w.query_history.list(filter_by=sql.QueryFilter( + query_start_time_range=sql.TimeRange(start_time_ms=1690243200000, end_time_ms=1690329600000))) + + List Queries. + + List the history of queries through SQL warehouses. + + You can filter by user ID, warehouse ID, status, and time range. + + :param filter_by: :class:`QueryFilter` (optional) + A filter to limit query history results. This field is optional. + :param include_metrics: bool (optional) + Whether to include metrics about query. + :param max_results: int (optional) + Limit the number of results returned in one page. The default is 100. + :param page_token: str (optional) + A token that can be used to get the next page of results. 
+ + :returns: Iterator over :class:`QueryInfo` + \ No newline at end of file diff --git a/docs/workspace/sql/query_visualizations.rst b/docs/workspace/sql/query_visualizations.rst new file mode 100644 index 000000000..7ef5b1cdc --- /dev/null +++ b/docs/workspace/sql/query_visualizations.rst @@ -0,0 +1,58 @@ +``w.query_visualizations``: Query Visualizations +================================================ +.. currentmodule:: databricks.sdk.service.sql + +.. py:class:: QueryVisualizationsAPI + + This is an evolving API that facilitates the addition and removal of visualizations from existing queries + within the Databricks Workspace. Data structures may change over time. + + .. py:method:: create(query_id: str, type: str, options: Any [, description: Optional[str], name: Optional[str]]) -> Visualization + + Add visualization to a query. + + :param query_id: str + The identifier returned by :method:queries/create + :param type: str + The type of visualization: chart, table, pivot table, and so on. + :param options: Any + The options object varies widely from one visualization type to the next and is unsupported. + Databricks does not recommend modifying visualization settings in JSON. + :param description: str (optional) + A short description of this visualization. This is not displayed in the UI. + :param name: str (optional) + The name of the visualization that appears on dashboards and the query screen. + + :returns: :class:`Visualization` + + + .. py:method:: delete(id: str) + + Remove visualization. + + :param id: str + Widget ID returned by :method:queryvizualisations/create + + + + + .. py:method:: update(id: str [, created_at: Optional[str], description: Optional[str], name: Optional[str], options: Optional[Any], type: Optional[str], updated_at: Optional[str]]) -> Visualization + + Edit existing visualization. + + :param id: str + The UUID for this visualization. 
+ :param created_at: str (optional) + :param description: str (optional) + A short description of this visualization. This is not displayed in the UI. + :param name: str (optional) + The name of the visualization that appears on dashboards and the query screen. + :param options: Any (optional) + The options object varies widely from one visualization type to the next and is unsupported. + Databricks does not recommend modifying visualization settings in JSON. + :param type: str (optional) + The type of visualization: chart, table, pivot table, and so on. + :param updated_at: str (optional) + + :returns: :class:`Visualization` + \ No newline at end of file diff --git a/docs/workspace/sql/statement_execution.rst b/docs/workspace/sql/statement_execution.rst new file mode 100644 index 000000000..d5c479462 --- /dev/null +++ b/docs/workspace/sql/statement_execution.rst @@ -0,0 +1,270 @@ +``w.statement_execution``: Statement Execution +============================================== +.. currentmodule:: databricks.sdk.service.sql + +.. py:class:: StatementExecutionAPI + + The Databricks SQL Statement Execution API can be used to execute SQL statements on a SQL warehouse and + fetch the result. + + **Getting started** + + We suggest beginning with the [Databricks SQL Statement Execution API tutorial]. + + **Overview of statement execution and result fetching** + + Statement execution begins by issuing a :method:statementexecution/executeStatement request with a valid + SQL statement and warehouse ID, along with optional parameters such as the data catalog and output format. + If no other parameters are specified, the server will wait for up to 10s before returning a response. If + the statement has completed within this timespan, the response will include the result data as a JSON + array and metadata. 
Otherwise, if no result is available after the 10s timeout expired, the response will + provide the statement ID that can be used to poll for results by using a + :method:statementexecution/getStatement request. + + You can specify whether the call should behave synchronously, asynchronously or start synchronously with a + fallback to asynchronous execution. This is controlled with the `wait_timeout` and `on_wait_timeout` + settings. If `wait_timeout` is set between 5-50 seconds (default: 10s), the call waits for results up to + the specified timeout; when set to `0s`, the call is asynchronous and responds immediately with a + statement ID. The `on_wait_timeout` setting specifies what should happen when the timeout is reached while + the statement execution has not yet finished. This can be set to either `CONTINUE`, to fallback to + asynchronous mode, or it can be set to `CANCEL`, which cancels the statement. + + In summary: - Synchronous mode - `wait_timeout=30s` and `on_wait_timeout=CANCEL` - The call waits up to 30 + seconds; if the statement execution finishes within this time, the result data is returned directly in the + response. If the execution takes longer than 30 seconds, the execution is canceled and the call returns + with a `CANCELED` state. - Asynchronous mode - `wait_timeout=0s` (`on_wait_timeout` is ignored) - The call + doesn't wait for the statement to finish but returns directly with a statement ID. The status of the + statement execution can be polled by issuing :method:statementexecution/getStatement with the statement + ID. Once the execution has succeeded, this call also returns the result and metadata in the response. - + Hybrid mode (default) - `wait_timeout=10s` and `on_wait_timeout=CONTINUE` - The call waits for up to 10 + seconds; if the statement execution finishes within this time, the result data is returned directly in the + response. If the execution takes longer than 10 seconds, a statement ID is returned. 
The statement ID can + be used to fetch status and results in the same way as in the asynchronous mode. + + Depending on the size, the result can be split into multiple chunks. If the statement execution is + successful, the statement response contains a manifest and the first chunk of the result. The manifest + contains schema information and provides metadata for each chunk in the result. Result chunks can be + retrieved by index with :method:statementexecution/getStatementResultChunkN which may be called in any + order and in parallel. For sequential fetching, each chunk, apart from the last, also contains a + `next_chunk_index` and `next_chunk_internal_link` that point to the next chunk. + + A statement can be canceled with :method:statementexecution/cancelExecution. + + **Fetching result data: format and disposition** + + To specify the format of the result data, use the `format` field, which can be set to one of the following + options: `JSON_ARRAY` (JSON), `ARROW_STREAM` ([Apache Arrow Columnar]), or `CSV`. + + There are two ways to receive statement results, controlled by the `disposition` setting, which can be + either `INLINE` or `EXTERNAL_LINKS`: + + - `INLINE`: In this mode, the result data is directly included in the response. It's best suited for + smaller results. This mode can only be used with the `JSON_ARRAY` format. + + - `EXTERNAL_LINKS`: In this mode, the response provides links that can be used to download the result data + in chunks separately. This approach is ideal for larger results and offers higher throughput. This mode + can be used with all the formats: `JSON_ARRAY`, `ARROW_STREAM`, and `CSV`. + + By default, the API uses `format=JSON_ARRAY` and `disposition=INLINE`. + + **Limits and limitations** + + Note: The byte limit for INLINE disposition is based on internal storage metrics and will not exactly + match the byte count of the actual payload. 
+ + - Statements with `disposition=INLINE` are limited to 25 MiB and will fail when this limit is exceeded. - + Statements with `disposition=EXTERNAL_LINKS` are limited to 100 GiB. Result sets larger than this limit + will be truncated. Truncation is indicated by the `truncated` field in the result manifest. - The maximum + query text size is 16 MiB. - Cancelation might silently fail. A successful response from a cancel request + indicates that the cancel request was successfully received and sent to the processing engine. However, an + outstanding statement might have already completed execution when the cancel request arrives. Polling for + status until a terminal state is reached is a reliable way to determine the final state. - Wait timeouts + are approximate, occur server-side, and cannot account for things such as caller delays and network + latency from caller to service. - The system will auto-close a statement after one hour if the client + stops polling and thus you must poll at least once an hour. - The results are only available for one hour + after success; polling does not extend this. + + [Apache Arrow Columnar]: https://arrow.apache.org/overview/ + [Databricks SQL Statement Execution API tutorial]: https://docs.databricks.com/sql/api/sql-execution-tutorial.html + + .. py:method:: cancel_execution(statement_id: str) + + Cancel statement execution. + + Requests that an executing statement be canceled. Callers must poll for status to see the terminal + state. + + :param statement_id: str + The statement ID is returned upon successfully submitting a SQL statement, and is a required + reference for all subsequent calls. + + + + + .. 
py:method:: execute_statement(statement: str, warehouse_id: str [, byte_limit: Optional[int], catalog: Optional[str], disposition: Optional[Disposition], format: Optional[Format], on_wait_timeout: Optional[ExecuteStatementRequestOnWaitTimeout], parameters: Optional[List[StatementParameterListItem]], row_limit: Optional[int], schema: Optional[str], wait_timeout: Optional[str]]) -> ExecuteStatementResponse + + Execute a SQL statement. + + :param statement: str + The SQL statement to execute. The statement can optionally be parameterized, see `parameters`. + :param warehouse_id: str + Warehouse upon which to execute a statement. See also [What are SQL + warehouses?](/sql/admin/warehouse-type.html) + :param byte_limit: int (optional) + Applies the given byte limit to the statement's result size. Byte counts are based on internal data + representations and might not match the final size in the requested `format`. If the result was + truncated due to the byte limit, then `truncated` in the response is set to `true`. When using + `EXTERNAL_LINKS` disposition, a default `byte_limit` of 100 GiB is applied if `byte_limit` is not + explicitly set. + :param catalog: str (optional) + Sets default catalog for statement execution, similar to [`USE CATALOG`] in SQL. + + [`USE CATALOG`]: https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-use-catalog.html + :param disposition: :class:`Disposition` (optional) + The fetch disposition provides two modes of fetching results: `INLINE` and `EXTERNAL_LINKS`. + + Statements executed with `INLINE` disposition will return result data inline, in `JSON_ARRAY` + format, in a series of chunks. If a given statement produces a result set with a size larger than 25 + MiB, that statement execution is aborted, and no result set will be available. + + **NOTE** Byte limits are computed based upon internal representations of the result set data, and + might not match the sizes visible in JSON responses. 
+ + Statements executed with `EXTERNAL_LINKS` disposition will return result data as external links: + URLs that point to cloud storage internal to the workspace. Using `EXTERNAL_LINKS` disposition + allows statements to generate arbitrarily sized result sets for fetching up to 100 GiB. The + resulting links have two important properties: + + 1. They point to resources _external_ to the Databricks compute; therefore any associated + authentication information (typically a personal access token, OAuth token, or similar) _must be + removed_ when fetching from these links. + + 2. These are presigned URLs with a specific expiration, indicated in the response. The behavior when + attempting to use an expired link is cloud specific. + :param format: :class:`Format` (optional) + Statement execution supports three result formats: `JSON_ARRAY` (default), `ARROW_STREAM`, and + `CSV`. + + Important: The formats `ARROW_STREAM` and `CSV` are supported only with `EXTERNAL_LINKS` + disposition. `JSON_ARRAY` is supported in `INLINE` and `EXTERNAL_LINKS` disposition. + + When specifying `format=JSON_ARRAY`, result data will be formatted as an array of arrays of values, + where each value is either the *string representation* of a value, or `null`. For example, the + output of `SELECT concat('id-', id) AS strCol, id AS intCol, null AS nullCol FROM range(3)` would + look like this: + + ``` [ [ "id-1", "1", null ], [ "id-2", "2", null ], [ "id-3", "3", null ], ] ``` + + When specifying `format=JSON_ARRAY` and `disposition=EXTERNAL_LINKS`, each chunk in the result + contains compact JSON with no indentation or extra whitespace. + + When specifying `format=ARROW_STREAM` and `disposition=EXTERNAL_LINKS`, each chunk in the result + will be formatted as Apache Arrow Stream. See the [Apache Arrow streaming format]. + + When specifying `format=CSV` and `disposition=EXTERNAL_LINKS`, each chunk in the result will be a + CSV according to [RFC 4180] standard. 
All the column values will have *string representation* + similar to the `JSON_ARRAY` format, and `null` values will be encoded as “null”. Only the first + chunk in the result would contain a header row with column names. For example, the output of `SELECT + concat('id-', id) AS strCol, id AS intCol, null as nullCol FROM range(3)` would look like this: + + ``` strCol,intCol,nullCol id-1,1,null id-2,2,null id-3,3,null ``` + + [Apache Arrow streaming format]: https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format + [RFC 4180]: https://www.rfc-editor.org/rfc/rfc4180 + :param on_wait_timeout: :class:`ExecuteStatementRequestOnWaitTimeout` (optional) + When `wait_timeout > 0s`, the call will block up to the specified time. If the statement execution + doesn't finish within this time, `on_wait_timeout` determines whether the execution should continue + or be canceled. When set to `CONTINUE`, the statement execution continues asynchronously and the + call returns a statement ID which can be used for polling with + :method:statementexecution/getStatement. When set to `CANCEL`, the statement execution is canceled + and the call returns with a `CANCELED` state. + :param parameters: List[:class:`StatementParameterListItem`] (optional) + A list of parameters to pass into a SQL statement containing parameter markers. A parameter consists + of a name, a value, and optionally a type. To represent a NULL value, the `value` field may be + omitted or set to `null` explicitly. If the `type` field is omitted, the value is interpreted as a + string. + + If the type is given, parameters will be checked for type correctness according to the given type. A + value is correct if the provided string can be converted to the requested type using the `cast` + function. The exact semantics are described in the section [`cast` function] of the SQL language + reference. 
+ + For example, the following statement contains two parameters, `my_name` and `my_date`: + + SELECT * FROM my_table WHERE name = :my_name AND date = :my_date + + The parameters can be passed in the request body as follows: + + { ..., "statement": "SELECT * FROM my_table WHERE name = :my_name AND date = :my_date", + "parameters": [ { "name": "my_name", "value": "the name" }, { "name": "my_date", "value": + "2020-01-01", "type": "DATE" } ] } + + Currently, positional parameters denoted by a `?` marker are not supported by the Databricks SQL + Statement Execution API. + + Also see the section [Parameter markers] of the SQL language reference. + + [Parameter markers]: https://docs.databricks.com/sql/language-manual/sql-ref-parameter-marker.html + [`cast` function]: https://docs.databricks.com/sql/language-manual/functions/cast.html + :param row_limit: int (optional) + Applies the given row limit to the statement's result set, but unlike the `LIMIT` clause in SQL, it + also sets the `truncated` field in the response to indicate whether the result was trimmed due to + the limit or not. + :param schema: str (optional) + Sets default schema for statement execution, similar to [`USE SCHEMA`] in SQL. + + [`USE SCHEMA`]: https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-use-schema.html + :param wait_timeout: str (optional) + The time in seconds the call will wait for the statement's result set as `Ns`, where `N` can be set + to 0 or to a value between 5 and 50. + + When set to `0s`, the statement will execute in asynchronous mode and the call will not wait for the + execution to finish. In this case, the call returns directly with `PENDING` state and a statement ID + which can be used for polling with :method:statementexecution/getStatement. + + When set between 5 and 50 seconds, the call will behave synchronously up to this timeout and wait + for the statement execution to finish. 
If the execution finishes within this time, the call returns + immediately with a manifest and result data (or a `FAILED` state in case of an execution error). If + the statement takes longer to execute, `on_wait_timeout` determines what should happen after the + timeout is reached. + + :returns: :class:`ExecuteStatementResponse` + + + .. py:method:: get_statement(statement_id: str) -> GetStatementResponse + + Get status, manifest, and result first chunk. + + This request can be used to poll for the statement's status. When the `status.state` field is + `SUCCEEDED` it will also return the result manifest and the first chunk of the result data. When the + statement is in the terminal states `CANCELED`, `CLOSED` or `FAILED`, it returns HTTP 200 with the + state set. After at least 12 hours in terminal state, the statement is removed from the warehouse and + further calls will receive an HTTP 404 response. + + **NOTE** This call currently might take up to 5 seconds to get the latest status and result. + + :param statement_id: str + The statement ID is returned upon successfully submitting a SQL statement, and is a required + reference for all subsequent calls. + + :returns: :class:`GetStatementResponse` + + + .. py:method:: get_statement_result_chunk_n(statement_id: str, chunk_index: int) -> ResultData + + Get result chunk by index. + + After the statement execution has `SUCCEEDED`, this request can be used to fetch any chunk by index. + Whereas the first chunk with `chunk_index=0` is typically fetched with + :method:statementexecution/executeStatement or :method:statementexecution/getStatement, this request + can be used to fetch subsequent chunks. The response structure is identical to the nested `result` + element described in the :method:statementexecution/getStatement request, and similarly includes the + `next_chunk_index` and `next_chunk_internal_link` fields for simple iteration through the result set. 
+ + :param statement_id: str + The statement ID is returned upon successfully submitting a SQL statement, and is a required + reference for all subsequent calls. + :param chunk_index: int + + :returns: :class:`ResultData` + \ No newline at end of file diff --git a/docs/workspace/sql/warehouses.rst b/docs/workspace/sql/warehouses.rst new file mode 100644 index 000000000..793852680 --- /dev/null +++ b/docs/workspace/sql/warehouses.rst @@ -0,0 +1,394 @@ +``w.warehouses``: SQL Warehouses +================================ +.. currentmodule:: databricks.sdk.service.sql + +.. py:class:: WarehousesAPI + + A SQL warehouse is a compute resource that lets you run SQL commands on data objects within Databricks + SQL. Compute resources are infrastructure resources that provide processing capabilities in the cloud. + + .. py:method:: create( [, auto_stop_mins: Optional[int], channel: Optional[Channel], cluster_size: Optional[str], creator_name: Optional[str], enable_photon: Optional[bool], enable_serverless_compute: Optional[bool], instance_profile_arn: Optional[str], max_num_clusters: Optional[int], min_num_clusters: Optional[int], name: Optional[str], spot_instance_policy: Optional[SpotInstancePolicy], tags: Optional[EndpointTags], warehouse_type: Optional[CreateWarehouseRequestWarehouseType]]) -> Wait[GetWarehouseResponse] + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.warehouses.create(name=f'sdk-{time.time_ns()}', + cluster_size="2X-Small", + max_num_clusters=1, + auto_stop_mins=10).result() + + # cleanup + w.warehouses.delete(id=created.id) + + Create a warehouse. + + Creates a new SQL warehouse. + + :param auto_stop_mins: int (optional) + The amount of time in minutes that a SQL warehouse must be idle (i.e., no RUNNING queries) before it + is automatically stopped. + + Supported values: - Must be == 0 or >= 10 mins - 0 indicates no autostop. 
+ + Defaults to 120 mins + :param channel: :class:`Channel` (optional) + Channel Details + :param cluster_size: str (optional) + Size of the clusters allocated for this warehouse. Increasing the size of a spark cluster allows you + to run larger queries on it. If you want to increase the number of concurrent queries, please tune + max_num_clusters. + + Supported values: - 2X-Small - X-Small - Small - Medium - Large - X-Large - 2X-Large - 3X-Large - + 4X-Large + :param creator_name: str (optional) + warehouse creator name + :param enable_photon: bool (optional) + Configures whether the warehouse should use Photon optimized clusters. + + Defaults to false. + :param enable_serverless_compute: bool (optional) + Configures whether the warehouse should use serverless compute + :param instance_profile_arn: str (optional) + Deprecated. Instance profile used to pass IAM role to the cluster + :param max_num_clusters: int (optional) + Maximum number of clusters that the autoscaler will create to handle concurrent queries. + + Supported values: - Must be >= min_num_clusters - Must be <= 30. + + Defaults to min_clusters if unset. + :param min_num_clusters: int (optional) + Minimum number of available clusters that will be maintained for this SQL warehouse. Increasing this + will ensure that a larger number of clusters are always running and therefore may reduce the cold + start time for new queries. This is similar to reserved vs. revocable cores in a resource manager. + + Supported values: - Must be > 0 - Must be <= min(max_num_clusters, 30) + + Defaults to 1 + :param name: str (optional) + Logical name for the cluster. + + Supported values: - Must be unique within an org. - Must be less than 100 characters. + :param spot_instance_policy: :class:`SpotInstancePolicy` (optional) + Configures whether the warehouse should use spot instances. 
+ :param tags: :class:`EndpointTags` (optional) + A set of key-value pairs that will be tagged on all resources (e.g., AWS instances and EBS volumes) + associated with this SQL warehouse. + + Supported values: - Number of tags < 45. + :param warehouse_type: :class:`CreateWarehouseRequestWarehouseType` (optional) + Warehouse type: `PRO` or `CLASSIC`. If you want to use serverless compute, you must set to `PRO` and + also set the field `enable_serverless_compute` to `true`. + + :returns: + Long-running operation waiter for :class:`GetWarehouseResponse`. + See :method:wait_get_warehouse_running for more details. + + + .. py:method:: create_and_wait( [, auto_stop_mins: Optional[int], channel: Optional[Channel], cluster_size: Optional[str], creator_name: Optional[str], enable_photon: Optional[bool], enable_serverless_compute: Optional[bool], instance_profile_arn: Optional[str], max_num_clusters: Optional[int], min_num_clusters: Optional[int], name: Optional[str], spot_instance_policy: Optional[SpotInstancePolicy], tags: Optional[EndpointTags], warehouse_type: Optional[CreateWarehouseRequestWarehouseType], timeout: datetime.timedelta = 0:20:00]) -> GetWarehouseResponse + + + .. py:method:: delete(id: str) + + Delete a warehouse. + + Deletes a SQL warehouse. + + :param id: str + Required. Id of the SQL warehouse. + + + + + .. py:method:: edit(id: str [, auto_stop_mins: Optional[int], channel: Optional[Channel], cluster_size: Optional[str], creator_name: Optional[str], enable_photon: Optional[bool], enable_serverless_compute: Optional[bool], instance_profile_arn: Optional[str], max_num_clusters: Optional[int], min_num_clusters: Optional[int], name: Optional[str], spot_instance_policy: Optional[SpotInstancePolicy], tags: Optional[EndpointTags], warehouse_type: Optional[EditWarehouseRequestWarehouseType]]) -> Wait[GetWarehouseResponse] + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.warehouses.create(name=f'sdk-{time.time_ns()}', + cluster_size="2X-Small", + max_num_clusters=1, + auto_stop_mins=10).result() + + _ = w.warehouses.edit(id=created.id, + name=f'sdk-{time.time_ns()}', + cluster_size="2X-Small", + max_num_clusters=1, + auto_stop_mins=10) + + # cleanup + w.warehouses.delete(id=created.id) + + Update a warehouse. + + Updates the configuration for a SQL warehouse. + + :param id: str + Required. Id of the warehouse to configure. + :param auto_stop_mins: int (optional) + The amount of time in minutes that a SQL warehouse must be idle (i.e., no RUNNING queries) before it + is automatically stopped. + + Supported values: - Must be == 0 or >= 10 mins - 0 indicates no autostop. + + Defaults to 120 mins + :param channel: :class:`Channel` (optional) + Channel Details + :param cluster_size: str (optional) + Size of the clusters allocated for this warehouse. Increasing the size of a spark cluster allows you + to run larger queries on it. If you want to increase the number of concurrent queries, please tune + max_num_clusters. + + Supported values: - 2X-Small - X-Small - Small - Medium - Large - X-Large - 2X-Large - 3X-Large - + 4X-Large + :param creator_name: str (optional) + warehouse creator name + :param enable_photon: bool (optional) + Configures whether the warehouse should use Photon optimized clusters. + + Defaults to false. + :param enable_serverless_compute: bool (optional) + Configures whether the warehouse should use serverless compute. + :param instance_profile_arn: str (optional) + Deprecated. Instance profile used to pass IAM role to the cluster + :param max_num_clusters: int (optional) + Maximum number of clusters that the autoscaler will create to handle concurrent queries. + + Supported values: - Must be >= min_num_clusters - Must be <= 30. + + Defaults to min_clusters if unset. 
+ :param min_num_clusters: int (optional) + Minimum number of available clusters that will be maintained for this SQL warehouse. Increasing this + will ensure that a larger number of clusters are always running and therefore may reduce the cold + start time for new queries. This is similar to reserved vs. revocable cores in a resource manager. + + Supported values: - Must be > 0 - Must be <= min(max_num_clusters, 30) + + Defaults to 1 + :param name: str (optional) + Logical name for the cluster. + + Supported values: - Must be unique within an org. - Must be less than 100 characters. + :param spot_instance_policy: :class:`SpotInstancePolicy` (optional) + Configures whether the warehouse should use spot instances. + :param tags: :class:`EndpointTags` (optional) + A set of key-value pairs that will be tagged on all resources (e.g., AWS instances and EBS volumes) + associated with this SQL warehouse. + + Supported values: - Number of tags < 45. + :param warehouse_type: :class:`EditWarehouseRequestWarehouseType` (optional) + Warehouse type: `PRO` or `CLASSIC`. If you want to use serverless compute, you must set to `PRO` and + also set the field `enable_serverless_compute` to `true`. + + :returns: + Long-running operation waiter for :class:`GetWarehouseResponse`. + See :method:wait_get_warehouse_running for more details. + + + .. py:method:: edit_and_wait(id: str [, auto_stop_mins: Optional[int], channel: Optional[Channel], cluster_size: Optional[str], creator_name: Optional[str], enable_photon: Optional[bool], enable_serverless_compute: Optional[bool], instance_profile_arn: Optional[str], max_num_clusters: Optional[int], min_num_clusters: Optional[int], name: Optional[str], spot_instance_policy: Optional[SpotInstancePolicy], tags: Optional[EndpointTags], warehouse_type: Optional[EditWarehouseRequestWarehouseType], timeout: datetime.timedelta = 0:20:00]) -> GetWarehouseResponse + + + .. py:method:: get(id: str) -> GetWarehouseResponse + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + created = w.warehouses.create(name=f'sdk-{time.time_ns()}', + cluster_size="2X-Small", + max_num_clusters=1, + auto_stop_mins=10).result() + + wh = w.warehouses.get(id=created.id) + + # cleanup + w.warehouses.delete(id=created.id) + + Get warehouse info. + + Gets the information for a single SQL warehouse. + + :param id: str + Required. Id of the SQL warehouse. + + :returns: :class:`GetWarehouseResponse` + + + .. py:method:: get_permission_levels(warehouse_id: str) -> GetWarehousePermissionLevelsResponse + + Get SQL warehouse permission levels. + + Gets the permission levels that a user can have on an object. + + :param warehouse_id: str + The SQL warehouse for which to get or manage permissions. + + :returns: :class:`GetWarehousePermissionLevelsResponse` + + + .. py:method:: get_permissions(warehouse_id: str) -> WarehousePermissions + + Get SQL warehouse permissions. + + Gets the permissions of a SQL warehouse. SQL warehouses can inherit permissions from their root + object. + + :param warehouse_id: str + The SQL warehouse for which to get or manage permissions. + + :returns: :class:`WarehousePermissions` + + + .. py:method:: get_workspace_warehouse_config() -> GetWorkspaceWarehouseConfigResponse + + Get the workspace configuration. + + Gets the workspace level configuration that is shared by all SQL warehouses in a workspace. + + :returns: :class:`GetWorkspaceWarehouseConfigResponse` + + + .. py:method:: list( [, run_as_user_id: Optional[int]]) -> Iterator[EndpointInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import sql + + w = WorkspaceClient() + + all = w.warehouses.list(sql.ListWarehousesRequest()) + + List warehouses. + + Lists all SQL warehouses that a user has manager permissions on. 
+ + :param run_as_user_id: int (optional) + Service Principal which will be used to fetch the list of warehouses. If not specified, the user + from the session header is used. + + :returns: Iterator over :class:`EndpointInfo` + + + .. py:method:: set_permissions(warehouse_id: str [, access_control_list: Optional[List[WarehouseAccessControlRequest]]]) -> WarehousePermissions + + Set SQL warehouse permissions. + + Sets permissions on a SQL warehouse. SQL warehouses can inherit permissions from their root object. + + :param warehouse_id: str + The SQL warehouse for which to get or manage permissions. + :param access_control_list: List[:class:`WarehouseAccessControlRequest`] (optional) + + :returns: :class:`WarehousePermissions` + + + .. py:method:: set_workspace_warehouse_config( [, channel: Optional[Channel], config_param: Optional[RepeatedEndpointConfPairs], data_access_config: Optional[List[EndpointConfPair]], enabled_warehouse_types: Optional[List[WarehouseTypePair]], global_param: Optional[RepeatedEndpointConfPairs], google_service_account: Optional[str], instance_profile_arn: Optional[str], security_policy: Optional[SetWorkspaceWarehouseConfigRequestSecurityPolicy], sql_configuration_parameters: Optional[RepeatedEndpointConfPairs]]) + + Set the workspace configuration. + + Sets the workspace level configuration that is shared by all SQL warehouses in a workspace. + + :param channel: :class:`Channel` (optional) + Optional: Channel selection details + :param config_param: :class:`RepeatedEndpointConfPairs` (optional) + Deprecated: Use sql_configuration_parameters + :param data_access_config: List[:class:`EndpointConfPair`] (optional) + Spark confs for external hive metastore configuration JSON serialized size must be less than <= 512K + :param enabled_warehouse_types: List[:class:`WarehouseTypePair`] (optional) + List of Warehouse Types allowed in this workspace (limits allowed value of the type field in + CreateWarehouse and EditWarehouse). 
Note: Some types cannot be disabled, they don't need to be + specified in SetWorkspaceWarehouseConfig. Note: Disabling a type may cause existing warehouses to be + converted to another type. Used by frontend to save specific type availability in the warehouse + create and edit form UI. + :param global_param: :class:`RepeatedEndpointConfPairs` (optional) + Deprecated: Use sql_configuration_parameters + :param google_service_account: str (optional) + GCP only: Google Service Account used to pass to cluster to access Google Cloud Storage + :param instance_profile_arn: str (optional) + AWS Only: Instance profile used to pass IAM role to the cluster + :param security_policy: :class:`SetWorkspaceWarehouseConfigRequestSecurityPolicy` (optional) + Security policy for warehouses + :param sql_configuration_parameters: :class:`RepeatedEndpointConfPairs` (optional) + SQL configuration parameters + + + + + .. py:method:: start(id: str) -> Wait[GetWarehouseResponse] + + Start a warehouse. + + Starts a SQL warehouse. + + :param id: str + Required. Id of the SQL warehouse. + + :returns: + Long-running operation waiter for :class:`GetWarehouseResponse`. + See :method:wait_get_warehouse_running for more details. + + + .. py:method:: start_and_wait(id: str, timeout: datetime.timedelta = 0:20:00) -> GetWarehouseResponse + + + .. py:method:: stop(id: str) -> Wait[GetWarehouseResponse] + + Stop a warehouse. + + Stops a SQL warehouse. + + :param id: str + Required. Id of the SQL warehouse. + + :returns: + Long-running operation waiter for :class:`GetWarehouseResponse`. + See :method:wait_get_warehouse_stopped for more details. + + + .. py:method:: stop_and_wait(id: str, timeout: datetime.timedelta = 0:20:00) -> GetWarehouseResponse + + + .. py:method:: update_permissions(warehouse_id: str [, access_control_list: Optional[List[WarehouseAccessControlRequest]]]) -> WarehousePermissions + + Update SQL warehouse permissions. + + Updates the permissions on a SQL warehouse. 
SQL warehouses can inherit permissions from their root + object. + + :param warehouse_id: str + The SQL warehouse for which to get or manage permissions. + :param access_control_list: List[:class:`WarehouseAccessControlRequest`] (optional) + + :returns: :class:`WarehousePermissions` + + + .. py:method:: wait_get_warehouse_running(id: str, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[GetWarehouseResponse], None]]) -> GetWarehouseResponse + + + .. py:method:: wait_get_warehouse_stopped(id: str, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[GetWarehouseResponse], None]]) -> GetWarehouseResponse diff --git a/docs/workspace/vectorsearch/index.rst b/docs/workspace/vectorsearch/index.rst new file mode 100644 index 000000000..b4af05509 --- /dev/null +++ b/docs/workspace/vectorsearch/index.rst @@ -0,0 +1,11 @@ + +Vector Search +============= + +Create and query Vector Search indexes + +.. toctree:: + :maxdepth: 1 + + vector_search_endpoints + vector_search_indexes \ No newline at end of file diff --git a/docs/workspace/vectorsearch/vector_search_endpoints.rst b/docs/workspace/vectorsearch/vector_search_endpoints.rst new file mode 100644 index 000000000..64d92cec2 --- /dev/null +++ b/docs/workspace/vectorsearch/vector_search_endpoints.rst @@ -0,0 +1,60 @@ +``w.vector_search_endpoints``: Endpoints +======================================== +.. currentmodule:: databricks.sdk.service.vectorsearch + +.. py:class:: VectorSearchEndpointsAPI + + **Endpoint**: Represents the compute resources to host vector search indexes. + + .. py:method:: create_endpoint(name: str, endpoint_type: EndpointType) -> Wait[EndpointInfo] + + Create an endpoint. + + Create a new endpoint. + + :param name: str + Name of endpoint + :param endpoint_type: :class:`EndpointType` + Type of endpoint. + + :returns: + Long-running operation waiter for :class:`EndpointInfo`. + See :method:wait_get_endpoint_vector_search_endpoint_online for more details. + + + .. 
py:method:: create_endpoint_and_wait(name: str, endpoint_type: EndpointType, timeout: datetime.timedelta = 0:20:00) -> EndpointInfo + + + .. py:method:: delete_endpoint(endpoint_name: str, name: str) + + Delete an endpoint. + + :param endpoint_name: str + Name of the endpoint + :param name: str + Name of the endpoint to delete + + + + + .. py:method:: get_endpoint(endpoint_name: str) -> EndpointInfo + + Get an endpoint. + + :param endpoint_name: str + Name of the endpoint + + :returns: :class:`EndpointInfo` + + + .. py:method:: list_endpoints( [, page_token: Optional[str]]) -> Iterator[EndpointInfo] + + List all endpoints. + + :param page_token: str (optional) + Token for pagination + + :returns: Iterator over :class:`EndpointInfo` + + + .. py:method:: wait_get_endpoint_vector_search_endpoint_online(endpoint_name: str, timeout: datetime.timedelta = 0:20:00, callback: Optional[Callable[[EndpointInfo], None]]) -> EndpointInfo diff --git a/docs/workspace/vectorsearch/vector_search_indexes.rst b/docs/workspace/vectorsearch/vector_search_indexes.rst new file mode 100644 index 000000000..f79335e4f --- /dev/null +++ b/docs/workspace/vectorsearch/vector_search_indexes.rst @@ -0,0 +1,145 @@ +``w.vector_search_indexes``: Indexes +==================================== +.. currentmodule:: databricks.sdk.service.vectorsearch + +.. py:class:: VectorSearchIndexesAPI + + **Index**: An efficient representation of your embedding vectors that supports real-time and efficient + approximate nearest neighbor (ANN) search queries. + + There are 2 types of Vector Search indexes: * **Delta Sync Index**: An index that automatically syncs with + a source Delta Table, automatically and incrementally updating the index as the underlying data in the + Delta Table changes. * **Direct Vector Access Index**: An index that supports direct read and write of + vectors and metadata through our REST and SDK APIs. With this model, the user manages index updates. + + .. 
py:method:: create_index(name: str, primary_key: str, index_type: VectorIndexType [, delta_sync_vector_index_spec: Optional[DeltaSyncVectorIndexSpecRequest], direct_access_index_spec: Optional[DirectAccessVectorIndexSpec], endpoint_name: Optional[str]]) -> CreateVectorIndexResponse + + Create an index. + + Create a new index. + + :param name: str + Name of the index + :param primary_key: str + Primary key of the index + :param index_type: :class:`VectorIndexType` + There are 2 types of Vector Search indexes: + + - `DELTA_SYNC`: An index that automatically syncs with a source Delta Table, automatically and + incrementally updating the index as the underlying data in the Delta Table changes. - + `DIRECT_ACCESS`: An index that supports direct read and write of vectors and metadata through our + REST and SDK APIs. With this model, the user manages index updates. + :param delta_sync_vector_index_spec: :class:`DeltaSyncVectorIndexSpecRequest` (optional) + Specification for Delta Sync Index. Required if `index_type` is `DELTA_SYNC`. + :param direct_access_index_spec: :class:`DirectAccessVectorIndexSpec` (optional) + Specification for Direct Vector Access Index. Required if `index_type` is `DIRECT_ACCESS`. + :param endpoint_name: str (optional) + Name of the endpoint to be used for serving the index + + :returns: :class:`CreateVectorIndexResponse` + + + .. py:method:: delete_data_vector_index(name: str, primary_keys: List[str]) -> DeleteDataVectorIndexResponse + + Delete data from index. + + Handles the deletion of data from a specified vector index. + + :param name: str + Name of the vector index where data is to be deleted. Must be a Direct Vector Access Index. + :param primary_keys: List[str] + List of primary keys for the data to be deleted. + + :returns: :class:`DeleteDataVectorIndexResponse` + + + .. py:method:: delete_index(index_name: str) + + Delete an index. + + Delete an index. + + :param index_name: str + Name of the index + + + + + .. 
py:method:: get_index(index_name: str) -> VectorIndex + + Get an index. + + Get an index. + + :param index_name: str + Name of the index + + :returns: :class:`VectorIndex` + + + .. py:method:: list_indexes(endpoint_name: str [, page_token: Optional[str]]) -> Iterator[MiniVectorIndex] + + List indexes. + + List all indexes in the given endpoint. + + :param endpoint_name: str + Name of the endpoint + :param page_token: str (optional) + Token for pagination + + :returns: Iterator over :class:`MiniVectorIndex` + + + .. py:method:: query_index(index_name: str, columns: List[str] [, filters_json: Optional[str], num_results: Optional[int], query_text: Optional[str], query_vector: Optional[List[float]]]) -> QueryVectorIndexResponse + + Query an index. + + Query the specified vector index. + + :param index_name: str + Name of the vector index to query. + :param columns: List[str] + List of column names to include in the response. + :param filters_json: str (optional) + JSON string representing query filters. + + Example filters: - `{"id <": 5}`: Filter for id less than 5. - `{"id >": 5}`: Filter for id greater + than 5. - `{"id <=": 5}`: Filter for id less than or equal to 5. - `{"id >=": 5}`: Filter for id + greater than or equal to 5. - `{"id": 5}`: Filter for id equal to 5. + :param num_results: int (optional) + Number of results to return. Defaults to 10. + :param query_text: str (optional) + Query text. Required for Delta Sync Index using model endpoint. + :param query_vector: List[float] (optional) + Query vector. Required for Direct Vector Access Index and Delta Sync Index using self-managed + vectors. + + :returns: :class:`QueryVectorIndexResponse` + + + .. py:method:: sync_index(index_name: str) + + Synchronize an index. + + Triggers a synchronization process for a specified vector index. + + :param index_name: str + Name of the vector index to synchronize. Must be a Delta Sync Index. + + + + + .. 
py:method:: upsert_data_vector_index(name: str, inputs_json: str) -> UpsertDataVectorIndexResponse + + Upsert data into an index. + + Handles the upserting of data into a specified vector index. + + :param name: str + Name of the vector index where data is to be upserted. Must be a Direct Vector Access Index. + :param inputs_json: str + JSON string representing the data to be upserted. + + :returns: :class:`UpsertDataVectorIndexResponse` + \ No newline at end of file diff --git a/docs/workspace/workspace/git_credentials.rst b/docs/workspace/workspace/git_credentials.rst new file mode 100644 index 000000000..f43b25b07 --- /dev/null +++ b/docs/workspace/workspace/git_credentials.rst @@ -0,0 +1,146 @@ +``w.git_credentials``: Git Credentials +====================================== +.. currentmodule:: databricks.sdk.service.workspace + +.. py:class:: GitCredentialsAPI + + Registers personal access token for Databricks to do operations on behalf of the user. + + See [more info]. + + [more info]: https://docs.databricks.com/repos/get-access-tokens-from-git-provider.html + + .. py:method:: create(git_provider: str [, git_username: Optional[str], personal_access_token: Optional[str]]) -> CreateCredentialsResponse + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + cr = w.git_credentials.create(git_provider="gitHub", git_username="test", personal_access_token="test") + + # cleanup + w.git_credentials.delete(credential_id=cr.credential_id) + + Create a credential entry. + + Creates a Git credential entry for the user. Only one Git credential per user is supported, so any + attempts to create credentials if an entry already exists will fail. Use the PATCH endpoint to update + existing credentials, or the DELETE endpoint to delete existing credentials. + + :param git_provider: str + Git provider. This field is case-insensitive. 
The available Git providers are gitHub, + bitbucketCloud, gitLab, azureDevOpsServices, gitHubEnterprise, bitbucketServer, + gitLabEnterpriseEdition and awsCodeCommit. + :param git_username: str (optional) + Git username. + :param personal_access_token: str (optional) + The personal access token used to authenticate to the corresponding Git provider. + + :returns: :class:`CreateCredentialsResponse` + + + .. py:method:: delete(credential_id: int) + + Delete a credential. + + Deletes the specified Git credential. + + :param credential_id: int + The ID for the corresponding credential to access. + + + + + .. py:method:: get(credential_id: int) -> CredentialInfo + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + cr = w.git_credentials.create(git_provider="gitHub", git_username="test", personal_access_token="test") + + by_id = w.git_credentials.get(credential_id=cr.credential_id) + + # cleanup + w.git_credentials.delete(credential_id=cr.credential_id) + + Get a credential entry. + + Gets the Git credential with the specified credential ID. + + :param credential_id: int + The ID for the corresponding credential to access. + + :returns: :class:`CredentialInfo` + + + .. py:method:: list() -> Iterator[CredentialInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + list = w.git_credentials.list() + + Get Git credentials. + + Lists the calling user's Git credentials. One credential per user is supported. + + :returns: Iterator over :class:`CredentialInfo` + + + .. py:method:: update(credential_id: int [, git_provider: Optional[str], git_username: Optional[str], personal_access_token: Optional[str]]) + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + cr = w.git_credentials.create(git_provider="gitHub", git_username="test", personal_access_token="test") + + w.git_credentials.update(credential_id=cr.credential_id, + git_provider="gitHub", + git_username=f'sdk-{time.time_ns()}@example.com', + personal_access_token=f'sdk-{time.time_ns()}') + + # cleanup + w.git_credentials.delete(credential_id=cr.credential_id) + + Update a credential. + + Updates the specified Git credential. + + :param credential_id: int + The ID for the corresponding credential to access. + :param git_provider: str (optional) + Git provider. This field is case-insensitive. The available Git providers are gitHub, + bitbucketCloud, gitLab, azureDevOpsServices, gitHubEnterprise, bitbucketServer, + gitLabEnterpriseEdition and awsCodeCommit. + :param git_username: str (optional) + Git username. + :param personal_access_token: str (optional) + The personal access token used to authenticate to the corresponding Git provider. + + + \ No newline at end of file diff --git a/docs/workspace/workspace/index.rst b/docs/workspace/workspace/index.rst new file mode 100644 index 000000000..c52ba1acb --- /dev/null +++ b/docs/workspace/workspace/index.rst @@ -0,0 +1,13 @@ + +Workspace +========= + +Manage workspace-level entities that include notebooks, Git checkouts, and secrets + +.. toctree:: + :maxdepth: 1 + + git_credentials + repos + secrets + workspace \ No newline at end of file diff --git a/docs/workspace/workspace/repos.rst b/docs/workspace/workspace/repos.rst new file mode 100644 index 000000000..584ad70b3 --- /dev/null +++ b/docs/workspace/workspace/repos.rst @@ -0,0 +1,219 @@ +``w.repos``: Repos +================== +.. currentmodule:: databricks.sdk.service.workspace + +.. py:class:: ReposAPI + + The Repos API allows users to manage their git repos. Users can use the API to access all repos that they + have manage permissions on. 
+ + Databricks Repos is a visual Git client in Databricks. It supports common Git operations such as cloning a + repository, committing and pushing, pulling, branch management, and visual comparison of diffs when + committing. + + Within Repos you can develop code in notebooks or other files and follow data science and engineering code + development best practices using Git for version control, collaboration, and CI/CD. + + .. py:method:: create(url: str, provider: str [, path: Optional[str], sparse_checkout: Optional[SparseCheckout]]) -> RepoInfo + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + root = f'sdk-{time.time_ns()}' + + ri = w.repos.create(path=root, url="https://github.com/shreyas-goenka/empty-repo.git", provider="github") + + # cleanup + w.repos.delete(repo_id=ri.id) + + Create a repo. + + Creates a repo in the workspace and links it to the remote Git repo specified. Note that repos created + programmatically must be linked to a remote Git repo, unlike repos created in the browser. + + :param url: str + URL of the Git repository to be linked. + :param provider: str + Git provider. This field is case-insensitive. The available Git providers are gitHub, + bitbucketCloud, gitLab, azureDevOpsServices, gitHubEnterprise, bitbucketServer, + gitLabEnterpriseEdition and awsCodeCommit. + :param path: str (optional) + Desired path for the repo in the workspace. Must be in the format /Repos/{folder}/{repo-name}. + :param sparse_checkout: :class:`SparseCheckout` (optional) + If specified, the repo will be created with sparse checkout enabled. You cannot enable/disable + sparse checkout after the repo is created. + + :returns: :class:`RepoInfo` + + + .. py:method:: delete(repo_id: int) + + Delete a repo. + + Deletes the specified repo. + + :param repo_id: int + The ID for the corresponding repo to access. + + + + + .. py:method:: get(repo_id: int) -> RepoInfo + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + root = f'sdk-{time.time_ns()}' + + ri = w.repos.create(path=root, url="https://github.com/shreyas-goenka/empty-repo.git", provider="github") + + by_id = w.repos.get(repo_id=ri.id) + + # cleanup + w.repos.delete(repo_id=ri.id) + + Get a repo. + + Returns the repo with the given repo ID. + + :param repo_id: int + The ID for the corresponding repo to access. + + :returns: :class:`RepoInfo` + + + .. py:method:: get_permission_levels(repo_id: str) -> GetRepoPermissionLevelsResponse + + Get repo permission levels. + + Gets the permission levels that a user can have on an object. + + :param repo_id: str + The repo for which to get or manage permissions. + + :returns: :class:`GetRepoPermissionLevelsResponse` + + + .. py:method:: get_permissions(repo_id: str) -> RepoPermissions + + Get repo permissions. + + Gets the permissions of a repo. Repos can inherit permissions from their root object. + + :param repo_id: str + The repo for which to get or manage permissions. + + :returns: :class:`RepoPermissions` + + + .. py:method:: list( [, next_page_token: Optional[str], path_prefix: Optional[str]]) -> Iterator[RepoInfo] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import workspace + + w = WorkspaceClient() + + all = w.repos.list(workspace.ListReposRequest()) + + Get repos. + + Returns repos that the calling user has Manage permissions on. Results are paginated with each page + containing twenty repos. + + :param next_page_token: str (optional) + Token used to get the next page of results. If not specified, returns the first page of results as + well as a next page token if there are more results. + :param path_prefix: str (optional) + Filters repos that have paths starting with the given path prefix. + + :returns: Iterator over :class:`RepoInfo` + + + .. 
py:method:: set_permissions(repo_id: str [, access_control_list: Optional[List[RepoAccessControlRequest]]]) -> RepoPermissions + + Set repo permissions. + + Sets permissions on a repo. Repos can inherit permissions from their root object. + + :param repo_id: str + The repo for which to get or manage permissions. + :param access_control_list: List[:class:`RepoAccessControlRequest`] (optional) + + :returns: :class:`RepoPermissions` + + + .. py:method:: update(repo_id: int [, branch: Optional[str], sparse_checkout: Optional[SparseCheckoutUpdate], tag: Optional[str]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + root = f'sdk-{time.time_ns()}' + + ri = w.repos.create(path=root, url="https://github.com/shreyas-goenka/empty-repo.git", provider="github") + + w.repos.update(repo_id=ri.id, branch="foo") + + # cleanup + w.repos.delete(repo_id=ri.id) + + Update a repo. + + Updates the repo to a different branch or tag, or updates the repo to the latest commit on the same + branch. + + :param repo_id: int + The ID for the corresponding repo to access. + :param branch: str (optional) + Branch that the local version of the repo is checked out to. + :param sparse_checkout: :class:`SparseCheckoutUpdate` (optional) + If specified, update the sparse checkout settings. The update will fail if sparse checkout is not + enabled for the repo. + :param tag: str (optional) + Tag that the local version of the repo is checked out to. Updating the repo to a tag puts the repo + in a detached HEAD state. Before committing new changes, you must update the repo to a branch + instead of the detached HEAD. + + + + + .. py:method:: update_permissions(repo_id: str [, access_control_list: Optional[List[RepoAccessControlRequest]]]) -> RepoPermissions + + Update repo permissions. + + Updates the permissions on a repo. Repos can inherit permissions from their root object. 
+ + :param repo_id: str + The repo for which to get or manage permissions. + :param access_control_list: List[:class:`RepoAccessControlRequest`] (optional) + + :returns: :class:`RepoPermissions` + \ No newline at end of file diff --git a/docs/workspace/workspace/secrets.rst b/docs/workspace/workspace/secrets.rst new file mode 100644 index 000000000..add03d224 --- /dev/null +++ b/docs/workspace/workspace/secrets.rst @@ -0,0 +1,367 @@ +``w.secrets``: Secret +===================== +.. currentmodule:: databricks.sdk.service.workspace + +.. py:class:: SecretsAPI + + The Secrets API allows you to manage secrets, secret scopes, and access permissions. + + Sometimes accessing data requires that you authenticate to external data sources through JDBC. Instead of + directly entering your credentials into a notebook, use Databricks secrets to store your credentials and + reference them in notebooks and jobs. + + Administrators, secret creators, and users granted permission can read Databricks secrets. While + Databricks makes an effort to redact secret values that might be displayed in notebooks, it is not + possible to prevent such users from reading secrets. + + .. py:method:: create_scope(scope: str [, backend_azure_keyvault: Optional[AzureKeyVaultSecretScopeMetadata], initial_manage_principal: Optional[str], scope_backend_type: Optional[ScopeBackendType]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + key_name = f'sdk-{time.time_ns()}' + + scope_name = f'sdk-{time.time_ns()}' + + w.secrets.create_scope(scope=scope_name) + + # cleanup + w.secrets.delete_secret(scope=scope_name, key=key_name) + w.secrets.delete_scope(scope=scope_name) + + Create a new secret scope. + + The scope name must consist of alphanumeric characters, dashes, underscores, and periods, and may not + exceed 128 characters. The maximum number of scopes in a workspace is 100. 
+ + :param scope: str + Scope name requested by the user. Scope names are unique. + :param backend_azure_keyvault: :class:`AzureKeyVaultSecretScopeMetadata` (optional) + The metadata for the secret scope if the type is `AZURE_KEYVAULT` + :param initial_manage_principal: str (optional) + The principal that is initially granted `MANAGE` permission to the created scope. + :param scope_backend_type: :class:`ScopeBackendType` (optional) + The backend type the scope will be created with. If not specified, will default to `DATABRICKS` + + + + + .. py:method:: delete_acl(scope: str, principal: str) + + Delete an ACL. + + Deletes the given ACL on the given scope. + + Users must have the `MANAGE` permission to invoke this API. Throws `RESOURCE_DOES_NOT_EXIST` if no + such secret scope, principal, or ACL exists. Throws `PERMISSION_DENIED` if the user does not have + permission to make this API call. + + :param scope: str + The name of the scope to remove permissions from. + :param principal: str + The principal to remove an existing ACL from. + + + + + .. py:method:: delete_scope(scope: str) + + Delete a secret scope. + + Deletes a secret scope. + + Throws `RESOURCE_DOES_NOT_EXIST` if the scope does not exist. Throws `PERMISSION_DENIED` if the user + does not have permission to make this API call. + + :param scope: str + Name of the scope to delete. + + + + + .. py:method:: delete_secret(scope: str, key: str) + + Delete a secret. + + Deletes the secret stored in this secret scope. You must have `WRITE` or `MANAGE` permission on the + secret scope. + + Throws `RESOURCE_DOES_NOT_EXIST` if no such secret scope or secret exists. Throws `PERMISSION_DENIED` + if the user does not have permission to make this API call. + + :param scope: str + The name of the scope that contains the secret to delete. + :param key: str + Name of the secret to delete. + + + + + .. py:method:: get_acl(scope: str, principal: str) -> AclItem + + Get secret ACL details. 
+ + Gets the details about the given ACL, such as the group and permission. Users must have the `MANAGE` + permission to invoke this API. + + Throws `RESOURCE_DOES_NOT_EXIST` if no such secret scope exists. Throws `PERMISSION_DENIED` if the + user does not have permission to make this API call. + + :param scope: str + The name of the scope to fetch ACL information from. + :param principal: str + The principal to fetch ACL information for. + + :returns: :class:`AclItem` + + + .. py:method:: get_secret(scope: str, key: str) -> GetSecretResponse + + Get a secret. + + Gets the bytes representation of a secret value for the specified scope and key. + + Users need the READ permission to make this call. + + Note that the secret value returned is in bytes. The interpretation of the bytes is determined by the + caller in DBUtils and the type the data is decoded into. + + Throws ``PERMISSION_DENIED`` if the user does not have permission to make this API call. Throws + ``RESOURCE_DOES_NOT_EXIST`` if no such secret or secret scope exists. + + :param scope: str + The name of the scope to fetch secret information from. + :param key: str + The key to fetch secret for. + + :returns: :class:`GetSecretResponse` + + + .. py:method:: list_acls(scope: str) -> Iterator[AclItem] + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + key_name = f'sdk-{time.time_ns()}' + + scope_name = f'sdk-{time.time_ns()}' + + w.secrets.create_scope(scope=scope_name) + + acls = w.secrets.list_acls(scope=scope_name) + + # cleanup + w.secrets.delete_secret(scope=scope_name, key=key_name) + w.secrets.delete_scope(scope=scope_name) + + Lists ACLs. + + List the ACLs for a given secret scope. Users must have the `MANAGE` permission to invoke this API. + + Throws `RESOURCE_DOES_NOT_EXIST` if no such secret scope exists. Throws `PERMISSION_DENIED` if the + user does not have permission to make this API call. 
+ + :param scope: str + The name of the scope to fetch ACL information from. + + :returns: Iterator over :class:`AclItem` + + + .. py:method:: list_scopes() -> Iterator[SecretScope] + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + scopes = w.secrets.list_scopes() + + List all scopes. + + Lists all secret scopes available in the workspace. + + Throws `PERMISSION_DENIED` if the user does not have permission to make this API call. + + :returns: Iterator over :class:`SecretScope` + + + .. py:method:: list_secrets(scope: str) -> Iterator[SecretMetadata] + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + key_name = f'sdk-{time.time_ns()}' + + scope_name = f'sdk-{time.time_ns()}' + + w.secrets.create_scope(scope=scope_name) + + scrts = w.secrets.list_secrets(scope=scope_name) + + # cleanup + w.secrets.delete_secret(scope=scope_name, key=key_name) + w.secrets.delete_scope(scope=scope_name) + + List secret keys. + + Lists the secret keys that are stored at this scope. This is a metadata-only operation; secret data + cannot be retrieved using this API. Users need the READ permission to make this call. + + The lastUpdatedTimestamp returned is in milliseconds since epoch. Throws `RESOURCE_DOES_NOT_EXIST` if + no such secret scope exists. Throws `PERMISSION_DENIED` if the user does not have permission to make + this API call. + + :param scope: str + The name of the scope to list secrets within. + + :returns: Iterator over :class:`SecretMetadata` + + + .. py:method:: put_acl(scope: str, principal: str, permission: AclPermission) + + + Usage: + + .. 
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import workspace + + w = WorkspaceClient() + + key_name = f'sdk-{time.time_ns()}' + + group = w.groups.create(display_name=f'sdk-{time.time_ns()}') + + scope_name = f'sdk-{time.time_ns()}' + + w.secrets.create_scope(scope=scope_name) + + w.secrets.put_acl(scope=scope_name, permission=workspace.AclPermission.MANAGE, principal=group.display_name) + + # cleanup + w.groups.delete(id=group.id) + w.secrets.delete_secret(scope=scope_name, key=key_name) + w.secrets.delete_scope(scope=scope_name) + + Create/update an ACL. + + Creates or overwrites the Access Control List (ACL) associated with the given principal (user or + group) on the specified scope point. + + In general, a user or group will use the most powerful permission available to them, and permissions + are ordered as follows: + + * `MANAGE` - Allowed to change ACLs, and read and write to this secret scope. * `WRITE` - Allowed to + read and write to this secret scope. * `READ` - Allowed to read this secret scope and list what + secrets are available. + + Note that in general, secret values can only be read from within a command on a cluster (for example, + through a notebook). There is no API to read the actual secret value material outside of a cluster. + However, the user's permission will be applied based on who is executing the command, and they must + have at least READ permission. + + Users must have the `MANAGE` permission to invoke this API. + + The principal is a user or group name corresponding to an existing Databricks principal to be granted + or revoked access. + + Throws `RESOURCE_DOES_NOT_EXIST` if no such secret scope exists. Throws `RESOURCE_ALREADY_EXISTS` if a + permission for the principal already exists. Throws `INVALID_PARAMETER_VALUE` if the permission or + principal is invalid. Throws `PERMISSION_DENIED` if the user does not have permission to make this API + call. 
+ + :param scope: str + The name of the scope to apply permissions to. + :param principal: str + The principal to which the permission is applied. + :param permission: :class:`AclPermission` + The permission level applied to the principal. + + + + + .. py:method:: put_secret(scope: str, key: str [, bytes_value: Optional[str], string_value: Optional[str]]) + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + key_name = f'sdk-{time.time_ns()}' + + scope_name = f'sdk-{time.time_ns()}' + + w.secrets.create_scope(scope=scope_name) + + w.secrets.put_secret(scope=scope_name, key=key_name, string_value=f'sdk-{time.time_ns()}') + + # cleanup + w.secrets.delete_secret(scope=scope_name, key=key_name) + w.secrets.delete_scope(scope=scope_name) + + Add a secret. + + Inserts a secret under the provided scope with the given name. If a secret already exists with the + same name, this command overwrites the existing secret's value. The server encrypts the secret using + the secret scope's encryption settings before storing it. + + You must have `WRITE` or `MANAGE` permission on the secret scope. The secret key must consist of + alphanumeric characters, dashes, underscores, and periods, and cannot exceed 128 characters. The + maximum allowed secret value size is 128 KB. The maximum number of secrets in a given scope is 1000. + + The input fields "string_value" or "bytes_value" specify the type of the secret, which will determine + the value returned when the secret value is requested. Exactly one must be specified. + + Throws `RESOURCE_DOES_NOT_EXIST` if no such secret scope exists. Throws `RESOURCE_LIMIT_EXCEEDED` if + maximum number of secrets in scope is exceeded. Throws `INVALID_PARAMETER_VALUE` if the key name or + value length is invalid. Throws `PERMISSION_DENIED` if the user does not have permission to make this + API call.
+ + :param scope: str + The name of the scope with which the secret will be associated. + :param key: str + A unique name to identify the secret. + :param bytes_value: str (optional) + If specified, value will be stored as bytes. + :param string_value: str (optional) + If specified, note that the value will be stored in UTF-8 (MB4) form. + + + \ No newline at end of file diff --git a/docs/workspace/workspace/workspace.rst b/docs/workspace/workspace/workspace.rst new file mode 100644 index 000000000..53b875b0c --- /dev/null +++ b/docs/workspace/workspace/workspace.rst @@ -0,0 +1,342 @@ +``w.workspace``: Workspace +========================== +.. currentmodule:: databricks.sdk.service.workspace + +.. py:class:: WorkspaceExt + + The Workspace API allows you to list, import, export, and delete notebooks and folders. + + A notebook is a web-based interface to a document that contains runnable code, visualizations, and + explanatory text. + + .. py:method:: delete(path: str [, recursive: Optional[bool]]) + + Delete a workspace object. + + Deletes an object or a directory (and optionally recursively deletes all objects in the directory). * + If `path` does not exist, this call returns an error `RESOURCE_DOES_NOT_EXIST`. * If `path` is a + non-empty directory and `recursive` is set to `false`, this call returns an error + `DIRECTORY_NOT_EMPTY`. + + Object deletion cannot be undone and deleting a directory recursively is not atomic. + + :param path: str + The absolute path of the notebook or directory. + :param recursive: bool (optional) + The flag that specifies whether to delete the object recursively. It is `false` by default. Please + note that deleting a directory is not atomic. If it fails in the middle, some of the objects under this + directory may already be deleted, and this cannot be undone. + + + + + .. py:method:: download(path: str [, format: ExportFormat]) -> BinaryIO + + + Usage: + + ..
code-block:: + + import io + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service.workspace import ImportFormat + + w = WorkspaceClient() + + py_file = f'/Users/{w.current_user.me().user_name}/file-{time.time_ns()}.py' + + w.workspace.upload(py_file, io.BytesIO(b'print(1)'), format=ImportFormat.AUTO) + with w.workspace.download(py_file) as f: + content = f.read() + assert content == b'print(1)' + + w.workspace.delete(py_file) + + + Downloads notebook or file from the workspace + + :param path: location of the file or notebook on workspace. + :param format: By default, `ExportFormat.SOURCE`. If using `ExportFormat.AUTO` the `path` + is imported or exported as either a workspace file or a notebook, depending + on an analysis of the `item`’s extension and the header content provided in + the request. + :return: file-like `io.BinaryIO` of the `path` contents. + + + .. py:method:: export(path: str [, format: Optional[ExportFormat]]) -> ExportResponse + + + Usage: + + .. code-block:: + + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import workspace + + w = WorkspaceClient() + + notebook = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + export_response = w.workspace.export(format=workspace.ExportFormat.SOURCE, path=notebook) + + Export a workspace object. + + Exports an object or the contents of an entire directory. + + If `path` does not exist, this call returns an error `RESOURCE_DOES_NOT_EXIST`. + + If the exported data would exceed the size limit, this call returns `MAX_NOTEBOOK_SIZE_EXCEEDED`. + Currently, this API does not support exporting a library. + + :param path: str + The absolute path of the object or directory. Exporting a directory is only supported for the `DBC`, + `SOURCE`, and `AUTO` formats. + :param format: :class:`ExportFormat` (optional) + This specifies the format of the exported file. By default, this is `SOURCE`. + + The value is case sensitive.
+ + - `SOURCE`: The notebook is exported as source code. Directory exports will not include non-notebook + entries. - `HTML`: The notebook is exported as an HTML file. - `JUPYTER`: The notebook is exported + as a Jupyter/IPython Notebook file. - `DBC`: The notebook is exported in Databricks archive format. + Directory exports will not include non-notebook entries. - `R_MARKDOWN`: The notebook is exported to + R Markdown format. - `AUTO`: The object or directory is exported depending on the object's type. + Directory exports will include notebooks and workspace files. + + :returns: :class:`ExportResponse` + + + .. py:method:: get_permission_levels(workspace_object_type: str, workspace_object_id: str) -> GetWorkspaceObjectPermissionLevelsResponse + + Get workspace object permission levels. + + Gets the permission levels that a user can have on an object. + + :param workspace_object_type: str + The workspace object type for which to get or manage permissions. + :param workspace_object_id: str + The workspace object for which to get or manage permissions. + + :returns: :class:`GetWorkspaceObjectPermissionLevelsResponse` + + + .. py:method:: get_permissions(workspace_object_type: str, workspace_object_id: str) -> WorkspaceObjectPermissions + + Get workspace object permissions. + + Gets the permissions of a workspace object. Workspace objects can inherit permissions from their + parent objects or root object. + + :param workspace_object_type: str + The workspace object type for which to get or manage permissions. + :param workspace_object_id: str + The workspace object for which to get or manage permissions. + + :returns: :class:`WorkspaceObjectPermissions` + + + .. py:method:: get_status(path: str) -> ObjectInfo + + + Usage: + + ..
code-block:: + + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + obj = w.workspace.get_status(path=notebook_path) + + Get status. + + Gets the status of an object or a directory. If `path` does not exist, this call returns an error + `RESOURCE_DOES_NOT_EXIST`. + + :param path: str + The absolute path of the notebook or directory. + + :returns: :class:`ObjectInfo` + + + .. py:method:: import_(path: str [, content: Optional[str], format: Optional[ImportFormat], language: Optional[Language], overwrite: Optional[bool]]) + + + Usage: + + .. code-block:: + + import base64 + import time + + from databricks.sdk import WorkspaceClient + from databricks.sdk.service import workspace + + w = WorkspaceClient() + + notebook_path = f'/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}' + + w.workspace.import_(content=base64.b64encode(("CREATE LIVE TABLE dlt_sample AS SELECT 1").encode()).decode(), + format=workspace.ImportFormat.SOURCE, + language=workspace.Language.SQL, + overwrite=True, + path=notebook_path) + + Import a workspace object. + + Imports a workspace object (for example, a notebook or file) or the contents of an entire directory. + If `path` already exists and `overwrite` is set to `false`, this call returns an error + `RESOURCE_ALREADY_EXISTS`. To import a directory, you can use either the `DBC` format or the `SOURCE` + format with the `language` field unset. To import a single file as `SOURCE`, you must set the + `language` field. + + :param path: str + The absolute path of the object or directory. Importing a directory is only supported for the `DBC` + and `SOURCE` formats. + :param content: str (optional) + The base64-encoded content. This has a limit of 10 MB. + + If the limit (10MB) is exceeded, exception with error code **MAX_NOTEBOOK_SIZE_EXCEEDED** is thrown. + This parameter might be absent, and instead a posted file is used. 
+ :param format: :class:`ImportFormat` (optional) + This specifies the format of the file to be imported. + + The value is case sensitive. + + - `AUTO`: The item is imported depending on an analysis of the item's extension and the header + content provided in the request. If the item is imported as a notebook, then the item's extension is + automatically removed. - `SOURCE`: The notebook or directory is imported as source code. - `HTML`: + The notebook is imported as an HTML file. - `JUPYTER`: The notebook is imported as a Jupyter/IPython + Notebook file. - `DBC`: The notebook is imported in Databricks archive format. Required for + directories. - `R_MARKDOWN`: The notebook is imported from R Markdown format. + :param language: :class:`Language` (optional) + The language of the object. This value is set only if the object type is `NOTEBOOK`. + :param overwrite: bool (optional) + The flag that specifies whether to overwrite an existing object. It is `false` by default. For `DBC` + format, `overwrite` is not supported since it may contain a directory. + + + + + .. py:method:: list(path: str [, notebooks_modified_after: int, recursive: bool = False]) -> ObjectInfo + + + Usage: + + .. code-block:: + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + names = [] + for i in w.workspace.list(f'/Users/{w.current_user.me().user_name}', recursive=True): + names.append(i.path) + assert len(names) > 0 + + List workspace objects + + :param recursive: bool + Optionally invoke recursive traversal + + :returns: Iterator over :class:`ObjectInfo` + + + .. py:method:: mkdirs(path: str) + + Create a directory. + + Creates the specified directory (and necessary parent directories if they do not exist). If there is + an object (not a directory) at any prefix of the input path, this call returns an error + `RESOURCE_ALREADY_EXISTS`. + + Note that if this operation fails it may have succeeded in creating some of the necessary parent + directories.
+ + :param path: str + The absolute path of the directory. If the parent directories do not exist, it will also create + them. If the directory already exists, this command will do nothing and succeed. + + + + + .. py:method:: set_permissions(workspace_object_type: str, workspace_object_id: str [, access_control_list: Optional[List[WorkspaceObjectAccessControlRequest]]]) -> WorkspaceObjectPermissions + + Set workspace object permissions. + + Sets permissions on a workspace object. Workspace objects can inherit permissions from their parent + objects or root object. + + :param workspace_object_type: str + The workspace object type for which to get or manage permissions. + :param workspace_object_id: str + The workspace object for which to get or manage permissions. + :param access_control_list: List[:class:`WorkspaceObjectAccessControlRequest`] (optional) + + :returns: :class:`WorkspaceObjectPermissions` + + + .. py:method:: update_permissions(workspace_object_type: str, workspace_object_id: str [, access_control_list: Optional[List[WorkspaceObjectAccessControlRequest]]]) -> WorkspaceObjectPermissions + + Update workspace object permissions. + + Updates the permissions on a workspace object. Workspace objects can inherit permissions from their + parent objects or root object. + + :param workspace_object_type: str + The workspace object type for which to get or manage permissions. + :param workspace_object_id: str + The workspace object for which to get or manage permissions. + :param access_control_list: List[:class:`WorkspaceObjectAccessControlRequest`] (optional) + + :returns: :class:`WorkspaceObjectPermissions` + + + .. py:method:: upload(path: str, content: BinaryIO [, format: ImportFormat, language: Language, overwrite: bool = False]) + + + Usage: + + .. 
code-block:: + + import io + import time + + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + notebook = f'/Users/{w.current_user.me().user_name}/notebook-{time.time_ns()}.py' + + w.workspace.upload(notebook, io.BytesIO(b'print(1)')) + with w.workspace.download(notebook) as f: + content = f.read() + assert content == b'# Databricks notebook source\nprint(1)' + + w.workspace.delete(notebook) + + + Uploads a workspace object (for example, a notebook or file) or the contents of an entire + directory (`DBC` format). + + Errors: + * `RESOURCE_ALREADY_EXISTS`: if `path` already exists and `overwrite=True` is not set. + * `INVALID_PARAMETER_VALUE`: if `format` and `content` values are not compatible. + + :param path: target location of the file on workspace. + :param content: file-like `io.BinaryIO` of the `path` contents. + :param format: By default, `ImportFormat.SOURCE`. If using `ImportFormat.AUTO` the `path` + is imported or exported as either a workspace file or a notebook, depending + on an analysis of the `item`’s extension and the header content provided in + the request. In addition, if the `path` is imported as a notebook, then + the `item`’s extension is automatically removed. + :param language: Only required if using `ImportFormat.SOURCE`. + \ No newline at end of file