From 7d854ee4f64ff4e68b47decb7c9fcf3f590975d4 Mon Sep 17 00:00:00 2001 From: Omkar P <45419097+omkar-foss@users.noreply.github.com> Date: Wed, 12 Feb 2025 01:29:26 +0530 Subject: [PATCH 1/3] feat(python): add capability to read unity catalog (uc://) uris This adds capability to read directly from uc:// uris using the local catalog-unity crate. This also exposes the UC temporary credentials in storage_options of the `DeltaTable` instance so polars or similar readers can use it. Signed-off-by: Omkar P <45419097+omkar-foss@users.noreply.github.com> --- crates/catalog-unity/src/lib.rs | 145 +++++++++++++++++++++++++++++-- crates/core/src/table/builder.rs | 5 +- crates/deltalake/src/lib.rs | 2 + python/src/lib.rs | 1 + 4 files changed, 146 insertions(+), 7 deletions(-) diff --git a/crates/catalog-unity/src/lib.rs b/crates/catalog-unity/src/lib.rs index db9153b4b7..fe5ca1b3e9 100644 --- a/crates/catalog-unity/src/lib.rs +++ b/crates/catalog-unity/src/lib.rs @@ -7,8 +7,12 @@ compile_error!( for this crate to function properly." ); +use deltalake_core::logstore::{default_logstore, logstores, LogStore, LogStoreFactory}; use reqwest::header::{HeaderValue, InvalidHeaderValue, AUTHORIZATION}; +use reqwest::Url; +use std::collections::HashMap; use std::str::FromStr; +use std::sync::Arc; use crate::credential::{ AzureCliCredential, ClientSecretOAuthProvider, CredentialProvider, WorkspaceOAuthProvider, @@ -19,11 +23,13 @@ use crate::models::{ }; use deltalake_core::data_catalog::DataCatalogResult; -use deltalake_core::{DataCatalog, DataCatalogError}; +use deltalake_core::{DataCatalog, DataCatalogError, DeltaResult, DeltaTableBuilder, Path}; use crate::client::retry::*; -use deltalake_core::storage::str_is_truthy; - +use deltalake_core::storage::{ + factories, str_is_truthy, IORuntime, ObjectStoreFactory, ObjectStoreRef, RetryConfigParse, + StorageOptions, +}; pub mod client; pub mod credential; #[cfg(feature = "datafusion")] @@ -201,6 +207,11 @@ pub enum UnityCatalogConfigKey { /// - `azure_use_azure_cli` /// - `use_azure_cli` UseAzureCli, + + /// Allow http url (e.g. http://localhost:8080/api/2.1/...) + /// Supported keys: + /// - `unity_allow_http_url` + AllowHttpUrl, } impl FromStr for UnityCatalogConfigKey { @@ -246,6 +257,7 @@ impl FromStr for UnityCatalogConfigKey { | "unity_workspace_url" | "databricks_workspace_url" | "databricks_host" => Ok(UnityCatalogConfigKey::WorkspaceUrl), + "allow_http_url" | "unity_allow_http_url" => Ok(UnityCatalogConfigKey::AllowHttpUrl), _ => Err(DataCatalogError::UnknownConfigKey { catalog: "unity", key: s.to_string(), @@ -259,6 +271,7 @@ impl AsRef for UnityCatalogConfigKey { fn as_ref(&self) -> &str { match self { UnityCatalogConfigKey::AccessToken => "unity_access_token", + UnityCatalogConfigKey::AllowHttpUrl => "unity_allow_http_url", UnityCatalogConfigKey::AuthorityHost => "unity_authority_host", UnityCatalogConfigKey::AuthorityId => "unity_authority_id", UnityCatalogConfigKey::ClientId => "unity_client_id", @@ -311,6 +324,9 @@ pub struct UnityCatalogBuilder { /// When set to true, azure cli has to be used for acquiring access token use_azure_cli: bool, + /// When set to true, http will be allowed in the catalog url + allow_http_url: bool, + /// Retry config retry_config: RetryConfig, @@ -333,6 +349,9 @@ impl UnityCatalogBuilder { ) -> DataCatalogResult { match UnityCatalogConfigKey::from_str(key.as_ref())? { UnityCatalogConfigKey::AccessToken => self.bearer_token = Some(value.into()), + UnityCatalogConfigKey::AllowHttpUrl => { + self.allow_http_url = str_is_truthy(&value.into()) + } UnityCatalogConfigKey::ClientId => self.client_id = Some(value.into()), UnityCatalogConfigKey::ClientSecret => self.client_secret = Some(value.into()), UnityCatalogConfigKey::AuthorityId => self.authority_id = Some(value.into()), @@ -431,6 +450,45 @@ impl UnityCatalogBuilder { self } + /// Returns the storage location and temporary token to be used with the + /// Unity Catalog table. + pub async fn get_uc_location_and_token( + table_uri: &str, + ) -> Result<(String, HashMap), UnityCatalogError> { + let uri_parts: Vec<&str> = table_uri[5..].split('.').collect(); + if uri_parts.len() != 3 { + panic!("Invalid Unity Catalog URI: {}", table_uri); + } + + let catalog_id = uri_parts[0]; + let database_name = uri_parts[1]; + let table_name = uri_parts[2]; + + let unity_catalog = match UnityCatalogBuilder::from_env().build() { + Ok(uc) => uc, + Err(_e) => panic!("Unable to build Unity Catalog."), + }; + let storage_location = match unity_catalog + .get_table_storage_location(Some(catalog_id.to_string()), database_name, table_name) + .await + { + Ok(s) => s, + Err(_e) => panic!("Unable to find the table's storage location."), + }; + let temp_creds_res = unity_catalog + .get_temp_table_credentials(catalog_id, database_name, table_name) + .await?; + let credentials = match temp_creds_res { + TableTempCredentialsResponse::Success(temp_creds) => { + temp_creds.get_credentials().unwrap() + } + TableTempCredentialsResponse::Error(_error) => { + panic!("Unable to get temporary credentials from Unity Catalog.") + } + }; + Ok((storage_location, credentials)) + } + fn get_credential_provider(&self) -> Option { if let Some(token) = self.bearer_token.as_ref() { return Some(CredentialProvider::BearerToken(token.clone())); @@ -488,7 +546,12 @@ impl UnityCatalogBuilder { .trim_end_matches('/') .to_string(); - let client = self.client_options.client()?; + let client_options = if self.allow_http_url { + self.client_options.with_allow_http(true) + } else { + self.client_options + }; + let client = client_options.client()?; Ok(UnityCatalog { client, @@ -649,7 +712,7 @@ impl UnityCatalog { self.catalog_url(), catalog_id.as_ref(), database_name.as_ref(), - table_name.as_ref() + table_name.as_ref(), )) .header(AUTHORIZATION, token) .send() @@ -692,6 +755,67 @@ impl UnityCatalog { } } +#[derive(Clone, Default, Debug)] +pub struct UnityCatalogFactory {} + +impl RetryConfigParse for UnityCatalogFactory {} + +impl ObjectStoreFactory for UnityCatalogFactory { + fn parse_url_opts( + &self, + table_uri: &Url, + options: &StorageOptions, + ) -> DeltaResult<(ObjectStoreRef, Path)> { + use futures::executor::block_on; + + let result = block_on(UnityCatalogBuilder::get_uc_location_and_token( + table_uri.as_str(), + )); + + let (table_path, temp_creds) = match result { + Ok(tup) => tup, + Err(_err) => panic!("Unable to get UC location and token."), + }; + + let mut storage_options = options.0.clone(); + + if !temp_creds.is_empty() { + storage_options.extend(temp_creds); + } + + let mut builder = + DeltaTableBuilder::from_uri(&table_path).with_io_runtime(IORuntime::default()); + if !storage_options.is_empty() { + builder = builder.with_storage_options(storage_options.clone()); + } + + let prefix = Path::parse(table_uri.path())?; + let store = builder.build()?.object_store(); + + Ok((store, prefix)) + } +} + +impl LogStoreFactory for UnityCatalogFactory { + fn with_options( + &self, + store: ObjectStoreRef, + location: &Url, + options: &StorageOptions, + ) -> DeltaResult> { + Ok(default_logstore(store, location, options)) + } +} + +/// Register an [ObjectStoreFactory] for common UnityCatalogFactory [Url] schemes +pub fn register_handlers(_additional_prefixes: Option) { + let factory = Arc::new(UnityCatalogFactory::default()); + let scheme = "uc"; + let url = Url::parse(&format!("{}://", scheme)).unwrap(); + factories().insert(url.clone(), factory.clone()); + logstores().insert(url.clone(), factory.clone()); +} + #[async_trait::async_trait] impl DataCatalog for UnityCatalog { type Error = UnityCatalogError; @@ -731,6 +855,7 @@ mod tests { use crate::models::tests::{GET_SCHEMA_RESPONSE, GET_TABLE_RESPONSE, LIST_SCHEMAS_RESPONSE}; use crate::models::*; use crate::UnityCatalogBuilder; + use deltalake_core::DataCatalog; use httpmock::prelude::*; #[tokio::test] @@ -788,5 +913,15 @@ mod tests { get_table_response.unwrap(), GetTableResponse::Success(_) )); + + let storage_location = client + .get_table_storage_location( + Some("catalog_name".to_string()), + "schema_name", + "table_name", + ) + .await + .unwrap(); + assert!(storage_location.eq_ignore_ascii_case("string")); } } diff --git a/crates/core/src/table/builder.rs b/crates/core/src/table/builder.rs index 77a9fae20b..fa08715992 100644 --- a/crates/core/src/table/builder.rs +++ b/crates/core/src/table/builder.rs @@ -361,8 +361,9 @@ fn resolve_uri_type(table_uri: impl AsRef) -> DeltaResult { Ok(UriType::LocalPath(PathBuf::from(table_uri))) } else { Err(DeltaTableError::InvalidTableLocation(format!( - "Unknown scheme: {}", - scheme + "Unknown scheme: {}. Known schemes: {}", + scheme, + known_schemes.join(",") ))) } } else { diff --git a/crates/deltalake/src/lib.rs b/crates/deltalake/src/lib.rs index 60147f244e..a2887f2146 100644 --- a/crates/deltalake/src/lib.rs +++ b/crates/deltalake/src/lib.rs @@ -7,6 +7,8 @@ pub use deltalake_core::*; pub use deltalake_aws as aws; #[cfg(feature = "azure")] pub use deltalake_azure as azure; +#[cfg(feature = "unity-experimental")] +pub use deltalake_catalog_unity as unity_catalog; #[cfg(feature = "gcs")] pub use deltalake_gcp as gcp; #[cfg(feature = "hdfs")] diff --git a/python/src/lib.rs b/python/src/lib.rs index bad6eed2b5..2c50302073 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -2442,6 +2442,7 @@ fn _internal(m: &Bound<'_, PyModule>) -> PyResult<()> { deltalake::hdfs::register_handlers(None); deltalake_mount::register_handlers(None); deltalake::lakefs::register_handlers(None); + deltalake::unity_catalog::register_handlers(None); let py = m.py(); m.add("DeltaError", py.get_type_bound::())?; From a904e9a7af8f124edfcdbd6e8197b570fa4169d1 Mon Sep 17 00:00:00 2001 From: Omkar P <45419097+omkar-foss@users.noreply.github.com> Date: Thu, 13 Feb 2025 02:31:23 +0530 Subject: [PATCH 2/3] test(python): add tests with mock APIs for Unity Catalog variants Signed-off-by: Omkar P <45419097+omkar-foss@users.noreply.github.com> --- .../unitycatalog_databricks.json | 112 ++++++++++++++++++ .../mockoon_data_files/unitycatalog_oss.json | 112 ++++++++++++++++++ .github/workflows/python_build.yml | 60 +++++++++- crates/catalog-unity/src/lib.rs | 2 +- python/pyproject.toml | 2 + python/tests/test_unity_catalog.py | 52 ++++++++ 6 files changed, 338 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/mockoon_data_files/unitycatalog_databricks.json create mode 100644 .github/workflows/mockoon_data_files/unitycatalog_oss.json create mode 100644 python/tests/test_unity_catalog.py diff --git a/.github/workflows/mockoon_data_files/unitycatalog_databricks.json b/.github/workflows/mockoon_data_files/unitycatalog_databricks.json new file mode 100644 index 0000000000..861d52f69f --- /dev/null +++ b/.github/workflows/mockoon_data_files/unitycatalog_databricks.json @@ -0,0 +1,112 @@ +{ + "uuid": "0696d6d5-62b6-4a0f-8524-917b0c3848d4", + "lastMigration": 33, + "name": "Unity Catalog (Databricks) Test API", + "endpointPrefix": "", + "latency": 0, + "port": 8080, + "hostname": "", + "folders": [], + "routes": [ + { + "uuid": "16d88cdc-abcf-4b0d-a4d6-18a3205a9a6c", + "type": "http", + "documentation": "Temporary Table Credentials API", + "method": "post", + "endpoint": "api/2.1/unity-catalog/temporary-table-credentials", + "responses": [ + { + "uuid": "50bba5c1-1e2c-4d5b-a611-ab478320f2b0", + "body": "{\n \"aws_temp_credentials\": {\n \"access_key_id\": \"string\",\n \"secret_access_key\": \"string\",\n \"session_token\": \"string\",\n \"access_point\": \"string\"\n },\n \"azure_user_delegation_sas\": {\n \"sas_token\": \"string\"\n },\n \"r2_temp_credentials\": {\n \"access_key_id\": \"string\",\n \"secret_access_key\": \"string\",\n \"session_token\": \"string\"\n },\n \"expiration_time\": 0,\n \"url\": \"string\"\n}", + "latency": 0, + "statusCode": 200, + "label": "Default response", + "headers": [], + "bodyType": "INLINE", + "filePath": "", + "databucketID": "", + "sendFileAsBody": false, + "rules": [], + "rulesOperator": "OR", + "disableTemplating": false, + "fallbackTo404": false, + "default": true, + "crudKey": "id", + "callbacks": [] + } + ], + "responseMode": null, + "streamingMode": null, + "streamingInterval": 0 + }, + { + "uuid": "66f51ad5-2ba3-4cb0-b633-c362a9f75836", + "type": "http", + "documentation": "Get Table Details API", + "method": "get", + "endpoint": "api/2.1/unity-catalog/tables/unity.default.testtable", + "responses": [ + { + "uuid": "da280b2c-07b6-45ee-8da7-92c50e4e540e", + "body": "{\n \"name\": \"string\",\n \"catalog_name\": \"string\",\n \"schema_name\": \"string\",\n \"table_type\": \"MANAGED\",\n \"data_source_format\": \"DELTA\",\n \"columns\": [\n {\n \"name\": \"string\",\n \"type_text\": \"string\",\n \"type_name\": \"BOOLEAN\",\n \"position\": 0,\n \"type_precision\": 0,\n \"type_scale\": 0,\n \"type_interval_type\": \"string\",\n \"type_json\": \"string\",\n \"comment\": \"string\",\n \"nullable\": true,\n \"partition_index\": 0,\n \"mask\": {\n \"function_name\": \"string\",\n \"using_column_names\": [\n \"string\"\n ]\n }\n }\n ],\n \"storage_location\": \"../crates/test/tests/data/delta-0.8.0-partitioned\",\n \"view_definition\": \"string\",\n \"view_dependencies\": {\n \"dependencies\": [\n {\n \"table\": {\n \"table_full_name\": \"string\"\n },\n \"function\": {\n \"function_full_name\": \"string\"\n }\n }\n ]\n },\n \"sql_path\": \"string\",\n \"owner\": \"string\",\n \"comment\": \"string\",\n \"properties\": {\n \"property1\": \"string\",\n \"property2\": \"string\"\n },\n \"storage_credential_name\": \"string\",\n \"table_constraints\": [\n {\n \"primary_key_constraint\": {\n \"name\": \"string\",\n \"child_columns\": [\n \"string\"\n ]\n },\n \"foreign_key_constraint\": {\n \"name\": \"string\",\n \"child_columns\": [\n \"string\"\n ],\n \"parent_table\": \"string\",\n \"parent_columns\": [\n \"string\"\n ]\n },\n \"named_table_constraint\": {\n \"name\": \"string\"\n }\n }\n ],\n \"row_filter\": {\n \"function_name\": \"string\",\n \"input_column_names\": [\n \"string\"\n ]\n },\n \"enable_predictive_optimization\": \"DISABLE\",\n \"metastore_id\": \"string\",\n \"full_name\": \"string\",\n \"data_access_configuration_id\": \"string\",\n \"created_at\": 0,\n \"created_by\": \"string\",\n \"updated_at\": 0,\n \"updated_by\": \"string\",\n \"deleted_at\": 0,\n \"table_id\": \"string\",\n \"delta_runtime_properties_kvpairs\": {\n \"delta_runtime_properties\": {\n \"property1\": \"string\",\n \"property2\": \"string\"\n }\n },\n \"effective_predictive_optimization_flag\": {\n \"value\": \"DISABLE\",\n \"inherited_from_type\": \"CATALOG\",\n \"inherited_from_name\": \"string\"\n },\n \"access_point\": \"string\",\n \"pipeline_id\": \"string\",\n \"browse_only\": true\n}", + "latency": 0, + "statusCode": 200, + "label": "Default response", + "headers": [], + "bodyType": "INLINE", + "filePath": "", + "databucketID": "", + "sendFileAsBody": false, + "rules": [], + "rulesOperator": "OR", + "disableTemplating": false, + "fallbackTo404": false, + "default": true, + "crudKey": "id", + "callbacks": [] + } + ], + "responseMode": null, + "streamingMode": null, + "streamingInterval": 0 + } + ], + "rootChildren": [ + { + "type": "route", + "uuid": "16d88cdc-abcf-4b0d-a4d6-18a3205a9a6c" + }, + { + "type": "route", + "uuid": "66f51ad5-2ba3-4cb0-b633-c362a9f75836" + } + ], + "proxyMode": false, + "proxyHost": "", + "proxyRemovePrefix": false, + "tlsOptions": { + "enabled": false, + "type": "CERT", + "pfxPath": "", + "certPath": "", + "keyPath": "", + "caPath": "", + "passphrase": "" + }, + "cors": true, + "headers": [], + "proxyReqHeaders": [ + { + "key": "", + "value": "" + } + ], + "proxyResHeaders": [ + { + "key": "", + "value": "" + } + ], + "data": [], + "callbacks": [] +} \ No newline at end of file diff --git a/.github/workflows/mockoon_data_files/unitycatalog_oss.json b/.github/workflows/mockoon_data_files/unitycatalog_oss.json new file mode 100644 index 0000000000..584389d5d6 --- /dev/null +++ b/.github/workflows/mockoon_data_files/unitycatalog_oss.json @@ -0,0 +1,112 @@ +{ + "uuid": "0696d6d5-62b6-4a0f-8524-917b0c3848d4", + "lastMigration": 33, + "name": "Unity Catalog (OSS) Test API", + "endpointPrefix": "", + "latency": 0, + "port": 8080, + "hostname": "", + "folders": [], + "routes": [ + { + "uuid": "16d88cdc-abcf-4b0d-a4d6-18a3205a9a6c", + "type": "http", + "documentation": "Temporary Table Credentials API", + "method": "post", + "endpoint": "api/2.1/unity-catalog/temporary-table-credentials", + "responses": [ + { + "uuid": "50bba5c1-1e2c-4d5b-a611-ab478320f2b0", + "body": "{\n \"aws_temp_credentials\": {\n \"access_key_id\": \"string\",\n \"secret_access_key\": \"string\",\n \"session_token\": \"string\"\n },\n \"azure_user_delegation_sas\": {\n \"sas_token\": \"string\"\n },\n \"gcp_oauth_token\": {\n \"oauth_token\": \"string\"\n },\n \"expiration_time\": 0\n}", + "latency": 0, + "statusCode": 200, + "label": "Default response", + "headers": [], + "bodyType": "INLINE", + "filePath": "", + "databucketID": "", + "sendFileAsBody": false, + "rules": [], + "rulesOperator": "OR", + "disableTemplating": false, + "fallbackTo404": false, + "default": true, + "crudKey": "id", + "callbacks": [] + } + ], + "responseMode": null, + "streamingMode": null, + "streamingInterval": 0 + }, + { + "uuid": "66f51ad5-2ba3-4cb0-b633-c362a9f75836", + "type": "http", + "documentation": "Get Table Details API", + "method": "get", + "endpoint": "api/2.1/unity-catalog/tables/unity.default.testtable", + "responses": [ + { + "uuid": "da280b2c-07b6-45ee-8da7-92c50e4e540e", + "body": "{\n \"name\": \"string\",\n \"catalog_name\": \"string\",\n \"schema_name\": \"string\",\n \"table_type\": \"MANAGED\",\n \"data_source_format\": \"DELTA\",\n \"columns\": [\n {\n \"name\": \"string\",\n \"type_text\": \"string\",\n \"type_json\": \"string\",\n \"type_name\": \"BOOLEAN\",\n \"type_precision\": 0,\n \"type_scale\": 0,\n \"type_interval_type\": \"string\",\n \"position\": 0,\n \"comment\": \"string\",\n \"nullable\": true,\n \"partition_index\": 0\n }\n ],\n \"storage_location\": \"string\",\n \"comment\": \"string\",\n \"properties\": {\n \"additionalProp1\": \"string\",\n \"additionalProp2\": \"string\",\n \"additionalProp3\": \"string\"\n },\n \"owner\": \"string\",\n \"created_at\": 0,\n \"created_by\": \"string\",\n \"updated_at\": 0,\n \"updated_by\": \"string\",\n \"table_id\": \"string\"\n}", + "latency": 0, + "statusCode": 200, + "label": "Default response", + "headers": [], + "bodyType": "INLINE", + "filePath": "", + "databucketID": "", + "sendFileAsBody": false, + "rules": [], + "rulesOperator": "OR", + "disableTemplating": false, + "fallbackTo404": false, + "default": true, + "crudKey": "id", + "callbacks": [] + } + ], + "responseMode": null, + "streamingMode": null, + "streamingInterval": 0 + } + ], + "rootChildren": [ + { + "type": "route", + "uuid": "16d88cdc-abcf-4b0d-a4d6-18a3205a9a6c" + }, + { + "type": "route", + "uuid": "66f51ad5-2ba3-4cb0-b633-c362a9f75836" + } + ], + "proxyMode": false, + "proxyHost": "", + "proxyRemovePrefix": false, + "tlsOptions": { + "enabled": false, + "type": "CERT", + "pfxPath": "", + "certPath": "", + "keyPath": "", + "caPath": "", + "passphrase": "" + }, + "cors": true, + "headers": [], + "proxyReqHeaders": [ + { + "key": "", + "value": "" + } + ], + "proxyResHeaders": [ + { + "key": "", + "value": "" + } + ], + "data": [], + "callbacks": [] +} \ No newline at end of file diff --git a/.github/workflows/python_build.yml b/.github/workflows/python_build.yml index acdd2c9cd6..8a2ff3da9c 100644 --- a/.github/workflows/python_build.yml +++ b/.github/workflows/python_build.yml @@ -84,7 +84,7 @@ jobs: uv pip uninstall pandas uv run --no-sync pytest -m "not pandas and not integration and not benchmark" uv pip install pandas - + test-lakefs: name: Python Build (Python 3.10 LakeFS Integration tests) runs-on: ubuntu-latest @@ -110,6 +110,64 @@ jobs: - name: Run tests run: uv run --no-sync pytest -m '(lakefs and integration)' --doctest-modules + test-unitycatalog-databricks: + name: Python Build (Python 3.10 Unity Catalog Integration tests) + runs-on: ubuntu-latest + env: + RUSTFLAGS: "-C debuginfo=1" + CARGO_INCREMENTAL: 0 + + steps: + - uses: actions/checkout@v3 + + - name: Setup Environment + uses: ./.github/actions/setup-env + + - name: Run Mockoon CLI + uses: mockoon/cli-action@v2 + with: + version: "latest" + data-file: "./mockoon_data_files/unitycatalog_databricks.json" + port: 8080 + + - name: Build and install deltalake + run: make develop + + - name: Download Data Acceptance Tests (DAT) files + run: make setup-dat + + - name: Run tests + run: uv run --no-sync pytest -m '(unitycatalog_databricks and integration)' --doctest-modules + + test-unitycatalog-oss: + name: Python Build (Python 3.10 Unity Catalog Integration tests) + runs-on: ubuntu-latest + env: + RUSTFLAGS: "-C debuginfo=1" + CARGO_INCREMENTAL: 0 + + steps: + - uses: actions/checkout@v3 + + - name: Setup Environment + uses: ./.github/actions/setup-env + + - name: Run Mockoon CLI + uses: mockoon/cli-action@v2 + with: + version: "latest" + data-file: "./mockoon_data_files/unitycatalog_oss.json" + port: 8080 + + - name: Build and install deltalake + run: make develop + + - name: Download Data Acceptance Tests (DAT) files + run: make setup-dat + + - name: Run tests + run: uv run --no-sync pytest -m '(unitycatalog_oss and integration)' --doctest-modules + test-pyspark: name: PySpark Integration Tests runs-on: ubuntu-latest diff --git a/crates/catalog-unity/src/lib.rs b/crates/catalog-unity/src/lib.rs index fe5ca1b3e9..0cd81f55d0 100644 --- a/crates/catalog-unity/src/lib.rs +++ b/crates/catalog-unity/src/lib.rs @@ -473,7 +473,7 @@ impl UnityCatalogBuilder { .await { Ok(s) => s, - Err(_e) => panic!("Unable to find the table's storage location."), + Err(err) => panic!("Unable to find the table's storage location. {}", err), }; let temp_creds_res = unity_catalog .get_temp_table_credentials(catalog_id, database_name, table_name) diff --git a/python/pyproject.toml b/python/pyproject.toml index b7dd918fe6..070758fb61 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -97,6 +97,8 @@ markers = [ "pandas: marks tests that require pandas", "polars: marks tests that require polars", "lakefs: marks tests that require lakefs", + "unitycatalog_databricks: marks tests that require unitycatalog_databricks", + "unitycatalog_oss: marks tests that require unitycatalog_oss", "pyspark: marks tests that require pyspark", ] diff --git a/python/tests/test_unity_catalog.py b/python/tests/test_unity_catalog.py new file mode 100644 index 0000000000..7b3372845a --- /dev/null +++ b/python/tests/test_unity_catalog.py @@ -0,0 +1,52 @@ +import os + +import pytest + +from deltalake import DeltaTable + + +@pytest.mark.unitycatalog_databricks +@pytest.mark.integration +@pytest.mark.timeout(timeout=10, method="thread") +def test_uc_read_deltatable(): + """Test delta table reads using Unity Catalog URL (uc://)""" + + os.environ["DATABRICKS_WORKSPACE_URL"] = "http://localhost:8080" + os.environ["DATABRICKS_ACCESS_TOKEN"] = "123456" + os.environ["UNITY_ALLOW_HTTP_URL"] = "true" + + dt = DeltaTable("uc://unity.default.testtable") + + assert dt.is_deltatable(dt.table_uri), True + expected = { + "value": ["1", "2", "3", "6", "7", "5", "4"], + "year": ["2020", "2020", "2020", "2021", "2021", "2021", "2021"], + "month": ["1", "2", "2", "12", "12", "12", "4"], + "day": ["1", "3", "5", "20", "20", "4", "5"], + } + assert dt.to_pyarrow_dataset().to_table().to_pydict() == expected + + +@pytest.mark.unitycatalog_oss +@pytest.mark.integration +@pytest.mark.timeout(timeout=10, method="thread") +def test_uc_read_deltatable_failing(): + """Test delta table reads using Unity Catalog URL (uc://)""" + + os.environ["DATABRICKS_WORKSPACE_URL"] = "http://localhost:8080" + os.environ["DATABRICKS_ACCESS_TOKEN"] = "123456" + os.environ["UNITY_ALLOW_HTTP_URL"] = "true" + + # @TODO: Currently, this will fail when used with Unity Catalog OSS + # mock APIs. Need to add support for slightly different response payloads + # of Unity Catalog OSS. + with pytest.raises(BaseException): + dt = DeltaTable("uc://unity.default.testtable") + assert dt.is_deltatable(dt.table_uri), True + expected = { + "value": ["1", "2", "3", "6", "7", "5", "4"], + "year": ["2020", "2020", "2020", "2021", "2021", "2021", "2021"], + "month": ["1", "2", "2", "12", "12", "12", "4"], + "day": ["1", "3", "5", "20", "20", "4", "5"], + } + assert dt.to_pyarrow_dataset().to_table().to_pydict() == expected From 734d4cb39df96f92ccd4d29fe0bec77854f31e45 Mon Sep 17 00:00:00 2001 From: Omkar P <45419097+omkar-foss@users.noreply.github.com> Date: Thu, 13 Feb 2025 17:54:30 +0530 Subject: [PATCH 3/3] fix: update mockoon datafiles path for github actions Signed-off-by: Omkar P <45419097+omkar-foss@users.noreply.github.com> --- .../mockoon_data_files/unitycatalog_databricks.json | 0 .../{workflows => }/mockoon_data_files/unitycatalog_oss.json | 0 .github/workflows/python_build.yml | 4 ++-- 3 files changed, 2 insertions(+), 2 deletions(-) rename .github/{workflows => }/mockoon_data_files/unitycatalog_databricks.json (100%) rename .github/{workflows => }/mockoon_data_files/unitycatalog_oss.json (100%) diff --git a/.github/workflows/mockoon_data_files/unitycatalog_databricks.json b/.github/mockoon_data_files/unitycatalog_databricks.json similarity index 100% rename from .github/workflows/mockoon_data_files/unitycatalog_databricks.json rename to .github/mockoon_data_files/unitycatalog_databricks.json diff --git a/.github/workflows/mockoon_data_files/unitycatalog_oss.json b/.github/mockoon_data_files/unitycatalog_oss.json similarity index 100% rename from .github/workflows/mockoon_data_files/unitycatalog_oss.json rename to .github/mockoon_data_files/unitycatalog_oss.json diff --git a/.github/workflows/python_build.yml b/.github/workflows/python_build.yml index 8a2ff3da9c..23ff65073d 100644 --- a/.github/workflows/python_build.yml +++ b/.github/workflows/python_build.yml @@ -127,7 +127,7 @@ jobs: uses: mockoon/cli-action@v2 with: version: "latest" - data-file: "./mockoon_data_files/unitycatalog_databricks.json" + data-file: ".github/mockoon_data_files/unitycatalog_databricks.json" port: 8080 - name: Build and install deltalake @@ -156,7 +156,7 @@ jobs: uses: mockoon/cli-action@v2 with: version: "latest" - data-file: "./mockoon_data_files/unitycatalog_oss.json" + data-file: ".github/mockoon_data_files/unitycatalog_oss.json" port: 8080 - name: Build and install deltalake