Use 350db143 jars in integration tests on [databricks] 14.3 (#12082)
Resolves #11988
Resolves #11990
Resolves #12020 and related issues

- Consolidate DBR-specific logic in jenkins/databricks/common_vars.sh
- Add a DBR version suffix when necessary (see the sketch below)
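
For context, a minimal sketch (with assumed example values, not part of this change) of how the suffixed shim version in the title could be derived on a DBR 14.3 cluster running Spark 3.5.0:

#!/bin/bash
# Assumed values for illustration; on a real cluster these come from the
# Databricks environment and /databricks/spark/VERSION.
DATABRICKS_RUNTIME_VERSION="14.3"
SPARK_VER="3.5.0"

# Keep the trailing '.' for prefix matching in the 3XYdb SparkShimServiceProvider.
PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion="${DATABRICKS_RUNTIME_VERSION}."

# Drop the dots to build the suffix: "14.3." -> "143"
DB_VER_SUFFIX="${PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion//./}"

# "3.5.0" -> "350"; the result is the shim version in the title: spark350db143
echo "spark${SPARK_VER//.}db${DB_VER_SUFFIX}"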

---------

Signed-off-by: Gera Shegalov <gshegalov@nvidia.com>
gerashegalov authored Feb 10, 2025
1 parent 9122ab0 commit 1fff480
Showing 2 changed files with 47 additions and 37 deletions.
35 changes: 0 additions & 35 deletions integration_tests/run_pyspark_from_build.sh
@@ -287,41 +287,6 @@ else

export PYSP_TEST_spark_hadoop_hive_exec_scratchdir="$RUN_DIR/hive"

# Extract Databricks version from deployed configs.
# spark.databricks.clusterUsageTags.sparkVersion is set automatically on Databricks
# notebooks but not when running Spark manually.
#
# At the OS level the DBR version can be obtained via
# 1. The DATABRICKS_RUNTIME_VERSION environment variable set by Databricks, e.g., 11.3
# 2. The file /databricks/DBR_VERSION created by Databricks, e.g., 11.3
# 3. The value of the Spark conf in /databricks/common/conf/deploy.conf created by Databricks,
#    e.g., 11.3.x-gpu-ml-scala2.12
#
# For cases 1 and 2, append '.' for version matching in the 3XYdb SparkShimServiceProvider
#
DBR_VERSION=/databricks/DBR_VERSION
DB_DEPLOY_CONF=/databricks/common/conf/deploy.conf
if [[ -n "${DATABRICKS_RUNTIME_VERSION}" ]]; then
export PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion="${DATABRICKS_RUNTIME_VERSION}."
elif [[ -f $DBR_VERSION || -f $DB_DEPLOY_CONF ]]; then
DB_VER="$(< ${DBR_VERSION})." || \
DB_VER=$(grep spark.databricks.clusterUsageTags.sparkVersion $DB_DEPLOY_CONF | sed -e 's/.*"\(.*\)".*/\1/')
# If the reads did not error out, we should have at least four characters: "x.y."
if (( ${#DB_VER} < 4 )); then
echo >&2 "Unable to determine Databricks version, unexpected length of: ${DB_VER}"
exit 1
fi
export PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion=$DB_VER
else
cat << EOF
This node does not define
- the DATABRICKS_RUNTIME_VERSION environment variable,
- files containing version information: $DBR_VERSION, $DB_DEPLOY_CONF
Proceeding assuming a non-Databricks environment.
EOF

fi

# Set spark.task.maxFailures for most schedulers.
#
49 changes: 47 additions & 2 deletions jenkins/databricks/common_vars.sh
@@ -1,6 +1,6 @@
#!/bin/bash
#
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2023-2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -21,7 +21,52 @@ if [ -n "$EXTRA_ENVS" ]; then
fi

SPARK_VER=${SPARK_VER:-$(< /databricks/spark/VERSION)}
export SPARK_SHIM_VER=${SPARK_SHIM_VER:-spark${SPARK_VER//.}db}


# Extract Databricks version from deployed configs.
# spark.databricks.clusterUsageTags.sparkVersion is set automatically on Databricks
# notebooks but not when running Spark manually.
#
# At the OS level the DBR version can be obtained via
# 1. The DATABRICKS_RUNTIME_VERSION environment variable set by Databricks, e.g., 11.3
# 2. The file /databricks/DBR_VERSION created by Databricks, e.g., 11.3
# 3. The value of the Spark conf in /databricks/common/conf/deploy.conf created by Databricks,
#    e.g., 11.3.x-gpu-ml-scala2.12
#
# For cases 1 and 2, append '.' for version matching in the 3XYdb SparkShimServiceProvider
#
DBR_VERSION=/databricks/DBR_VERSION
DB_DEPLOY_CONF=/databricks/common/conf/deploy.conf
if [[ -n "${DATABRICKS_RUNTIME_VERSION}" ]]; then
export PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion="${DATABRICKS_RUNTIME_VERSION}."
elif [[ -f $DBR_VERSION || -f $DB_DEPLOY_CONF ]]; then
DB_VER="$(< ${DBR_VERSION})." || \
DB_VER=$(grep spark.databricks.clusterUsageTags.sparkVersion $DB_DEPLOY_CONF | sed -e 's/.*"\(.*\)".*/\1/')
# If the reads did not error out, we should have at least four characters: "x.y."
if (( ${#DB_VER} < 4 )); then
echo >&2 "Unable to determine Databricks version, unexpected length of: ${DB_VER}"
exit 1
fi
export PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion=$DB_VER
else
cat << EOF
This node does not define
- the DATABRICKS_RUNTIME_VERSION environment variable,
- files containing version information: $DBR_VERSION, $DB_DEPLOY_CONF
Proceeding assuming a non-Databricks environment.
EOF

fi

# TODO make this standard going forward
if [[ "$SPARK_VER" == '3.5.0' ]]; then
DB_VER_SUFFIX="${PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion//./}"
else
DB_VER_SUFFIX=""
fi

export SPARK_SHIM_VER=${SPARK_SHIM_VER:-"spark${SPARK_VER//.}db${DB_VER_SUFFIX}"}

# Setup SPARK_HOME if need
if [[ -z "$SPARK_HOME" ]]; then
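
To see the deploy.conf fallback from common_vars.sh in isolation, here is a hedged sketch with a mock config line modeled on the example in the comments above (the layout of the real Databricks-created file may differ):

#!/bin/bash
# Mock deploy.conf entry; the real file is created by Databricks and may differ.
DB_DEPLOY_CONF=$(mktemp)
echo 'spark.databricks.clusterUsageTags.sparkVersion = "11.3.x-gpu-ml-scala2.12"' > "$DB_DEPLOY_CONF"

# Same extraction as in common_vars.sh: take the quoted value.
DB_VER=$(grep spark.databricks.clusterUsageTags.sparkVersion "$DB_DEPLOY_CONF" | \
         sed -e 's/.*"\(.*\)".*/\1/')
echo "$DB_VER"   # prints: 11.3.x-gpu-ml-scala2.12

# Same sanity check: anything shorter than "x.y." is rejected.
if (( ${#DB_VER} < 4 )); then
  echo >&2 "Unable to determine Databricks version, unexpected length of: ${DB_VER}"
  exit 1
fi
rm -f "$DB_DEPLOY_CONF"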