Run Spark JAR task test on multiple DBR versions #1665

Merged · 3 commits · Aug 9, 2024
@@ -3,7 +3,6 @@ bundle:

workspace:
  root_path: "~/.bundle/{{.unique_id}}"
  artifact_path: {{.artifact_path}}

artifacts:
  my_java_code:
@@ -27,3 +26,30 @@ resources:
main_class_name: PrintArgs
libraries:
- jar: ./{{.project_name}}/PrintArgs.jar

targets:
  volume:
    # Override the artifact path to upload artifacts to a volume path
    workspace:
      artifact_path: {{.artifact_path}}

    resources:
      jobs:
        jar_job:
          tasks:
            - task_key: TestSparkJarTask
              new_cluster:

                # Force cluster to run in single user mode (force it to be a UC cluster)
                data_security_mode: SINGLE_USER

  workspace:
    resources:
      jobs:
        jar_job:
          tasks:
            - task_key: TestSparkJarTask
              new_cluster:

                # Force cluster to run in no isolation mode (force it to be a non-UC cluster)
                data_security_mode: NONE
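
The targets above are selected at deploy time via the DATABRICKS_BUNDLE_TARGET environment variable, which the tests further down in this diff set on the context. A minimal sketch of that selection, reusing env.Set and env.Get from libs/env as they appear in this PR; the test function itself is hypothetical and not part of the PR:

package bundle

import (
	"context"
	"testing"

	"github.com/databricks/cli/libs/env"
)

func TestSelectVolumeTarget(t *testing.T) {
	// Selecting the "volume" target applies its overrides: artifacts are
	// uploaded to a UC volume path and the job cluster runs in SINGLE_USER mode.
	ctx := env.Set(context.Background(), "DATABRICKS_BUNDLE_TARGET", "volume")

	// The context is then passed to the bundle helpers below (for example
	// deployBundle(t, ctx, bundleRoot)) so the CLI picks up the override.
	t.Log(env.Get(ctx, "DATABRICKS_BUNDLE_TARGET"))
}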
11 changes: 7 additions & 4 deletions internal/bundle/helpers.go
@@ -12,6 +12,7 @@ import (
"github.com/databricks/cli/cmd/root"
"github.com/databricks/cli/internal"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/env"
"github.com/databricks/cli/libs/flags"
"github.com/databricks/cli/libs/template"
"github.com/databricks/databricks-sdk-go"
@@ -56,21 +57,21 @@ func writeConfigFile(t *testing.T, config map[string]any) (string, error) {
}

func validateBundle(t *testing.T, ctx context.Context, path string) ([]byte, error) {
t.Setenv("BUNDLE_ROOT", path)
ctx = env.Set(ctx, "BUNDLE_ROOT", path)
Contributor Author

t.Setenv prevents tests from running in parallel (the env is global state).

We use libs/env everywhere to access environment variables. It lets us override them on the context instead of in the process environment, which in turn allows tests to run in parallel.
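
A minimal sketch of the difference, using only the env.Set and env.Get signatures visible in this diff; the test function itself is illustrative and not part of this PR:

package bundle

import (
	"context"
	"testing"

	"github.com/databricks/cli/libs/env"
	"github.com/stretchr/testify/require"
)

func TestContextScopedEnvOverride(t *testing.T) {
	// Safe to parallelize: the override lives on the context rather than in
	// the process environment, so concurrent tests cannot clobber each other.
	// (t.Setenv, by contrast, panics if the test also calls t.Parallel.)
	t.Parallel()

	ctx := env.Set(context.Background(), "BUNDLE_ROOT", "/tmp/bundle-a")
	require.Equal(t, "/tmp/bundle-a", env.Get(ctx, "BUNDLE_ROOT"))
}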

c := internal.NewCobraTestRunnerWithContext(t, ctx, "bundle", "validate", "--output", "json")
stdout, _, err := c.Run()
return stdout.Bytes(), err
}

func deployBundle(t *testing.T, ctx context.Context, path string) error {
t.Setenv("BUNDLE_ROOT", path)
ctx = env.Set(ctx, "BUNDLE_ROOT", path)
c := internal.NewCobraTestRunnerWithContext(t, ctx, "bundle", "deploy", "--force-lock", "--auto-approve")
_, _, err := c.Run()
return err
}

func deployBundleWithFlags(t *testing.T, ctx context.Context, path string, flags []string) error {
t.Setenv("BUNDLE_ROOT", path)
ctx = env.Set(ctx, "BUNDLE_ROOT", path)
args := []string{"bundle", "deploy", "--force-lock"}
args = append(args, flags...)
c := internal.NewCobraTestRunnerWithContext(t, ctx, args...)
@@ -79,6 +80,7 @@ func deployBundleWithFlags(t *testing.T, ctx context.Context, path string, flags
}

func runResource(t *testing.T, ctx context.Context, path string, key string) (string, error) {
ctx = env.Set(ctx, "BUNDLE_ROOT", path)
ctx = cmdio.NewContext(ctx, cmdio.Default())

c := internal.NewCobraTestRunnerWithContext(t, ctx, "bundle", "run", key)
@@ -87,6 +89,7 @@ func runResource(t *testing.T, ctx context.Context, path string, key string) (st
}

func runResourceWithParams(t *testing.T, ctx context.Context, path string, key string, params ...string) (string, error) {
ctx = env.Set(ctx, "BUNDLE_ROOT", path)
ctx = cmdio.NewContext(ctx, cmdio.Default())

args := make([]string, 0)
@@ -98,7 +101,7 @@ func runResourceWithParams(t *testing.T, ctx context.Context, path string, key s
}

func destroyBundle(t *testing.T, ctx context.Context, path string) error {
t.Setenv("BUNDLE_ROOT", path)
ctx = env.Set(ctx, "BUNDLE_ROOT", path)
c := internal.NewCobraTestRunnerWithContext(t, ctx, "bundle", "destroy", "--auto-approve")
_, _, err := c.Run()
return err
75 changes: 61 additions & 14 deletions internal/bundle/spark_jar_test.go
@@ -1,28 +1,19 @@
package bundle

import (
"os"
"context"
"testing"

"github.com/databricks/cli/internal"
"github.com/databricks/cli/internal/acc"
"github.com/databricks/cli/internal/testutil"
"github.com/databricks/cli/libs/env"
"github.com/google/uuid"
"github.com/stretchr/testify/require"
)

func runSparkJarTest(t *testing.T, sparkVersion string) {
func runSparkJarTestCommon(t *testing.T, ctx context.Context, sparkVersion string, artifactPath string) {
cloudEnv := internal.GetEnvOrSkipTest(t, "CLOUD_ENV")
t.Log(cloudEnv)

if os.Getenv("TEST_METASTORE_ID") == "" {
t.Skip("Skipping tests that require a UC Volume when metastore id is not set.")
}

ctx, wt := acc.WorkspaceTest(t)
w := wt.W
volumePath := internal.TemporaryUcVolume(t, w)

nodeTypeId := internal.GetNodeTypeId(cloudEnv)
tmpDir := t.TempDir()
instancePoolId := env.Get(ctx, "TEST_INSTANCE_POOL_ID")
@@ -31,7 +22,7 @@ func runSparkJarTest(t *testing.T, sparkVersion string) {
"unique_id": uuid.New().String(),
"spark_version": sparkVersion,
"root": tmpDir,
"artifact_path": volumePath,
"artifact_path": artifactPath,
"instance_pool_id": instancePoolId,
}, tmpDir)
require.NoError(t, err)
@@ -48,6 +39,62 @@ func runSparkJarTest(t *testing.T, sparkVersion string) {
require.Contains(t, out, "Hello from Jar!")
}

func runSparkJarTestFromVolume(t *testing.T, sparkVersion string) {
ctx, wt := acc.UcWorkspaceTest(t)
volumePath := internal.TemporaryUcVolume(t, wt.W)
ctx = env.Set(ctx, "DATABRICKS_BUNDLE_TARGET", "volume")
runSparkJarTestCommon(t, ctx, sparkVersion, volumePath)
}

func runSparkJarTestFromWorkspace(t *testing.T, sparkVersion string) {
ctx, _ := acc.WorkspaceTest(t)
ctx = env.Set(ctx, "DATABRICKS_BUNDLE_TARGET", "workspace")
runSparkJarTestCommon(t, ctx, sparkVersion, "n/a")
}

func TestAccSparkJarTaskDeployAndRunOnVolumes(t *testing.T) {
runSparkJarTest(t, "14.3.x-scala2.12")
internal.GetEnvOrSkipTest(t, "CLOUD_ENV")
testutil.RequireJDK(t, context.Background(), "1.8.0")

// Failure on earlier DBR versions:
//
// JAR installation from Volumes is supported on UC Clusters with DBR >= 13.3.
// Denied library is Jar(/Volumes/main/test-schema-ldgaklhcahlg/my-volume/.internal/PrintArgs.jar)
//

versions := []string{
"13.3.x-scala2.12", // 13.3 LTS (includes Apache Spark 3.4.1, Scala 2.12)
"14.3.x-scala2.12", // 14.3 LTS (includes Apache Spark 3.5.0, Scala 2.12)
"15.4.x-scala2.12", // 15.4 LTS Beta (includes Apache Spark 3.5.0, Scala 2.12)
}

for _, version := range versions {
t.Run(version, func(t *testing.T) {
t.Parallel()
runSparkJarTestFromVolume(t, version)
})
}
}

func TestAccSparkJarTaskDeployAndRunOnWorkspace(t *testing.T) {
internal.GetEnvOrSkipTest(t, "CLOUD_ENV")
testutil.RequireJDK(t, context.Background(), "1.8.0")

// Failure on earlier DBR versions:
//
// Library from /Workspace is not allowed on this cluster.
// Please switch to using DBR 14.1+ No Isolation Shared or DBR 13.1+ Shared cluster or 13.2+ Assigned cluster to use /Workspace libraries.
//

versions := []string{
"14.3.x-scala2.12", // 14.3 LTS (includes Apache Spark 3.5.0, Scala 2.12)
"15.4.x-scala2.12", // 15.4 LTS Beta (includes Apache Spark 3.5.0, Scala 2.12)
}

for _, version := range versions {
t.Run(version, func(t *testing.T) {
t.Parallel()
runSparkJarTestFromWorkspace(t, version)
})
}
}
24 changes: 24 additions & 0 deletions internal/testutil/jdk.go
@@ -0,0 +1,24 @@
package testutil

import (
"bytes"
"context"
"os/exec"
"strings"
"testing"

"github.com/stretchr/testify/require"
)

func RequireJDK(t *testing.T, ctx context.Context, version string) {
var stderr bytes.Buffer

cmd := exec.Command("javac", "-version")
cmd.Stderr = &stderr
err := cmd.Run()
require.NoError(t, err, "Unable to run javac -version")

// Get the first line of the output
line := strings.Split(stderr.String(), "\n")[0]
require.Contains(t, line, version, "Expected JDK version %s, got %s", version, line)
}
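
A short usage sketch for the new helper; the calling test is hypothetical. The helper reads stderr, which works for JDK 8 because javac -version traditionally printed its version banner to stderr on that release (an inference from the code above, not something the PR states):

package testutil_test

import (
	"context"
	"testing"

	"github.com/databricks/cli/internal/testutil"
)

func TestRequiresJava8(t *testing.T) {
	// Fails fast with a clear message unless javac on PATH reports the
	// expected JDK version (for example "javac 1.8.0_392").
	testutil.RequireJDK(t, context.Background(), "1.8.0")

	// ... compile and package Java sources here ...
}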