-
Notifications
You must be signed in to change notification settings - Fork 34
Update Spark CLI for extraJavaOptions
and Fix SubmitBuilder tests
#370
Changes from all commits
6d818a8
ddf4f29
03a7545
b8a4440
37e6592
d3d46d0
66b2cbc
e4c0a42
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,6 +31,7 @@ type sparkVal struct { | |
func (f *sparkVal) flag(section *kingpin.Application) *kingpin.Clause { | ||
return section.Flag(f.flagName, fmt.Sprintf("%s (%s)", f.desc, f.propName)) | ||
} | ||
|
||
func newSparkVal(flagName, propName, desc string) *sparkVal { | ||
return &sparkVal{flagName, propName, desc, "", false} | ||
} | ||
|
@@ -197,6 +198,10 @@ Args: | |
val.flag(submit).StringVar(&val.s) | ||
args.stringVals = append(args.stringVals, val) | ||
|
||
val = newSparkVal("executor-java-options", "spark.executor.extraJavaOptions", "Extra Java options to pass to the executors.") | ||
val.flag(submit).StringVar(&val.s) | ||
args.stringVals = append(args.stringVals, val) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure if we need to add this new option. The user can specify this with "--conf spark.executor.extraJavaOptions". |
||
val = newSparkVal("driver-library-path", "spark.driver.extraLibraryPath", "Extra library path entries to pass to the driver.") | ||
val.flag(submit).StringVar(&val.s) | ||
args.stringVals = append(args.stringVals, val) | ||
|
@@ -282,7 +287,7 @@ func parseApplicationFile(args *sparkArgs) error { | |
|
||
func cleanUpSubmitArgs(argsStr string, boolVals []*sparkVal) ([]string, []string) { | ||
|
||
// collapse two or more spaces to one. | ||
// collapse two or more spaces to one. | ||
argsCompacted := collapseSpacesPattern.ReplaceAllString(argsStr, " ") | ||
// clean up any instances of shell-style escaped newlines: "arg1\\narg2" => "arg1 arg2" | ||
argsCleaned := strings.TrimSpace(backslashNewlinePattern.ReplaceAllLiteralString(argsCompacted, " ")) | ||
|
@@ -292,6 +297,7 @@ func cleanUpSubmitArgs(argsStr string, boolVals []*sparkVal) ([]string, []string | |
argsEquals := make([]string, 0) | ||
appFlags := make([]string, 0) | ||
i := 0 | ||
inQuotes := false | ||
ARGLOOP: | ||
for i < len(args) { | ||
arg := args[i] | ||
|
@@ -322,11 +328,16 @@ ARGLOOP: | |
} | ||
break | ||
} | ||
// Parse Spark configuration: | ||
// join this arg to the next arg if...: | ||
// 1. we're not at the last arg in the array | ||
// 2. we start with "--" | ||
// 3. we don't already contain "=" (already joined) | ||
// 4. we aren't a boolean value (no val to join) | ||
|
||
|
||
// if this is a configuration flag like --conf or --driver-driver-options that doesn't have a | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Typo: "--driver-driver-options" should be "--driver-java-options". |
||
// '=' for assignment. | ||
if i < len(args)-1 && strings.HasPrefix(arg, "--") && !strings.Contains(arg, "=") { | ||
// check for boolean: | ||
for _, boolVal := range boolVals { | ||
|
@@ -336,12 +347,36 @@ ARGLOOP: | |
continue ARGLOOP | ||
} | ||
} | ||
// merge this --key against the following val to get --key=val | ||
argsEquals = append(argsEquals, arg+"="+args[i+1]) | ||
|
||
// if this is the beginning of a string of args e.g. '-Djava.option=setting -Djava.paramter=nonsense' | ||
// we want to remove the leading single quote. Also remove internal quotes when the arg == --conf or some | ||
// other named configuration | ||
// e.g.: next = spark.driver.extraJavaOptions='-Djava.something=somethingelse | ||
// arg = --conf | ||
arg = strings.TrimPrefix(arg, "'") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't quite follow why we need to trim the prefix here and then check why |
||
next := args[i + 1] | ||
if strings.HasPrefix(next, "'") { // e.g. --driver-java-options '-Djava.config=setting... <-- next | ||
inQuotes = true | ||
} | ||
next = strings.Replace(next, "'", "", -1) // remove internal quotes | ||
argsEquals = append(argsEquals, arg + "=" + next) | ||
i += 2 | ||
} else if strings.HasSuffix(arg, "'") { // attach the final arg to the string of args without the quote | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about escaped quotes? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I just added the test: java_options = []string{"-Djava.thirdConfig=\\'thirdSetting\\'"}
inputArgs = "--driver-java-option='-Djava.thirdConfig=\\'thirdSetting\\'' --conf spark.cores.max=8"
suite.testLongArgInternal(inputArgs, java_options) and get the following stacktrace:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Adding the test: inputArgs := "--driver-java-option='-Djava.thirdConfig=assetting --conf spark.cores.max=8"
java_options := []string{"-Djava.thirdConfig=thirdSetting"}
suite.testLongArgInternal(inputArgs, java_options) Yields:
I would expect a relevant error code instead. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The following test case: inputArgs := "--driver-java-option='-Djava.thirdConfig=\"a setting with a space\"' --conf spark.cores.max=8"
javaOptions := []string{"-Djava.thirdConfig=\"a setting with a space\""}
suite.testLongArgInternal(inputArgs, javaOptions) Yields:
I would expect the spaces in the argument to be maintained. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The option here is misspelled ("--driver-java-option"), missing an "s" at the end. So, this may be a different issue. |
||
inQuotes = false // has suffix means we're out of the quotes | ||
arg = strings.TrimSuffix(arg, "'") | ||
argsEquals[len(argsEquals) - 1] = argsEquals[len(argsEquals) - 1] + " " + arg | ||
i += 1 | ||
} else { | ||
// already joined or at the end, pass through: | ||
argsEquals = append(argsEquals, arg) | ||
cleanedArg := strings.Replace(arg, "'", "", -1) | ||
if inQuotes { // join this arg to the last one because it's all in quotes | ||
argsEquals[len(argsEquals) - 1] = argsEquals[len(argsEquals) - 1] + " " + cleanedArg | ||
} else { | ||
if strings.Contains(arg, "'") { // e.g. --driver-java-options='-Djava.firstConfig=firstSetting | ||
inQuotes = true | ||
} | ||
// already joined or at the end, pass through | ||
argsEquals = append(argsEquals, cleanedArg) | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe we can document somewhere that we expect single quotes to be used when a value has spaces in it. |
||
i += 1 | ||
} | ||
} | ||
|
@@ -383,33 +418,7 @@ func getValsFromPropertiesFile(path string) map[string]string { | |
return vals | ||
} | ||
|
||
func fetchMarathonConfig() (map[string]interface{}, error) { | ||
// fetch the spark task definition from Marathon, extract the docker image and HDFS config url: | ||
url := client.CreateServiceURL("replaceme", "") | ||
url.Path = fmt.Sprintf("/marathon/v2/apps/%s", config.ServiceName) | ||
|
||
responseBytes, err := client.CheckHTTPResponse( | ||
client.HTTPQuery(client.CreateHTTPURLRequest("GET", url, nil, "", ""))) | ||
|
||
responseJson := make(map[string]interface{}) | ||
err = json.Unmarshal(responseBytes, &responseJson) | ||
if err != nil { | ||
return responseJson, err | ||
} | ||
|
||
if config.Verbose { | ||
client.PrintMessage("Response from Marathon lookup of task '%s':", config.ServiceName) | ||
prettyJson, err := json.MarshalIndent(responseJson, "", " ") | ||
if err != nil { | ||
log.Fatalf("Failed to prettify json (%s): %s", err, responseJson) | ||
} else { | ||
client.PrintMessage("%s\n", string(prettyJson)) | ||
} | ||
} | ||
return responseJson, nil | ||
} | ||
|
||
func buildSubmitJson(cmd *SparkCommand, marathonConfig map[string]interface{}) (string, error) { | ||
func buildSubmitJson(cmd *SparkCommand) (string, error) { | ||
// first, import any values in the provided properties file (space separated "key val") | ||
// then map applicable envvars | ||
// then parse all -Dprop.key=propVal, and all --conf prop.key=propVal | ||
|
@@ -483,59 +492,30 @@ func buildSubmitJson(cmd *SparkCommand, marathonConfig map[string]interface{}) ( | |
args.properties["spark.app.name"] = args.mainClass | ||
} | ||
|
||
// driver image | ||
var imageSource string | ||
// driver image: use provided value | ||
_, contains := args.properties["spark.mesos.executor.docker.image"] | ||
if contains { | ||
imageSource = "Spark config: spark.mesos.executor.docker.image" | ||
} else { | ||
if cmd.submitDockerImage == "" { | ||
dispatcher_image, err := getStringFromTree(marathonConfig, []string{"app", "container", "docker", "image"}) | ||
if err != nil { | ||
return "", err | ||
} | ||
args.properties["spark.mesos.executor.docker.image"] = dispatcher_image | ||
imageSource = "dispatcher: container.docker.image" | ||
} else { | ||
args.properties["spark.mesos.executor.docker.image"] = cmd.submitDockerImage | ||
imageSource = "flag: --docker-image" | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think removing this section will change the existing behavior: if a special Docker image was provided when installing the Dispatcher, that same image will also be used for Drivers and/or Executors (see the logic below), unless we make some changes in the Dispatcher. |
||
if !contains && cmd.submitDockerImage != "" { | ||
args.properties["spark.mesos.executor.docker.image"] = cmd.submitDockerImage | ||
} | ||
|
||
_, contains = args.properties["spark.mesos.executor.docker.forcePullImage"] | ||
if contains { | ||
client.PrintMessage("Using image '%s' for the driver (from %s)", | ||
args.properties["spark.mesos.executor.docker.image"], imageSource) | ||
} else { | ||
client.PrintMessage("Using image '%s' for the driver and the executors (from %s).", | ||
args.properties["spark.mesos.executor.docker.image"], imageSource) | ||
client.PrintMessage("To disable this image on executors, set "+ | ||
"spark.mesos.executor.docker.forcePullImage=false") | ||
if !contains { | ||
client.PrintMessage("Enabling forcePullImage by default. " + | ||
"To disable this, set spark.mesos.executor.docker.forcePullImage=false") | ||
args.properties["spark.mesos.executor.docker.forcePullImage"] = "true" | ||
} | ||
|
||
// Get the DCOS_SPACE from the marathon app | ||
dispatcherID, err := getStringFromTree(marathonConfig, []string{"app", "id"}) | ||
if err != nil { | ||
client.PrintMessage("Failed to get Dispatcher app id from Marathon app definition: %s", err) | ||
return "", err | ||
} | ||
client.PrintVerbose("Setting DCOS_SPACE to %s", dispatcherID) | ||
appendToProperty("spark.mesos.driver.labels", fmt.Sprintf("DCOS_SPACE:%s", dispatcherID), | ||
args) | ||
appendToProperty("spark.mesos.task.labels", fmt.Sprintf("DCOS_SPACE:%s", dispatcherID), | ||
args) | ||
|
||
// HDFS config | ||
hdfs_config_url, err := getStringFromTree(marathonConfig, []string{"app", "labels", "SPARK_HDFS_CONFIG_URL"}) | ||
if err == nil && len(hdfs_config_url) != 0 { // fail silently: it's normal for this to be unset | ||
hdfs_config_url = strings.TrimRight(hdfs_config_url, "/") | ||
appendToProperty("spark.mesos.uris", | ||
fmt.Sprintf("%s/hdfs-site.xml,%s/core-site.xml", hdfs_config_url, hdfs_config_url), args) | ||
// Get the DCOS_SPACE from the service name | ||
dcosSpace := config.ServiceName | ||
if !strings.HasPrefix(dcosSpace, "/") { | ||
dcosSpace = "/" + dcosSpace | ||
} | ||
client.PrintVerbose("Setting DCOS_SPACE to %s", dcosSpace) | ||
appendToProperty("spark.mesos.driver.labels", fmt.Sprintf("DCOS_SPACE:%s", dcosSpace), args) | ||
appendToProperty("spark.mesos.task.labels", fmt.Sprintf("DCOS_SPACE:%s", dcosSpace), args) | ||
|
||
// kerberos configuration: | ||
err = SetupKerberos(args, marathonConfig) | ||
err = SetupKerberos(args) | ||
if err != nil { | ||
return "", err | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FWIW it may be necessary to make dispatcher additions in order for this to be removable? I don't remember for sure.
Effectively the dispatcher could grab these values from its environment and include them in the task automatically, instead of requiring the CLI to pass them in from the outside.