Skip to content

Commit

Permalink
[CARBONDATA-48] Support Spark SQL CLI in Carbon. This closes #33
Browse files Browse the repository at this point in the history
  • Loading branch information
chenliang613 committed Jul 12, 2016
2 parents 90139be + 8619cb1 commit 3f56dfb
Show file tree
Hide file tree
Showing 3 changed files with 168 additions and 0 deletions.
79 changes: 79 additions & 0 deletions bin/carbon-spark-sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# Shell script for starting the carbondata sql CLI
#

# Enter posix mode for bash
set -o posix

# Fully-qualified driver class that spark-submit will launch.
export CLASS="org.apache.spark.sql.hive.cli.CarbonSQLCLIDriver"

# Figure out where Spark is installed. Abort immediately when it is not set:
# continuing with an empty FWDIR would make the later spark-submit exec fail
# with a confusing "No such file or directory" from the root path.
if [ -z "$SPARK_HOME" ]; then
  echo "\$SPARK_HOME is not set" 1>&2
  exit 1
fi

export FWDIR=$SPARK_HOME
# Root of the CarbonData checkout: the parent of this bin/ directory.
export CARBON_SOURCE="$(cd "`dirname "$0"`"/..; pwd)"
ASSEMBLY_DIR="$CARBON_SOURCE/assembly/target/scala-2.10"
# Neutralise any user GREP_OPTIONS (e.g. --color) that would corrupt matching.
GREP_OPTIONS=

# Count candidate assembly jars. 2>/dev/null keeps ls quiet when the
# assembly directory has not been created by a build yet.
num_jars="$(ls -1 "$ASSEMBLY_DIR" 2>/dev/null | grep "^carbondata.*hadoop.*\.jar$" | wc -l)"
# Fail when the directory is missing or contains no assembly jar.
# (The previous test `-z "$ASSEMBLY_DIR"` could never be true because the
# variable is always assigned a non-empty path, so the error was unreachable.)
if [ ! -d "$ASSEMBLY_DIR" ] || [ "$num_jars" -eq "0" ]; then
  echo "Failed to find Carbondata assembly in $ASSEMBLY_DIR." 1>&2
  echo "You need to build Carbondata before running this program." 1>&2
  exit 1
fi
ASSEMBLY_JARS="$(ls -1 "$ASSEMBLY_DIR" | grep "^carbondata.*hadoop.*\.jar$" || true)"
# Refuse to guess between multiple assemblies.
if [ "$num_jars" -gt "1" ]; then
  echo "Found multiple Carbondata assembly jars in $ASSEMBLY_DIR:" 1>&2
  echo "$ASSEMBLY_JARS" 1>&2
  echo "Please remove all but one jar." 1>&2
  exit 1
fi

ASSEMBLY_JAR="${ASSEMBLY_DIR}/${ASSEMBLY_JARS}"
export JAR="$ASSEMBLY_JAR"

# Print usage help. $1 is an optional message echoed first (e.g. an error),
# $2 is the exit status to terminate with.
usage() {
  local msg="$1"
  local code="$2"
  if [ -n "$msg" ]; then
    echo "$msg"
  fi
  echo "Usage: ./bin/carbon-spark-sql [options] [cli option]"
  # Noise lines to strip from the spark-class help output below.
  local pattern
  pattern="usage"
  pattern="${pattern}\|Spark assembly has been built"
  pattern="${pattern}\|NOTE: SPARK_PREPEND_CLASSES is set"
  pattern="${pattern}\|Spark Command: "
  pattern="${pattern}\|--help"
  pattern="${pattern}\|======="

  # Spark's own option list (minus its Usage banner), then the CLI options.
  "$FWDIR"/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
  echo
  echo "CLI options:"
  "$FWDIR"/bin/spark-class "$CLASS" --help 2>&1 | grep -v "$pattern" 1>&2
  exit "$code"
}
export -f usage

# Show usage when a help flag appears anywhere in the arguments.
# The previous test `[[ "$@" = *--help ]]` globbed against ALL arguments
# joined into one string, so it only matched when the help flag was the
# last argument, and it also false-positived on any argument that merely
# ended in "-h" or "--help". Checking each argument exactly fixes both.
for arg in "$@"; do
  case "$arg" in
    -h|--help)
      usage "" 0
      ;;
  esac
done

# Hand over to spark-submit: user options first, then the driver class and
# the Carbondata assembly jar resolved above.
exec "$FWDIR"/bin/spark-submit "$@" --class "$CLASS" "$JAR"
6 changes: 6 additions & 0 deletions core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,12 @@
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>3.4.7</version>
<exclusions>
<exclusion>
<groupId>jline</groupId>
<artifactId>jline</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<build>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.hive.cli

import java.io.File

import scala.collection.JavaConverters._

import org.apache.spark.{Logging, SparkConf, SparkContext}
import org.apache.spark.scheduler.StatsReportListener
import org.apache.spark.sql.CarbonContext
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.hive.thriftserver.{SparkSQLCLIDriver, SparkSQLEnv}
import org.apache.spark.util.Utils

/**
 * Entry point for the CarbonData Spark SQL command-line interface.
 *
 * Builds a `SparkContext` and a `CarbonContext`, hands both to Spark's
 * `SparkSQLEnv`, and then delegates argument parsing and the interactive
 * REPL loop to Spark's stock `SparkSQLCLIDriver`.
 */
object CarbonSQLCLIDriver extends Logging {

  // Shared mutable state: populated once by init() and handed to
  // SparkSQLEnv so Spark's CLI machinery reuses these contexts.
  var hiveContext: HiveContext = _
  var sparkContext: SparkContext = _

  def main(args: Array[String]): Unit = {
    init()
    // Wire our contexts into Spark's SQL environment before the stock
    // CLI driver takes over; otherwise it would build its own.
    SparkSQLEnv.sparkContext = sparkContext
    SparkSQLEnv.hiveContext = hiveContext
    SparkSQLCLIDriver.installSignalHandler()
    SparkSQLCLIDriver.main(args)
  }

  /**
   * Initialise the Spark and Carbon contexts. Idempotent: a second call is
   * a no-op once `hiveContext` has been set.
   *
   * Explicit `: Unit =` replaces the deprecated procedure syntax
   * (`def init() { ... }`), which is removed in Scala 3.
   */
  def init(): Unit = {
    if (hiveContext == null) {
      val sparkConf = new SparkConf(loadDefaults = true)
      val maybeSerializer = sparkConf.getOption("spark.serializer")
      val maybeKryoReferenceTracking = sparkConf.getOption("spark.kryo.referenceTracking")
      // If user doesn't specify the appName, we want to get [SparkSQL::localHostName] instead of
      // the default appName [CarbonSQLCLIDriver] in cli or beeline.
      val maybeAppName = sparkConf
        .getOption("spark.app.name")
        .filterNot(_ == classOf[SparkSQLCLIDriver].getName)
      // Optional user-provided store location; falls back to a local dir below.
      val maybeStorePath = sparkConf.getOption("spark.carbon.storepath")

      sparkConf
        .setAppName(maybeAppName.getOrElse(s"CarbonSparkSQL::${ Utils.localHostName() }"))
        .set(
          "spark.serializer",
          maybeSerializer.getOrElse("org.apache.spark.serializer.KryoSerializer"))
        .set(
          "spark.kryo.referenceTracking",
          maybeKryoReferenceTracking.getOrElse("false"))

      sparkContext = new SparkContext(sparkConf)
      sparkContext.addSparkListener(new StatsReportListener())
      // Default Carbon store directory, relative to the working directory.
      // NOTE(review): a relative path depends on where the CLI is launched
      // from — confirm this matches the launcher script's expectations.
      val store = new File("../carbonsqlclistore")
      store.mkdirs()
      hiveContext = new CarbonContext(sparkContext,
        maybeStorePath.getOrElse(store.getCanonicalPath),
        store.getCanonicalPath)
      hiveContext.setConf("carbon.kettle.home", "../processing/carbonplugins")

      hiveContext.setConf("spark.sql.hive.version", HiveContext.hiveExecutionVersion)

      if (log.isDebugEnabled) {
        hiveContext.hiveconf.getAllProperties.asScala.toSeq.sorted.foreach { case (k, v) =>
          logDebug(s"HiveConf var: $k=$v")
        }
      }
    }
  }

}

0 comments on commit 3f56dfb

Please sign in to comment.