Skip to content

Commit

Permalink
[CARBONDATA-48] Support Spark SQL CLI in Carbon. This closes #33
Browse files Browse the repository at this point in the history
  • Loading branch information
chenliang613 committed Jul 12, 2016
2 parents 90139be + 8619cb1 commit 3f56dfb
Show file tree
Hide file tree
Showing 3 changed files with 168 additions and 0 deletions.
79 changes: 79 additions & 0 deletions bin/carbon-spark-sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# Shell script for starting the carbondata sql CLI
#

# Enter posix mode for bash
set -o posix

# Fully-qualified driver class that spark-submit will launch.
export CLASS="org.apache.spark.sql.hive.cli.CarbonSQLCLIDriver"

# Figure out where Spark is installed. Abort immediately when it is not set:
# continuing with an empty FWDIR would make the later spark-submit exec fail
# with a confusing "No such file or directory" from the root path.
if [ -z "$SPARK_HOME" ]; then
  echo "\$SPARK_HOME is not set" 1>&2
  exit 1
fi

export FWDIR=$SPARK_HOME
# Root of the CarbonData checkout: the parent of this bin/ directory.
export CARBON_SOURCE="$(cd "`dirname "$0"`"/..; pwd)"
ASSEMBLY_DIR="$CARBON_SOURCE/assembly/target/scala-2.10"
# Neutralise any user GREP_OPTIONS (e.g. --color) that would corrupt matching.
GREP_OPTIONS=

# Count candidate assembly jars. 2>/dev/null keeps ls quiet when the
# assembly directory has not been created by a build yet.
num_jars="$(ls -1 "$ASSEMBLY_DIR" 2>/dev/null | grep "^carbondata.*hadoop.*\.jar$" | wc -l)"
# Fail when the directory is missing or contains no assembly jar.
# (The previous test `-z "$ASSEMBLY_DIR"` could never be true because the
# variable is always assigned a non-empty path, so the error was unreachable.)
if [ ! -d "$ASSEMBLY_DIR" ] || [ "$num_jars" -eq "0" ]; then
  echo "Failed to find Carbondata assembly in $ASSEMBLY_DIR." 1>&2
  echo "You need to build Carbondata before running this program." 1>&2
  exit 1
fi
ASSEMBLY_JARS="$(ls -1 "$ASSEMBLY_DIR" | grep "^carbondata.*hadoop.*\.jar$" || true)"
# Refuse to guess between multiple assemblies.
if [ "$num_jars" -gt "1" ]; then
  echo "Found multiple Carbondata assembly jars in $ASSEMBLY_DIR:" 1>&2
  echo "$ASSEMBLY_JARS" 1>&2
  echo "Please remove all but one jar." 1>&2
  exit 1
fi

ASSEMBLY_JAR="${ASSEMBLY_DIR}/${ASSEMBLY_JARS}"
export JAR="$ASSEMBLY_JAR"

# Print usage help. $1 is an optional message echoed first (e.g. an error),
# $2 is the exit status to terminate with.
usage() {
  local msg="$1"
  local code="$2"
  if [ -n "$msg" ]; then
    echo "$msg"
  fi
  echo "Usage: ./bin/carbon-spark-sql [options] [cli option]"
  # Noise lines to strip from the spark-class help output below.
  local pattern
  pattern="usage"
  pattern="${pattern}\|Spark assembly has been built"
  pattern="${pattern}\|NOTE: SPARK_PREPEND_CLASSES is set"
  pattern="${pattern}\|Spark Command: "
  pattern="${pattern}\|--help"
  pattern="${pattern}\|======="

  # Spark's own option list (minus its Usage banner), then the CLI options.
  "$FWDIR"/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
  echo
  echo "CLI options:"
  "$FWDIR"/bin/spark-class "$CLASS" --help 2>&1 | grep -v "$pattern" 1>&2
  exit "$code"
}
export -f usage

# Show usage when a help flag appears anywhere in the arguments.
# The previous test `[[ "$@" = *--help ]]` globbed against ALL arguments
# joined into one string, so it only matched when the help flag was the
# last argument, and it also false-positived on any argument that merely
# ended in "-h" or "--help". Checking each argument exactly fixes both.
for arg in "$@"; do
  case "$arg" in
    -h|--help)
      usage "" 0
      ;;
  esac
done

# Hand over to spark-submit: user options first, then the driver class and
# the Carbondata assembly jar resolved above.
exec "$FWDIR"/bin/spark-submit "$@" --class "$CLASS" "$JAR"
6 changes: 6 additions & 0 deletions core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,12 @@
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>3.4.7</version>
<exclusions>
<exclusion>
<groupId>jline</groupId>
<artifactId>jline</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<build>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.hive.cli

import java.io.File

import scala.collection.JavaConverters._

import org.apache.spark.{Logging, SparkConf, SparkContext}
import org.apache.spark.scheduler.StatsReportListener
import org.apache.spark.sql.CarbonContext
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.hive.thriftserver.{SparkSQLCLIDriver, SparkSQLEnv}
import org.apache.spark.util.Utils

/**
 * Entry point for the CarbonData Spark SQL command-line interface.
 *
 * Builds a `SparkContext` and a `CarbonContext`, hands both to Spark's
 * `SparkSQLEnv`, and then delegates argument parsing and the interactive
 * REPL loop to Spark's stock `SparkSQLCLIDriver`.
 */
object CarbonSQLCLIDriver extends Logging {

  // Shared mutable state: populated once by init() and handed to
  // SparkSQLEnv so Spark's CLI machinery reuses these contexts.
  var hiveContext: HiveContext = _
  var sparkContext: SparkContext = _

  def main(args: Array[String]): Unit = {
    init()
    // Wire our contexts into Spark's SQL environment before the stock
    // CLI driver takes over; otherwise it would build its own.
    SparkSQLEnv.sparkContext = sparkContext
    SparkSQLEnv.hiveContext = hiveContext
    SparkSQLCLIDriver.installSignalHandler()
    SparkSQLCLIDriver.main(args)
  }

  /**
   * Initialise the Spark and Carbon contexts. Idempotent: a second call is
   * a no-op once `hiveContext` has been set.
   *
   * Explicit `: Unit =` replaces the deprecated procedure syntax
   * (`def init() { ... }`), which is removed in Scala 3.
   */
  def init(): Unit = {
    if (hiveContext == null) {
      val sparkConf = new SparkConf(loadDefaults = true)
      val maybeSerializer = sparkConf.getOption("spark.serializer")
      val maybeKryoReferenceTracking = sparkConf.getOption("spark.kryo.referenceTracking")
      // If user doesn't specify the appName, we want to get [SparkSQL::localHostName] instead of
      // the default appName [CarbonSQLCLIDriver] in cli or beeline.
      val maybeAppName = sparkConf
        .getOption("spark.app.name")
        .filterNot(_ == classOf[SparkSQLCLIDriver].getName)
      // Optional user-provided store location; falls back to a local dir below.
      val maybeStorePath = sparkConf.getOption("spark.carbon.storepath")

      sparkConf
        .setAppName(maybeAppName.getOrElse(s"CarbonSparkSQL::${ Utils.localHostName() }"))
        .set(
          "spark.serializer",
          maybeSerializer.getOrElse("org.apache.spark.serializer.KryoSerializer"))
        .set(
          "spark.kryo.referenceTracking",
          maybeKryoReferenceTracking.getOrElse("false"))

      sparkContext = new SparkContext(sparkConf)
      sparkContext.addSparkListener(new StatsReportListener())
      // Default Carbon store directory, relative to the working directory.
      // NOTE(review): a relative path depends on where the CLI is launched
      // from — confirm this matches the launcher script's expectations.
      val store = new File("../carbonsqlclistore")
      store.mkdirs()
      hiveContext = new CarbonContext(sparkContext,
        maybeStorePath.getOrElse(store.getCanonicalPath),
        store.getCanonicalPath)
      hiveContext.setConf("carbon.kettle.home", "../processing/carbonplugins")

      hiveContext.setConf("spark.sql.hive.version", HiveContext.hiveExecutionVersion)

      if (log.isDebugEnabled) {
        hiveContext.hiveconf.getAllProperties.asScala.toSeq.sorted.foreach { case (k, v) =>
          logDebug(s"HiveConf var: $k=$v")
        }
      }
    }
  }

}

0 comments on commit 3f56dfb

Please sign in to comment.