Remove dependence on HIVE_HOME for running tests. This was done by moving all of the Hive query tests (from branch-0.12) and data files into src/test/hive. These are used by default when HIVE_HOME is not set.
marmbrus committed Jan 31, 2014
1 parent ebb56fa commit d91e276
Showing 2,083 changed files with 67,866 additions and 14 deletions.
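
Although the diff touches 2,083 files, nearly all of them are the imported query tests and data files; the functional change is confined to TestShark.scala. In essence, hard assertions on the environment are replaced by an optional lookup that prefers a Hive dev checkout and falls back to the in-repo copies. A condensed sketch of that logic (assumption: the value backing hiveDevHome comes from the HIVE_DEV_HOME environment variable, whose binding is outside the hunks shown below; names simplified):

import java.io.File

// Condensed sketch of the fallback this commit introduces. Assumption: the
// hiveDevHome value is read from the HIVE_DEV_HOME environment variable,
// which is not shown in the excerpted hunks below.
object TestFilePaths {
  private val hiveDevHome: Option[File] =
    Option(System.getenv("HIVE_DEV_HOME")).map(new File(_))

  private val inRepoTests = new File("src/test/hive/")

  // Prefer the file under the dev checkout when it exists there; otherwise
  // use the copy checked into the repository, so no env setup is required.
  def resolve(relativePath: String): File =
    hiveDevHome
      .map(new File(_, relativePath))
      .filter(_.exists)
      .getOrElse(new File(inRepoTests, relativePath))
}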
2 changes: 2 additions & 0 deletions Makefile
@@ -3,6 +3,8 @@ all: s1 s2 s3 s4 s5 s6 s7 s8
 compile:
 	sbt test:compile
 
+# There is likely some bug here... still a good way to get a feeling if things are working in
+# parallel.
 s1: compile
 	sbt ${ARGS} -Dshark.hive.shard=0:8 "test-only catalyst.execution.HiveCompatibility"
 s2: compile
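
The shard flag above appears to split the HiveCompatibility suite across parallel sbt invocations (targets s1 through s8). The handling of a "0:8"-style spec lives in HiveCompatibility and is not part of this diff; a hypothetical sketch of how such a spec could partition a test list:

// Hypothetical illustration only: the real shard parsing in HiveCompatibility
// is not shown in this commit. A "shard:total" spec such as "0:8" keeps the
// tests whose hash falls into the given shard.
object ShardSpec {
  def select(spec: String, testNames: Seq[String]): Seq[String] = {
    val Array(shard, total) = spec.split(":").map(_.toInt)
    testNames.filter(name => math.abs(name.hashCode) % total == shard)
  }
}

// Example: ShardSpec.select("0:8", allTests) retains roughly one eighth of them.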
36 changes: 23 additions & 13 deletions src/main/scala/catalyst/execution/TestShark.scala
@@ -77,11 +77,8 @@ object TestShark extends SharkInstance {
    * Returns the value of specified environmental variable as a [[java.io.File]] after checking
    * to ensure it exists
    */
-  private def envVarToFile(envVar: String): File = {
-    assert(System.getenv(envVar) != null, s"$envVar not set")
-    val ret = new File(System.getenv(envVar))
-    assert(ret.exists(), s"Specified $envVar '${ret.getCanonicalPath}' does not exist.")
-    ret
+  private def envVarToFile(envVar: String): Option[File] = {
+    Option(System.getenv(envVar)).map(new File(_))
   }
 
   /**
@@ -90,10 +87,23 @@ object TestShark extends SharkInstance {
    */
   private def rewritePaths(cmd: String): String =
     if (cmd.toUpperCase contains "LOAD DATA")
-      cmd.replaceAll("\\.\\.", hiveDevHome.getCanonicalPath)
+      cmd.replaceAll("\\.\\.", TestShark.inRepoTests.getCanonicalPath)
     else
       cmd
 
+  val hiveFilesTemp = File.createTempFile("catalystHiveFiles", "")
+  hiveFilesTemp.delete()
+  hiveFilesTemp.mkdir()
+
+  val inRepoTests = new File("src/test/hive/")
+  def getHiveFile(path: String): File = {
+    val stripped = path.replaceAll("""\.\.\/""", "")
+    hiveDevHome
+      .map(new File(_, stripped))
+      .filter(_.exists)
+      .getOrElse(new File(inRepoTests, stripped))
+  }
+
   val describedTable = "DESCRIBE (\\w+)".r
 
   /**
@@ -156,10 +166,10 @@ object TestShark extends SharkInstance {
   val hiveQTestUtilTables = Seq(
     TestTable("src",
       "CREATE TABLE src (key INT, value STRING)".cmd,
-      s"LOAD DATA LOCAL INPATH '${hiveDevHome.getCanonicalPath}/data/files/kv1.txt' INTO TABLE src".cmd),
+      s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/kv1.txt")}' INTO TABLE src".cmd),
     TestTable("src1",
       "CREATE TABLE src1 (key INT, value STRING)".cmd,
-      s"LOAD DATA LOCAL INPATH '${hiveDevHome.getCanonicalPath}/data/files/kv3.txt' INTO TABLE src1".cmd),
+      s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/kv3.txt")}' INTO TABLE src1".cmd),
     TestTable("dest1",
       "CREATE TABLE IF NOT EXISTS dest1 (key INT, value STRING)".cmd),
     TestTable("dest2",
@@ -170,7 +180,7 @@ object TestShark extends SharkInstance {
       runSqlHive("CREATE TABLE srcpart (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING)")
       for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- Seq("11", "12")) {
         runSqlHive(
-          s"""LOAD DATA LOCAL INPATH '${hiveDevHome.getCanonicalPath}/data/files/kv1.txt'
+          s"""LOAD DATA LOCAL INPATH '${getHiveFile("data/files/kv1.txt")}'
             |OVERWRITE INTO TABLE srcpart PARTITION (ds='$ds',hr='$hr')
           """.stripMargin)
       }
@@ -179,7 +189,7 @@ object TestShark extends SharkInstance {
       runSqlHive("CREATE TABLE srcpart1 (key INT, value STRING) PARTITIONED BY (ds STRING, hr INT)")
       for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- 11 to 12) {
         runSqlHive(
-          s"""LOAD DATA LOCAL INPATH '${hiveDevHome.getCanonicalPath}/data/files/kv1.txt'
+          s"""LOAD DATA LOCAL INPATH '${getHiveFile("data/files/kv1.txt")}'
             |OVERWRITE INTO TABLE srcpart1 PARTITION (ds='$ds',hr='$hr')
           """.stripMargin)
       }
@@ -206,7 +216,7 @@ object TestShark extends SharkInstance {
 
       catalog.client.createTable(srcThrift)
 
-      runSqlHive(s"LOAD DATA LOCAL INPATH '${hiveDevHome.getCanonicalPath}/data/files/complex.seq' INTO TABLE src_thrift")
+      runSqlHive(s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/complex.seq")}' INTO TABLE src_thrift")
     }),
     TestTable("serdeins",
       s"""CREATE TABLE serdeins (key INT, value STRING)
@@ -219,7 +229,7 @@ object TestShark extends SharkInstance {
         |ROW FORMAT SERDE '${classOf[RegexSerDe].getCanonicalName}'
         |WITH SERDEPROPERTIES ("input.regex" = "([^ ]*)\t([^ ]*)")
       """.stripMargin.cmd,
-      s"LOAD DATA LOCAL INPATH '${hiveDevHome.getCanonicalPath}/data/files/sales.txt' INTO TABLE sales".cmd),
+      s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/sales.txt")}' INTO TABLE sales".cmd),
     TestTable("episodes",
       s"""CREATE TABLE episodes (title STRING, air_date STRING, doctor INT)
         |ROW FORMAT SERDE '${classOf[AvroSerDe].getCanonicalName}'
@@ -251,7 +261,7 @@ object TestShark extends SharkInstance {
         | }'
         |)
       """.stripMargin.cmd,
-      s"LOAD DATA LOCAL INPATH '${hiveDevHome.getCanonicalPath}/data/files/episodes.avro' INTO TABLE episodes".cmd)
+      s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/episodes.avro")}' INTO TABLE episodes".cmd)
   )
 
   hiveQTestUtilTables.foreach(registerTestTable)
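
With the pieces above in place, a path such as "../data/files/kv1.txt" resolves under HIVE_DEV_HOME when the file exists there and under src/test/hive otherwise. A minimal smoke check of that behavior (hypothetical, not part of this commit):

import java.io.File

// Hypothetical smoke test: with HIVE_DEV_HOME unset, getHiveFile should strip
// the leading "../" and resolve into the in-repo test tree.
object GetHiveFileCheck extends App {
  val resolved: File = TestShark.getHiveFile("../data/files/kv1.txt")
  assert(resolved.getPath.contains("src/test/hive"),
    s"expected fallback into src/test/hive, got $resolved")
  println(s"kv1.txt -> ${resolved.getCanonicalPath}")
}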
78 changes: 78 additions & 0 deletions src/test/hive/data/conf/hive-log4j.properties
@@ -0,0 +1,78 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Define some default values that can be overridden by system properties
hive.root.logger=DEBUG,DRFA
hive.log.dir=${build.dir.hive}/ql/tmp/
hive.log.file=hive.log

# Define the root logger to the system property "hadoop.root.logger".
log4j.rootLogger=${hive.root.logger}, EventCounter

# Logging Threshold
log4j.threshhold=WARN

#
# Daily Rolling File Appender
#

log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
log4j.appender.DRFA.File=${hive.log.dir}/${hive.log.file}

# Rollover at midnight
log4j.appender.DRFA.DatePattern=.yyyy-MM-dd

# 30-day backup
#log4j.appender.DRFA.MaxBackupIndex=30
log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout

# Pattern format: Date LogLevel LoggerName LogMessage
#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
# Debugging Pattern format
log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n


#
# console
# Add "console" to rootlogger above if you want to use this
#

log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n

#custom logging levels
#log4j.logger.xxx=DEBUG

#
# Event Counter Appender
# Sends counts of logging messages at different severity levels to Hadoop Metrics.
#
log4j.appender.EventCounter=org.apache.hadoop.hive.shims.HiveEventCounter


log4j.category.DataNucleus=ERROR,DRFA
log4j.category.Datastore=ERROR,DRFA
log4j.category.Datastore.Schema=ERROR,DRFA
log4j.category.JPOX.Datastore=ERROR,DRFA
log4j.category.JPOX.Plugin=ERROR,DRFA
log4j.category.JPOX.MetaData=ERROR,DRFA
log4j.category.JPOX.Query=ERROR,DRFA
log4j.category.JPOX.General=ERROR,DRFA
log4j.category.JPOX.Enhancer=ERROR,DRFA
log4j.logger.org.apache.hadoop.conf.Configuration=ERROR,DRFA

197 changes: 197 additions & 0 deletions src/test/hive/data/conf/hive-site.xml
@@ -0,0 +1,197 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<configuration>

<!-- Hive Configuration can either be stored in this file or in the hadoop configuration files -->
<!-- that are implied by Hadoop setup variables. -->
<!-- Aside from Hadoop setup variables - this file is provided as a convenience so that Hive -->
<!-- users do not have to edit hadoop configuration files (that may be managed as a centralized -->
<!-- resource). -->

<!-- Hive Execution Parameters -->
<property>
<name>hadoop.tmp.dir</name>
<value>${build.dir.hive}/test/hadoop-${user.name}</value>
<description>A base for other temporary directories.</description>
</property>

<!--
<property>
<name>hive.exec.reducers.max</name>
<value>1</value>
<description>maximum number of reducers</description>
</property>
-->

<property>
<name>hive.exec.scratchdir</name>
<value>${build.dir}/scratchdir</value>
<description>Scratch space for Hive jobs</description>
</property>

<property>
<name>hive.exec.local.scratchdir</name>
<value>${build.dir}/localscratchdir/</value>
<description>Local scratch space for Hive jobs</description>
</property>

<property>
<name>javax.jdo.option.ConnectionURL</name>
<!-- note: variable substitution not working here because it's loaded by jdo, not Hive -->
<value>jdbc:derby:;databaseName=../build/test/junit_metastore_db;create=true</value>
</property>

<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>org.apache.derby.jdbc.EmbeddedDriver</value>
</property>

<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>APP</value>
</property>

<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>mine</value>
</property>

<property>
<!-- this should eventually be deprecated since the metastore should supply this -->
<name>hive.metastore.warehouse.dir</name>
<value>${test.warehouse.dir}</value>
<description></description>
</property>

<property>
<name>hive.metastore.metadb.dir</name>
<value>file://${build.dir}/test/data/metadb/</value>
<description>
Required by metastore server or if the uris argument below is not supplied
</description>
</property>

<property>
<name>test.log.dir</name>
<value>${build.dir}/test/logs</value>
<description></description>
</property>

<property>
<name>test.src.dir</name>
<value>file://${build.dir}/src/test</value>
<description></description>
</property>

<property>
<name>test.data.files</name>
<value>${user.dir}/../data/files</value>
<description></description>
</property>

<property>
<name>test.query.file1</name>
<value>file://${user.dir}/../ql/src/test/org/apache/hadoop/hive/ql/input2.q</value>
<value></value>
<description></description>
</property>

<property>
<name>hive.jar.path</name>
<value>${build.dir.hive}/ql/hive-exec-${version}.jar</value>
<description></description>
</property>

<property>
<name>hive.metastore.rawstore.impl</name>
<value>org.apache.hadoop.hive.metastore.ObjectStore</value>
<description>Name of the class that implements org.apache.hadoop.hive.metastore.rawstore interface. This class is used to store and retrieval of raw metadata objects such as table, database</description>
</property>

<property>
<name>hive.querylog.location</name>
<value>${build.dir}/tmp</value>
<description>Location of the structured hive logs</description>
</property>

<property>
<name>hive.exec.pre.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter, org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables</value>
<description>Pre Execute Hook for Tests</description>
</property>

<property>
<name>hive.exec.post.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.PostExecutePrinter</value>
<description>Post Execute Hook for Tests</description>
</property>

<property>
<name>hive.task.progress</name>
<value>false</value>
<description>Track progress of a task</description>
</property>

<property>
<name>hive.support.concurrency</name>
<value>true</value>
<description>Whether hive supports concurrency or not. A zookeeper instance must be up and running for the default hive lock manager to support read-write locks.</description>
</property>

<property>
<name>fs.pfile.impl</name>
<value>org.apache.hadoop.fs.ProxyLocalFileSystem</value>
<description>A proxy for local file system used for cross file system testing</description>
</property>

<property>
<name>hive.exec.mode.local.auto</name>
<value>false</value>
<description>
Let hive determine whether to run in local mode automatically
Disabling this for tests so that minimr is not affected
</description>
</property>

<property>
<name>hive.auto.convert.join</name>
<value>false</value>
<description>Whether Hive enable the optimization about converting common join into mapjoin based on the input file size</description>
</property>

<property>
<name>hive.ignore.mapjoin.hint</name>
<value>false</value>
<description>Whether Hive ignores the mapjoin hint</description>
</property>

<property>
<name>hive.input.format</name>
<value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value>
<description>The default input format, if it is not specified, the system assigns it. It is set to HiveInputFormat for hadoop versions 17, 18 and 19, whereas it is set to CombineHiveInputFormat for hadoop 20. The user can always overwrite it - if there is a bug in CombineHiveInputFormat, it can always be manually set to HiveInputFormat. </description>
</property>

<property>
<name>hive.default.rcfile.serde</name>
<value>org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe</value>
<description>The default SerDe hive will use for the rcfile format</description>
</property>

</configuration>
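
Placeholders like ${build.dir} in this file are expanded by Hadoop's Configuration against system properties when a value is read, which is how the harness redirects scratch, log, and metastore directories per build. A sketch of loading the file directly to inspect a value (not part of this commit):

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

// Sketch: read the test hive-site.xml with Hadoop's Configuration class.
// The ${build.dir} placeholders expand from system properties, so they must
// be set before the value is read.
object TestConfProbe extends App {
  System.setProperty("build.dir", "target")
  val conf = new Configuration(false) // don't load Hadoop's default resources
  conf.addResource(new Path("src/test/hive/data/conf/hive-site.xml"))
  println(conf.get("hive.exec.scratchdir")) // e.g. target/scratchdir
}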
4 changes: 4 additions & 0 deletions src/test/hive/data/files/SortCol1Col2.txt
@@ -0,0 +1,4 @@
1	10
1	11
2	10
2	11
5 changes: 5 additions & 0 deletions src/test/hive/data/files/SortCol2Col1.txt
@@ -0,0 +1,5 @@
1	10
2	10
1	11
2	11

4 changes: 4 additions & 0 deletions src/test/hive/data/files/SortDescCol1Col2.txt
@@ -0,0 +1,4 @@
2	11
2	10
1	11
1	10
… (2,076 more changed files not shown)
