Skip to content

Commit

Permalink
Update test harness to work with both Hive 12 and 13.
Browse files Browse the repository at this point in the history
  • Loading branch information
marmbrus committed Oct 24, 2014
1 parent f044843 commit 8f6b09a
Show file tree
Hide file tree
Showing 21 changed files with 184 additions and 76 deletions.
2 changes: 1 addition & 1 deletion dev/run-tests
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ CURRENT_BLOCK=$BLOCK_SPARK_UNIT_TESTS
# If the Spark SQL tests are enabled, run the tests with the Hive profiles enabled.
# This must be a single argument, as it is.
if [ -n "$_RUN_SQL_TESTS" ]; then
SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive -Phive-0.12.0"
SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive"
fi

if [ -n "$_SQL_TESTS_ONLY" ]; then
Expand Down
6 changes: 5 additions & 1 deletion project/SparkBuild.scala
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,11 @@ object Hive {
|import org.apache.spark.sql.hive._
|import org.apache.spark.sql.hive.test.TestHive._
|import org.apache.spark.sql.parquet.ParquetTestData""".stripMargin,
cleanupCommands in console := "sparkContext.stop()"
cleanupCommands in console := "sparkContext.stop()",
// Some of our log4j jars make it impossible to submit jobs from this JVM to Hive Map/Reduce
// in order to generate golden files. This is only required for developers who are
// adding new query tests.
fullClasspath in Test := (fullClasspath in Test).value.filterNot { f => f.toString.contains("jcl-over") }
)

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ object HiveTypeCoercion {
*/
trait HiveTypeCoercion {

import HiveTypeCoercion._

val typeCoercionRules =
PropagateTypes ::
ConvertNaNs ::
Expand Down Expand Up @@ -340,6 +342,13 @@ trait HiveTypeCoercion {
// Skip nodes whose children have not been resolved yet.
case e if !e.childrenResolved => e

case a @ CreateArray(children) if !a.resolved =>
val commonType = a.childTypes.reduce(
(a,b) =>
findTightestCommonType(a,b).getOrElse(StringType))
CreateArray(
children.map(c => if (c.dataType == commonType) c else Cast(c, commonType)))

// Promote SUM, SUM DISTINCT and AVERAGE to largest types to prevent overflows.
case s @ Sum(e @ DecimalType()) => s // Decimal is already the biggest.
case Sum(e @ IntegralType()) if e.dataType != LongType => Sum(Cast(e, LongType))
Expand All @@ -356,6 +365,10 @@ trait HiveTypeCoercion {
Average(Cast(e, LongType))
case Average(e @ FractionalType()) if e.dataType != DoubleType =>
Average(Cast(e, DoubleType))

// Hive lets you do aggregation of timestamps... for some reason
case Sum(e @ TimestampType()) => Sum(Cast(e, DoubleType))
case Average(e @ TimestampType()) => Average(Cast(e, DoubleType))
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,28 @@ case class GetField(child: Expression, fieldName: String) extends UnaryExpressio

override def toString = s"$child.$fieldName"
}

/**
* Returns an Array containing the evaluation of all children expressions.
*/
/**
 * An expression that evaluates each child and collects the results into an Array
 * value. All children must resolve to a single common data type before this
 * expression itself is considered resolved.
 */
case class CreateArray(children: Seq[Expression]) extends Expression {
  override type EvaluatedType = Any

  // The distinct set of child data types; exactly one (or zero, for an empty
  // array) means the element types agree.
  lazy val childTypes = children.map(_.dataType).distinct

  // Resolved only once every child is resolved and their types have converged.
  override lazy val resolved = childrenResolved && childTypes.size <= 1

  override def dataType: DataType = {
    assert(resolved, s"Invalid dataType of mixed ArrayType ${childTypes.mkString(",")}")
    // An empty array defaults to NullType as its element type.
    val elementType = childTypes.headOption.getOrElse(NullType)
    ArrayType(elementType)
  }

  // The array container itself is never null (individual elements may be).
  override def nullable: Boolean = false

  override def eval(input: Row): Any =
    for (child <- children) yield child.eval(input)

  override def toString = s"Array(${children.mkString(",")})"
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ import org.apache.spark.sql.catalyst.optimizer.{Optimizer, DefaultOptimizer}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.catalyst.types.DataType
import org.apache.spark.sql.columnar.InMemoryRelation
import org.apache.spark.sql.execution.{SparkStrategies, _}
import org.apache.spark.sql.json._
import org.apache.spark.sql.parquet.ParquetRelation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import java.util.{Locale, TimeZone}
import org.scalatest.BeforeAndAfter

import org.apache.spark.sql.SQLConf
import org.apache.spark.sql.hive.HiveShim
import org.apache.spark.sql.hive.test.TestHive

/**
Expand Down Expand Up @@ -135,6 +136,9 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"stats20",
"alter_merge_stats",
"columnstats.*",
"annotate_stats.*",
"database_drop",
"index_serde",


// Hive seems to think 1.0 > NaN = true && 1.0 < NaN = false... which is wrong.
Expand Down Expand Up @@ -211,32 +215,27 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"describe_comment_indent",

// Limit clause without an ordering, which causes failure.
"orc_predicate_pushdown"
)
"orc_predicate_pushdown",

// Requires precision decimal support:
"decimal_1",
"udf_pmod",
"udf_when",
"udf_case",
"udf_to_double",
"udf_to_float",

// Needs constant object inspectors
"udf_round",
"udf7"
) ++ HiveShim.compatibilityBlackList

/**
* The set of tests that are believed to be working in catalyst. Tests not on whiteList or
* blacklist are implicitly marked as ignored.
*/
override def whiteList = Seq(
"add_part_exist",
"dynamic_partition_skip_default",
"infer_bucket_sort_dyn_part",
"load_dyn_part1",
"load_dyn_part2",
"load_dyn_part3",
"load_dyn_part4",
"load_dyn_part5",
"load_dyn_part6",
"load_dyn_part7",
"load_dyn_part8",
"load_dyn_part9",
"load_dyn_part10",
"load_dyn_part11",
"load_dyn_part12",
"load_dyn_part13",
"load_dyn_part14",
"load_dyn_part14_win",
"add_part_multiple",
"add_partition_no_whitelist",
"add_partition_with_whitelist",
Expand All @@ -256,6 +255,11 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"alter_varchar2",
"alter_view_as_select",
"ambiguous_col",
"annotate_stats_join",
"annotate_stats_limit",
"annotate_stats_part",
"annotate_stats_table",
"annotate_stats_union",
"auto_join0",
"auto_join1",
"auto_join10",
Expand Down Expand Up @@ -299,6 +303,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"auto_sortmerge_join_13",
"auto_sortmerge_join_14",
"auto_sortmerge_join_15",
"auto_sortmerge_join_16",
"auto_sortmerge_join_2",
"auto_sortmerge_join_3",
"auto_sortmerge_join_4",
Expand Down Expand Up @@ -340,7 +345,10 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"create_skewed_table1",
"create_struct_table",
"cross_join",
"cross_product_check_1",
"cross_product_check_2",
"ct_case_insensitive",
"database_drop",
"database_location",
"database_properties",
"date_2",
Expand All @@ -360,32 +368,42 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"diff_part_input_formats",
"disable_file_format_check",
"disallow_incompatible_type_change_off",
"distinct_stats",
"drop_database_removes_partition_dirs",
"drop_function",
"drop_index",
"drop_index_removes_partition_dirs",
"drop_multi_partitions",
"drop_partitions_filter",
"drop_partitions_filter2",
"drop_partitions_filter3",
"drop_partitions_ignore_protection",
"drop_table",
"drop_table2",
"drop_table_removes_partition_dirs",
"drop_view",
"dynamic_partition_skip_default",
"escape_clusterby1",
"escape_distributeby1",
"escape_orderby1",
"escape_sortby1",
"explain_rearrange",
"fetch_aggregation",
"fileformat_mix",
"fileformat_sequencefile",
"fileformat_text",
"filter_join_breaktask",
"filter_join_breaktask2",
"groupby1",
"groupby11",
"groupby12",
"groupby1_limit",
"groupby1_map",
"groupby1_map_nomap",
"groupby1_map_skew",
"groupby1_noskew",
"groupby2",
"groupby2_limit",
"groupby2_map",
"groupby2_map_skew",
"groupby2_noskew",
Expand All @@ -406,6 +424,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"groupby7_map_multi_single_reducer",
"groupby7_map_skew",
"groupby7_noskew",
"groupby7_noskew_multi_single_reducer",
"groupby8",
"groupby8_map",
"groupby8_map_skew",
Expand All @@ -432,6 +451,8 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"groupby_sort_test_1",
"having",
"implicit_cast1",
"index_serde",
"infer_bucket_sort_dyn_part",
"innerjoin",
"inoutdriver",
"input",
Expand Down Expand Up @@ -502,7 +523,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"join17",
"join18",
"join19",
"join_1to1",
"join2",
"join20",
"join21",
Expand Down Expand Up @@ -534,6 +554,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"join7",
"join8",
"join9",
"join_1to1",
"join_array",
"join_casesensitive",
"join_empty",
Expand All @@ -557,7 +578,21 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"literal_double",
"literal_ints",
"literal_string",
"load_dyn_part1",
"load_dyn_part10",
"load_dyn_part11",
"load_dyn_part12",
"load_dyn_part13",
"load_dyn_part14",
"load_dyn_part14_win",
"load_dyn_part2",
"load_dyn_part3",
"load_dyn_part4",
"load_dyn_part5",
"load_dyn_part6",
"load_dyn_part7",
"load_dyn_part8",
"load_dyn_part9",
"load_file_with_space_in_the_name",
"loadpart1",
"louter_join_ppr",
Expand All @@ -578,13 +613,13 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"merge1",
"merge2",
"mergejoins",
"multigroupby_singlemr",
"multiMapJoin1",
"multiMapJoin2",
"multi_insert_gby",
"multi_insert_gby3",
"multi_insert_lateral_view",
"multi_join_union",
"multiMapJoin1",
"multiMapJoin2",
"multigroupby_singlemr",
"noalias_subq1",
"nomore_ambiguous_table_col",
"nonblock_op_deduplicate",
Expand All @@ -607,10 +642,10 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"outer_join_ppr",
"parallel",
"parenthesis_star_by",
"partcols1",
"part_inherit_tbl_props",
"part_inherit_tbl_props_empty",
"part_inherit_tbl_props_with_star",
"partcols1",
"partition_date",
"partition_schema1",
"partition_serde_format",
Expand Down Expand Up @@ -641,7 +676,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"ppd_outer_join5",
"ppd_random",
"ppd_repeated_alias",
"ppd_transform",
"ppd_udf_col",
"ppd_union",
"ppr_allchildsarenull",
Expand Down Expand Up @@ -674,15 +708,15 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"serde_regex",
"serde_reported_schema",
"set_variable_sub",
"show_create_table_partitioned",
"show_create_table_delimited",
"show_columns",
"show_create_table_alter",
"show_create_table_view",
"show_create_table_serde",
"show_create_table_db_table",
"show_create_table_delimited",
"show_create_table_does_not_exist",
"show_create_table_index",
"show_columns",
"show_create_table_partitioned",
"show_create_table_serde",
"show_create_table_view",
"show_describe_func_quotes",
"show_functions",
"show_partitions",
Expand Down Expand Up @@ -738,12 +772,14 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"udaf_covar_pop",
"udaf_covar_samp",
"udaf_histogram_numeric",
"udf_10_trims",
"udf2",
"udf6",
"udf7",
"udf8",
"udf9",
"udf_10_trims",
"udf_E",
"udf_PI",
"udf_abs",
"udf_acos",
"udf_add",
Expand Down Expand Up @@ -774,14 +810,13 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"udf_cos",
"udf_count",
"udf_date_add",
"udf_datediff",
"udf_date_sub",
"udf_datediff",
"udf_day",
"udf_dayofmonth",
"udf_degrees",
"udf_div",
"udf_double",
"udf_E",
"udf_elt",
"udf_equal",
"udf_exp",
Expand Down Expand Up @@ -826,7 +861,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"udf_nvl",
"udf_or",
"udf_parse_url",
"udf_PI",
"udf_pmod",
"udf_positive",
"udf_pow",
Expand Down
Loading

0 comments on commit 8f6b09a

Please sign in to comment.