Skip to content

Commit

Permalink
Whenever a not equal to filter is applied on dictionary column with n…
Browse files Browse the repository at this point in the history
…umeric datatype, the cast added by the Spark plan is removed while creating Carbon filters from the Spark filter. Because of this plan modification, Spark returns incorrect results.
  • Loading branch information
manishgupta88 committed Mar 22, 2017
1 parent a1b8afa commit 45f552c
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 83 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
1,2015-07-23 00:00:00,china,aaa1,phone197,ASD69643,15000
7,2015-07-24 00:00:00,china,aaa2,phone756,ASD42892,15001
7,2015-07-25 00:00:00,china,aaa3,phone1904,ASD37014,15002
Original file line number Diff line number Diff line change
Expand Up @@ -39,25 +39,17 @@ class DateDataTypeDirectDictionaryTest extends QueryTest with BeforeAndAfterAll
try {
CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "true")
sql("drop table if exists directDictionaryTable ")
sql("drop table if exists directDictionaryTable_hive ")
sql(
"CREATE TABLE if not exists directDictionaryTable (empno int,doj date, " +
"salary int) " +
"STORED BY 'org.apache.carbondata.format'"
)

sql(
"CREATE TABLE if not exists directDictionaryTable_hive (empno int,doj date, " +
"salary int) " +
"row format delimited fields terminated by ','"
)

CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "yyyy-MM-dd")
val csvFilePath = s"$resourcesPath/datasamplefordate.csv"
sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE directDictionaryTable OPTIONS" +
"('DELIMITER'= ',', 'QUOTECHAR'= '\"')" )
sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE directDictionaryTable_hive")
} catch {
case x: Throwable =>
x.printStackTrace()
Expand Down Expand Up @@ -87,7 +79,7 @@ class DateDataTypeDirectDictionaryTest extends QueryTest with BeforeAndAfterAll

test("test direct dictionary for not equals condition") {
checkAnswer(
sql("select doj from directDictionaryTable where doj != '2016-04-14 00:00:00'"),
sql("select doj from directDictionaryTable where doj != '2016-04-14'"),
Seq(Row(Date.valueOf("2016-03-14"))
)
)
Expand All @@ -103,7 +95,7 @@ class DateDataTypeDirectDictionaryTest extends QueryTest with BeforeAndAfterAll

test("select doj from directDictionaryTable with equals filter") {
checkAnswer(
sql("select doj from directDictionaryTable where doj = '2016-03-14 00:00:00'"),
sql("select doj from directDictionaryTable where doj = '2016-03-14'"),
Seq(Row(Date.valueOf("2016-03-14")))
)

Expand All @@ -119,7 +111,7 @@ class DateDataTypeDirectDictionaryTest extends QueryTest with BeforeAndAfterAll
test("select doj from directDictionaryTable with regexp_replace NOT IN filter") {
checkAnswer(
sql("select doj from directDictionaryTable where regexp_replace(doj, '-', '/') NOT IN ('2016/03/14')"),
sql("select doj from directDictionaryTable_hive where regexp_replace(doj, '-', '/') NOT IN ('2016/03/14')")
Seq(Row(Date.valueOf("2016-04-14")))
)
}

Expand All @@ -139,7 +131,6 @@ class DateDataTypeDirectDictionaryTest extends QueryTest with BeforeAndAfterAll

override def afterAll {
sql("drop table directDictionaryTable")
sql("drop table directDictionaryTable_hive")
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "dd-MM-yyyy")
CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "false")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ class DateDataTypeDirectDictionaryWithNoDictTestCase extends QueryTest with Befo

test("select doj from directDictionaryTable with equals filter") {
checkAnswer(
sql("select doj from directDictionaryTable where doj='2016-03-14 15:00:09'"),
sql("select doj from directDictionaryTable where doj='2016-03-14'"),
Seq(Row(Date.valueOf("2016-03-14")))
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,24 +141,26 @@ object CarbonFilters {
}

case And(left, right) =>
(translate(left) ++ translate(right)).reduceOption(sources.And)
val leftFilter = translate(left, or)
val rightFilter = translate(right, or)
if (or) {
if (leftFilter.isDefined && rightFilter.isDefined) {
(leftFilter ++ rightFilter).reduceOption(sources.And)
} else {
None
}
} else {
(leftFilter ++ rightFilter).reduceOption(sources.And)
}

case EqualTo(a: Attribute, Literal(v, t)) =>
Some(sources.EqualTo(a.name, v))
case EqualTo(l@Literal(v, t), a: Attribute) =>
Some(sources.EqualTo(a.name, v))
case EqualTo(Cast(a: Attribute, _), Literal(v, t)) =>
Some(sources.EqualTo(a.name, v))
case EqualTo(Literal(v, t), Cast(a: Attribute, _)) =>
Some(sources.EqualTo(a.name, v))
case Not(EqualTo(a: Attribute, Literal(v, t))) =>
Some(sources.Not(sources.EqualTo(a.name, v)))
case Not(EqualTo(Literal(v, t), a: Attribute)) =>
Some(sources.Not(sources.EqualTo(a.name, v)))
case Not(EqualTo(Cast(a: Attribute, _), Literal(v, t))) =>
Some(sources.Not(sources.EqualTo(a.name, v)))
case Not(EqualTo(Literal(v, t), Cast(a: Attribute, _))) =>
Some(sources.Not(sources.EqualTo(a.name, v)))
case IsNotNull(a: Attribute) =>
Some(sources.IsNotNull(a.name))
case IsNull(a: Attribute) =>
Expand All @@ -169,20 +171,10 @@ object CarbonFilters {
case In(a: Attribute, list) if !list.exists(!_.isInstanceOf[Literal]) =>
val hSet = list.map(e => e.eval(EmptyRow))
Some(sources.In(a.name, hSet.toArray))
case Not(In(Cast(a: Attribute, _), list)) if !list.exists(!_.isInstanceOf[Literal]) =>
val hSet = list.map(e => e.eval(EmptyRow))
Some(sources.Not(sources.In(a.name, hSet.toArray)))
case In(Cast(a: Attribute, _), list) if !list.exists(!_.isInstanceOf[Literal]) =>
val hSet = list.map(e => e.eval(EmptyRow))
Some(sources.In(a.name, hSet.toArray))
case GreaterThan(a: Attribute, Literal(v, t)) =>
Some(sources.GreaterThan(a.name, v))
case GreaterThan(Literal(v, t), a: Attribute) =>
Some(sources.LessThan(a.name, v))
case GreaterThan(Cast(a: Attribute, _), Literal(v, t)) =>
Some(sources.GreaterThan(a.name, v))
case GreaterThan(Literal(v, t), Cast(a: Attribute, _)) =>
Some(sources.LessThan(a.name, v))
case LessThan(a: Attribute, Literal(v, t)) =>
Some(sources.LessThan(a.name, v))
case LessThan(Literal(v, t), a: Attribute) =>
Expand All @@ -195,18 +187,10 @@ object CarbonFilters {
Some(sources.GreaterThanOrEqual(a.name, v))
case GreaterThanOrEqual(Literal(v, t), a: Attribute) =>
Some(sources.LessThanOrEqual(a.name, v))
case GreaterThanOrEqual(Cast(a: Attribute, _), Literal(v, t)) =>
Some(sources.GreaterThanOrEqual(a.name, v))
case GreaterThanOrEqual(Literal(v, t), Cast(a: Attribute, _)) =>
Some(sources.LessThanOrEqual(a.name, v))
case LessThanOrEqual(a: Attribute, Literal(v, t)) =>
Some(sources.LessThanOrEqual(a.name, v))
case LessThanOrEqual(Literal(v, t), a: Attribute) =>
Some(sources.GreaterThanOrEqual(a.name, v))
case LessThanOrEqual(Cast(a: Attribute, _), Literal(v, t)) =>
Some(sources.LessThanOrEqual(a.name, v))
case LessThanOrEqual(Literal(v, t), Cast(a: Attribute, _)) =>
Some(sources.GreaterThanOrEqual(a.name, v))

case others =>
if (!or) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -393,38 +393,39 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy {
* Tries to translate a Catalyst [[Expression]] into data source [[Filter]].
* @return a `Some[Filter]` if the input [[Expression]] is convertible, otherwise a `None`.
*/
protected[sql] def translateFilter(predicate: Expression): Option[Filter] = {
protected[sql] def translateFilter(predicate: Expression, or: Boolean = false): Option[Filter] = {
predicate match {
case or@ Or(left, right) =>
case or@Or(left, right) =>

val leftFilter = translateFilter(left)
val rightFilter = translateFilter(right)
val leftFilter = translateFilter(left, true)
val rightFilter = translateFilter(right, true)
if (leftFilter.isDefined && rightFilter.isDefined) {
Some( sources.Or(leftFilter.get, rightFilter.get))
Some(sources.Or(leftFilter.get, rightFilter.get))
} else {
None
}

case And(left, right) =>
(translateFilter(left) ++ translateFilter(right)).reduceOption(sources.And)
val leftFilter = translateFilter(left, or)
val rightFilter = translateFilter(right, or)
if (or) {
if (leftFilter.isDefined && rightFilter.isDefined) {
(translateFilter(left) ++ translateFilter(right)).reduceOption(sources.And)
} else {
None
}
} else {
(translateFilter(left) ++ translateFilter(right)).reduceOption(sources.And)
}

case EqualTo(a: Attribute, Literal(v, t)) =>
Some(sources.EqualTo(a.name, v))
case EqualTo(l@Literal(v, t), a: Attribute) =>
Some(sources.EqualTo(a.name, v))
case EqualTo(Cast(a: Attribute, _), Literal(v, t)) =>
Some(sources.EqualTo(a.name, v))
case EqualTo(Literal(v, t), Cast(a: Attribute, _)) =>
Some(sources.EqualTo(a.name, v))

case Not(EqualTo(a: Attribute, Literal(v, t))) =>
Some(sources.Not(sources.EqualTo(a.name, v)))
case Not(EqualTo(Literal(v, t), a: Attribute)) =>
Some(sources.Not(sources.EqualTo(a.name, v)))
case Not(EqualTo(Cast(a: Attribute, _), Literal(v, t))) =>
Some(sources.Not(sources.EqualTo(a.name, v)))
case Not(EqualTo(Literal(v, t), Cast(a: Attribute, _))) =>
Some(sources.Not(sources.EqualTo(a.name, v)))
case IsNotNull(a: Attribute) => Some(sources.IsNotNull(a.name))
case IsNull(a: Attribute) => Some(sources.IsNull(a.name))
case Not(In(a: Attribute, list)) if !list.exists(!_.isInstanceOf[Literal]) =>
Expand All @@ -433,50 +434,22 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy {
case In(a: Attribute, list) if !list.exists(!_.isInstanceOf[Literal]) =>
val hSet = list.map(e => e.eval(EmptyRow))
Some(sources.In(a.name, hSet.toArray))
case Not(In(Cast(a: Attribute, _), list))
if !list.exists(!_.isInstanceOf[Literal]) =>
val hSet = list.map(e => e.eval(EmptyRow))
Some(sources.Not(sources.In(a.name, hSet.toArray)))
case In(Cast(a: Attribute, _), list) if !list.exists(!_.isInstanceOf[Literal]) =>
val hSet = list.map(e => e.eval(EmptyRow))
Some(sources.In(a.name, hSet.toArray))

case GreaterThan(a: Attribute, Literal(v, t)) =>
Some(sources.GreaterThan(a.name, v))
case GreaterThan(Literal(v, t), a: Attribute) =>
Some(sources.LessThan(a.name, v))
case GreaterThan(Cast(a: Attribute, _), Literal(v, t)) =>
Some(sources.GreaterThan(a.name, v))
case GreaterThan(Literal(v, t), Cast(a: Attribute, _)) =>
Some(sources.LessThan(a.name, v))

case LessThan(a: Attribute, Literal(v, t)) =>
Some(sources.LessThan(a.name, v))
case LessThan(Literal(v, t), a: Attribute) =>
Some(sources.GreaterThan(a.name, v))
case LessThan(Cast(a: Attribute, _), Literal(v, t)) =>
Some(sources.LessThan(a.name, v))
case LessThan(Literal(v, t), Cast(a: Attribute, _)) =>
Some(sources.GreaterThan(a.name, v))

case GreaterThanOrEqual(a: Attribute, Literal(v, t)) =>
Some(sources.GreaterThanOrEqual(a.name, v))
case GreaterThanOrEqual(Literal(v, t), a: Attribute) =>
Some(sources.LessThanOrEqual(a.name, v))
case GreaterThanOrEqual(Cast(a: Attribute, _), Literal(v, t)) =>
Some(sources.GreaterThanOrEqual(a.name, v))
case GreaterThanOrEqual(Literal(v, t), Cast(a: Attribute, _)) =>
Some(sources.LessThanOrEqual(a.name, v))

case LessThanOrEqual(a: Attribute, Literal(v, t)) =>
Some(sources.LessThanOrEqual(a.name, v))
case LessThanOrEqual(Literal(v, t), a: Attribute) =>
Some(sources.GreaterThanOrEqual(a.name, v))
case LessThanOrEqual(Cast(a: Attribute, _), Literal(v, t)) =>
Some(sources.LessThanOrEqual(a.name, v))
case LessThanOrEqual(Literal(v, t), Cast(a: Attribute, _)) =>
Some(sources.GreaterThanOrEqual(a.name, v))

case others => None
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.carbondata.query

import org.apache.spark.sql.common.util.QueryTest
import org.scalatest.BeforeAndAfterAll

import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.CarbonProperties

/**
 * Verifies that a not-equal-to (`!=`) filter on the dictionary-included Int column `id`
 * returns the same rows from a CarbonData table as from an equivalent Hive table.
 *
 * The same CSV is loaded into both tables, and the filter literal is supplied as a
 * string (`'7'`), a decimal (`7.0`) and an integer (`7`).
 * NOTE(review): the non-Int literals presumably cause Spark to wrap the column in a
 * cast before comparison — confirm against the analyzed plan.
 */
class TestNotEqualToFilter extends QueryTest with BeforeAndAfterAll {

  /**
   * Creates the CarbonData table (with `id` as a dictionary-included column) and the
   * Hive reference table, then loads the same CSV file into both.
   */
  override def beforeAll(): Unit = {
    // Drop leftovers from earlier runs so the CREATE/LOAD statements start from a clean state.
    sql("drop table if exists test_not_equal_to_carbon")
    sql("drop table if exists test_not_equal_to_hive")
    // The `date` column is a Timestamp; reset the timestamp format to the default
    // in case another suite changed it.
    CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
        CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT
      )
    sql(
      """
        CREATE TABLE IF NOT EXISTS test_not_equal_to_carbon
        (ID Int, date Timestamp, country String,
        name String, phonetype String, serialname String, salary Int)
        STORED BY 'org.apache.carbondata.format' TBLPROPERTIES('dictionary_include'='id')
      """)
    sql(
      """
        CREATE TABLE IF NOT EXISTS test_not_equal_to_hive
        (ID Int, date Timestamp, country String,
        name String, phonetype String, serialname String, salary Int)
        row format delimited fields terminated by ','
      """)
    // The carbon load names the columns explicitly through FILEHEADER.
    sql(
      s"""
        LOAD DATA LOCAL INPATH '$resourcesPath/filter/notEqualToFilter.csv' into table
        test_not_equal_to_carbon
        OPTIONS('FILEHEADER'='ID,date,country,name,phonetype,serialname,salary')
      """)
    sql(
      s"""
        LOAD DATA LOCAL INPATH '$resourcesPath/filter/notEqualToFilter.csv' into table
        test_not_equal_to_hive
      """)
  }

  /**
   * Runs `select Id ... where id != <literal>` against both tables and asserts that the
   * CarbonData answer equals the Hive answer.
   *
   * @param literal the right-hand side of the comparison, exactly as it should appear
   *                in the SQL text (for example `'7'`, `7.0` or `7`)
   */
  private def checkNotEqualToMatchesHive(literal: String): Unit = {
    checkAnswer(
      sql(s"select Id from test_not_equal_to_carbon where id != $literal"),
      sql(s"select Id from test_not_equal_to_hive where id != $literal")
    )
  }

  test("select Id from test_not_equal_to_carbon where id != '7'") {
    checkNotEqualToMatchesHive("'7'")
  }

  test("select Id from test_not_equal_to_carbon where id != 7.0") {
    checkNotEqualToMatchesHive("7.0")
  }

  test("select Id from test_not_equal_to_carbon where id != 7") {
    checkNotEqualToMatchesHive("7")
  }

  /** Drops both tables created in `beforeAll`. */
  override def afterAll(): Unit = {
    sql("drop table if exists test_not_equal_to_carbon")
    sql("drop table if exists test_not_equal_to_hive")
  }
}

0 comments on commit 45f552c

Please sign in to comment.