apache · kaka1992 · May 1, 2015 · May 2, 2015 · May 2, 2015 · May 2, 2015
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
@@ -1289,6 +1289,17 @@ def cast(self, dataType):
             raise TypeError("unexpected type: %s" % type(dataType))
         return Column(jc)
 
+    @ignore_unicode_prefix
+    def between(self, lowerBound, upperBound):
+        """ A boolean expression that is evaluated to true if the value of this
+        expression is between the given columns.
+
+        >>> df[df.col1.between(lowerBound, upperBound)].collect()
+        [Row(col1=5, col2=6, col3=8)]
+        """
+        jc = (self >= lowerBound) & (self <= upperBound)
+        return Column(jc)
+
     def __repr__(self):
         return 'Column<%s>' % self._jc.toString().encode('utf8')
 

diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
@@ -426,6 +426,12 @@ def test_rand_functions(self):
         for row in rndn:
             assert row[1] >= -4.0 and row[1] <= 4.0, "got: %s" % row[1]
 
+    def test_between_function(self):
+        df = self.sqlCtx.parallelize([Row(a=1, b=2, c=3), Row(a=2, b=1, c=3), Row(a=4, b=1, c=4)]).toDF()
+        self.assertEqual([False, True, True],
+                     df.select(df.a.between(df.b, df.c)).collect())
+
+
     def test_save_and_load(self):
         df = self.df
         tmpPath = tempfile.mkdtemp()

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -295,6 +295,25 @@ class Column(protected[sql] val expr: Expression) extends Logging {
    */
   def eqNullSafe(other: Any): Column = this <=> other
 
+  /**
+   * True if the current column is between the lower bound and upper bound, inclusive.
+   * Each bound string is turned into a Column via `Column(...)` and passed to the Column overload.
+   *
+   * @group java_expr_ops
+   */
+  def between(lowerBound: String, upperBound: String): Column =
+    between(Column(lowerBound), Column(upperBound))
+
+  /**
+   * Inclusive range check: true when `lowerBound <= this column <= upperBound`.
+   *
+   * @group java_expr_ops
+   */
+  def between(lowerBound: Column, upperBound: Column): Column = {
+    val notBelowLower = GreaterThanOrEqual(expr, lowerBound.expr)
+    And(notBelowLower, LessThanOrEqual(expr, upperBound.expr))
+  }
+
   /**
    * True if the current expression is null.
    *

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
@@ -208,6 +208,12 @@ class ColumnExpressionSuite extends QueryTest {
       testData2.collect().toSeq.filter(r => r.getInt(0) <= r.getInt(1)))
   }
 
+  test("between") {  // expected rows: b <= a <= c, computed with a plain Scala filter
+    val inRange = (r: Row) => r.getInt(0) >= r.getInt(1) && r.getInt(0) <= r.getInt(2)
+    val expected = testData4.collect().toSeq.filter(inRange)
+    checkAnswer(testData4.filter($"a".between($"b", $"c")), expected)
+  }
+
   val booleanData = TestSQLContext.createDataFrame(TestSQLContext.sparkContext.parallelize(
     Row(false, false) ::
       Row(false, true) ::

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala
@@ -57,6 +57,17 @@ object TestData {
       TestData2(3, 2) :: Nil, 2).toDF()
   testData2.registerTempTable("testData2")
 
+  // Three-int rows for range checks on `a` against `b` and `c`: some rows satisfy
+  // b <= a <= c, the others violate either the lower or the upper bound.
+  case class TestData4(a: Int, b: Int, c: Int)
+  val testData4 = {
+    val rows = Seq(
+      TestData4(0, 1, 2), TestData4(1, 2, 3), TestData4(2, 1, 0),
+      TestData4(2, 2, 4), TestData4(3, 1, 6), TestData4(3, 2, 0))
+    TestSQLContext.sparkContext.parallelize(rows, 2).toDF()
+  }
+  testData4.registerTempTable("TestData4")
+
   case class DecimalData(a: BigDecimal, b: BigDecimal)
 
   val decimalData =