-
Notifications
You must be signed in to change notification settings - Fork 28.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-7294][SQL] ADD BETWEEN #5839
Changes from 1 commit
d11d5b9
baf839b
7d62368
76f0c51
f080f8d
7b9b858
7e64d1e
c54d904
d2e7f72
f928816
b15360d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1289,6 +1289,18 @@ def cast(self, dataType): | |
raise TypeError("unexpected type: %s" % type(dataType)) | ||
return Column(jc) | ||
|
||
@ignore_unicode_prefix
def between(self, col1, col2):
    """
    A boolean expression that is evaluated to true if the value of this
    expression is between the given bounds, inclusive on both ends
    (``col1 <= self <= col2``).

    :param col1: lower bound (a column or a value comparable with this column)
    :param col2: upper bound (a column or a value comparable with this column)
    :return: a boolean Column

    For example, ``df[df.a.between(df.b, df.c)]`` keeps the rows whose ``a``
    lies in the closed interval ``[b, c]``.
    """
    # The parentheses are required: in Python ``&`` binds tighter than the
    # comparison operators, so ``self >= col1 & self <= col2`` would be parsed
    # as the chained comparison ``self >= (col1 & self) <= col2``.
    # The combined expression is returned as-is rather than being wrapped in
    # Column() again, since Column() is meant to wrap a JVM column object.
    return (self >= col1) & (self <= col2)
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Actually, I think you no longer need to wrap it in Column, since it is already a Python Column. |
||
|
||
def __repr__(self):
    """Return ``Column<...>`` built from the wrapped JVM column's UTF-8 encoded string form."""
    encoded = self._jc.toString().encode('utf8')
    return 'Column<%s>' % encoded
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -426,6 +426,12 @@ def test_rand_functions(self): | |
for row in rndn: | ||
assert row[1] >= -4.0 and row[1] <= 4.0, "got: %s" % row[1] | ||
|
||
def test_between_function(self):
    """Column.between yields one boolean per row when given column bounds."""
    # SQLContext has no parallelize(); build the DataFrame directly from the rows.
    df = self.sqlCtx.createDataFrame([
        Row(a=1, b=2, c=3),
        Row(a=2, b=1, c=3),
        Row(a=4, b=1, c=3)])
    # collect() returns Row objects, so unwrap the single boolean column
    # before comparing against plain Python booleans.
    result = [row[0] for row in df.select(df.a.between(df.b, df.c)).collect()]
    self.assertEqual([False, True, False], result)
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Remove one blank line. |
||
|
||
def test_save_and_load(self): | ||
df = self.df | ||
tmpPath = tempfile.mkdtemp() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -295,6 +295,20 @@ class Column(protected[sql] val expr: Expression) extends Logging { | |
*/ | ||
def eqNullSafe(other: Any): Column = this <=> other | ||
|
||
/**
 * String-named variant of `between`: resolves both names to columns and delegates to the
 * `Column`-based overload.
 *
 * @param col1 name of the column used as the first (lower) bound
 * @param col2 name of the column used as the second (upper) bound
 * @group java_expr_ops
 */
def between(col1: String, col2: String): Column = {
  val lower = Column(col1)
  val upper = Column(col2)
  between(lower, upper)
}
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe name the two parameters lowerBound and upperBound. |
||
|
||
/** | ||
* Between col1 and col2. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Suggested wording: "True if the current column is between the lower bound and upper bound, inclusive." |
||
* | ||
* @group java_expr_ops | ||
*/ | ||
def between(col1: Column, col2: Column): Column = {
  // BETWEEN is inclusive on both ends (col1 <= this <= col2), matching SQL semantics,
  // so the bounds are compared with >= and <= rather than the strict > and <.
  (this >= col1) && (this <= col2)
}
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You'd need to wrap this so the line fits in 100 chars. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note that between is really >= and <=, i.e. inclusive. |
||
|
||
/** | ||
* True if the current expression is null. | ||
* | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -208,6 +208,12 @@ class ColumnExpressionSuite extends QueryTest { | |
testData2.collect().toSeq.filter(r => r.getInt(0) <= r.getInt(1))) | ||
} | ||
|
||
// Checks that filtering testData4 with $"a".between($"b", $"c") returns the same rows as a
// plain Scala filter over the collected rows that requires field 0 to be strictly greater
// than field 1 and strictly less than field 2.
// NOTE(review): the expected rows use strict comparisons; confirm this matches the intended
// bounds semantics of between (the review discussion says BETWEEN is inclusive).
test("between") { | |
checkAnswer( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it'd be easier to just inline the data here I think, you can do val testData = Seq((0, 1, 2), (1, 2, 3), ...).toDF("a", "b", "c") |
||
testData4.filter($"a".between($"b", $"c")), | ||
testData4.collect().toSeq.filter(r => r.getInt(0) > r.getInt(1) && r.getInt(0) < r.getInt(2))) | ||
} | ||
|
||
val booleanData = TestSQLContext.createDataFrame(TestSQLContext.sparkContext.parallelize( | ||
Row(false, false) :: | ||
Row(false, true) :: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you need to add parentheses, i.e. (self > col1) & (self < col2)