Skip to content

Commit

Permalink
Merge e67a15a into 55c7a2a
Browse files Browse the repository at this point in the history
  • Loading branch information
alancai98 authored Dec 12, 2023
2 parents 55c7a2a + e67a15a commit e0bf888
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 23 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,14 @@ Thank you to all who have contributed!
- **BREAKING** In the produced plan:
- The new plan is fully resolved and typed.
- Operators will be converted to function call.

- Changes the return type of `filter_distinct` to a list if input collection is list

### Deprecated

### Fixed
- Fixes the CLI hanging on invalid queries. See issue #1230.
- Fixes Timestamp Type parsing issue. Previously Timestamp Type would get parsed to a Time type.
- Fixes the physical plan compiler to return list when `DISTINCT` used with `ORDER BY`

### Removed
- **Breaking** Removed IR factory in favor of static top-level functions. Change `Ast.foo()`
Expand Down
8 changes: 4 additions & 4 deletions docs/wiki/documentation/Functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -477,20 +477,20 @@ EXTRACT(TIMEZONE_MINUTE FROM TIME WITH TIME ZONE '23:12:59-08:30') -- -30
### `FILTER_DISTINCT` -- since v0.7.0

Signature
: `FILTER_DISTINCT: Container -> Bag`
: `FILTER_DISTINCT: Container -> Bag|List`

Header
: `FILTER_DISTINCT(c)`

Purpose
: Returns a bag of distinct values contained within a bag, list, sexp, or struct. If the container is a struct,
the field names are not considered.
: Returns a bag or list of distinct values contained within a bag, list, sexp, or struct. If the container is a struct,
the field names are not considered. A list will be returned if and only if the input is a list.

Examples
:

```sql
FILTER_DISTINCT([0, 0, 1]) -- <<0, 1>>
FILTER_DISTINCT([0, 0, 1]) -- [0, 1]
FILTER_DISTINCT(<<0, 0, 1>>) -- <<0, 1>>
FILTER_DISTINCT(SEXP(0, 0, 1)) -- <<0, 1>>
FILTER_DISTINCT({'a': 0, 'b': 0, 'c': 1}) -- <<0, 1>>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,31 +110,33 @@ internal object ExprFunctionUtcNow : ExprFunction {
}

/**
* Returns a bag of distinct values contained within a bag, list, sexp, or struct.
* Returns a bag or list of distinct values contained within a bag, list, sexp, or struct.
* If the container is a struct, the field names are not considered.
*/
internal object ExprFunctionFilterDistinct : ExprFunction {

override val signature = FunctionSignature(
name = "filter_distinct",
requiredParameters = listOf(unionOf(StaticType.BAG, StaticType.LIST, StaticType.SEXP, StaticType.STRUCT)),
returnType = StaticType.BAG
returnType = unionOf(StaticType.BAG, StaticType.LIST)
)

override fun callWithRequired(session: EvaluationSession, required: List<ExprValue>): ExprValue {
val argument = required.first()
// We cannot use a [HashSet] here because [ExprValue] does not implement .equals() and .hashCode()
val encountered = TreeSet(DEFAULT_COMPARATOR)
return ExprValue.newBag(
sequence {
argument.asSequence().forEach {
if (!encountered.contains(it)) {
encountered.add(it.unnamedValue())
yield(it)
}
val seq = sequence {
argument.asSequence().forEach {
if (!encountered.contains(it)) {
encountered.add(it.unnamedValue())
yield(it)
}
}
)
}
return when (argument.type) {
ExprValueType.LIST -> ExprValue.newList(seq)
else -> ExprValue.newBag(seq)
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,36 @@ class FilterDistinctEvaluationTest : EvaluatorTestBase() {
override fun getParameters(): List<Any> = listOf(

// These three tests ensure we can accept lists, bags, s-expressions and structs
ExprFunctionTestCase("filter_distinct([0, 0, 1])", "$BAG_ANNOTATION::[0, 1]"), // list
ExprFunctionTestCase("filter_distinct(<<0, 0, 1>>)", "$BAG_ANNOTATION::[0, 1]"), // bag
ExprFunctionTestCase("filter_distinct(SEXP(0, 0, 1))", "$BAG_ANNOTATION::[0, 1]"), // s-exp
ExprFunctionTestCase("filter_distinct({'a': 0, 'b': 0, 'c': 1})", "$BAG_ANNOTATION::[0, 1]"), // struct
ExprFunctionTestCase(
"filter_distinct([0, 0, 1])",
"[0, 1]"
), // list
ExprFunctionTestCase(
"filter_distinct(<<0, 0, 1>>)",
"$BAG_ANNOTATION::[0, 1]"
), // bag
ExprFunctionTestCase(
"filter_distinct(SEXP(0, 0, 1))",
"$BAG_ANNOTATION::[0, 1]"
), // s-exp
ExprFunctionTestCase(
"filter_distinct({'a': 0, 'b': 0, 'c': 1})",
"$BAG_ANNOTATION::[0, 1]"
), // struct

// Some "smoke tests" to ensure the basic plumbing is working right.
ExprFunctionTestCase("filter_distinct(['foo', 'foo', 1, 1, `symbol`, `symbol`])", "$BAG_ANNOTATION::[\"foo\", 1, symbol]"),
ExprFunctionTestCase("filter_distinct([{ 'a': 1 }, { 'a': 1 }, { 'a': 1 }])", "$BAG_ANNOTATION::[{ 'a': 1 }]"),
ExprFunctionTestCase("filter_distinct([[1, 1], [1, 1], [2, 2]])", "$BAG_ANNOTATION::[[1,1], [2, 2]]"),
ExprFunctionTestCase(
"filter_distinct(['foo', 'foo', 1, 1, `symbol`, `symbol`])",
"[\"foo\", 1, symbol]"
),
ExprFunctionTestCase(
"filter_distinct([{ 'a': 1 }, { 'a': 1 }, { 'a': 1 }])",
"[{ 'a': 1 }]"
),
ExprFunctionTestCase(
"filter_distinct([[1, 1], [1, 1], [2, 2]])",
"[[1,1], [2, 2]]"
),
)
}

Expand Down Expand Up @@ -70,5 +91,9 @@ class FilterDistinctEvaluationTest : EvaluatorTestBase() {
}

@Test
fun invalidArityTest() = checkInvalidArity(funcName = "filter_distinct", maxArity = 1, minArity = 1)
fun invalidArityTest() = checkInvalidArity(
funcName = "filter_distinct",
maxArity = 1,
minArity = 1
)
}

0 comments on commit e0bf888

Please sign in to comment.