Skip to content

Commit

Permalink
[SPARK-19089][SQL] Add support for nested sequences
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?

Replaced specific sequence encoders with generic sequence encoder to enable nesting of sequences.

Does not add support for nested arrays as that cannot be solved in this way.

## How was this patch tested?

```bash
build/mvn -DskipTests clean package && dev/run-tests
```

Additionally in Spark shell:

```
scala> Seq(Seq(Seq(1))).toDS.collect()
res0: Array[Seq[Seq[Int]]] = Array(List(List(1)))
```

Author: Michal Senkyr <mike.senkyr@gmail.com>

Closes #18011 from michalsenkyr/dataset-seq-nested.

(cherry picked from commit a2b3b67)
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
  • Loading branch information
michalsenkyr authored and cloud-fan committed May 22, 2017
1 parent cfd1bf0 commit 41d8d21
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 44 deletions.
55 changes: 11 additions & 44 deletions sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala
Original file line number Diff line number Diff line change
Expand Up @@ -111,93 +111,60 @@ abstract class SQLImplicits extends LowPrioritySQLImplicits {

/**
* @since 1.6.1
* @deprecated use [[newIntSequenceEncoder]]
* @deprecated use [[newSequenceEncoder]]
*/
def newIntSeqEncoder: Encoder[Seq[Int]] = ExpressionEncoder()

/**
* @since 1.6.1
* @deprecated use [[newLongSequenceEncoder]]
* @deprecated use [[newSequenceEncoder]]
*/
def newLongSeqEncoder: Encoder[Seq[Long]] = ExpressionEncoder()

/**
* @since 1.6.1
* @deprecated use [[newDoubleSequenceEncoder]]
* @deprecated use [[newSequenceEncoder]]
*/
def newDoubleSeqEncoder: Encoder[Seq[Double]] = ExpressionEncoder()

/**
* @since 1.6.1
* @deprecated use [[newFloatSequenceEncoder]]
* @deprecated use [[newSequenceEncoder]]
*/
def newFloatSeqEncoder: Encoder[Seq[Float]] = ExpressionEncoder()

/**
* @since 1.6.1
* @deprecated use [[newByteSequenceEncoder]]
* @deprecated use [[newSequenceEncoder]]
*/
def newByteSeqEncoder: Encoder[Seq[Byte]] = ExpressionEncoder()

/**
* @since 1.6.1
* @deprecated use [[newShortSequenceEncoder]]
* @deprecated use [[newSequenceEncoder]]
*/
def newShortSeqEncoder: Encoder[Seq[Short]] = ExpressionEncoder()

/**
* @since 1.6.1
* @deprecated use [[newBooleanSequenceEncoder]]
* @deprecated use [[newSequenceEncoder]]
*/
def newBooleanSeqEncoder: Encoder[Seq[Boolean]] = ExpressionEncoder()

/**
* @since 1.6.1
* @deprecated use [[newStringSequenceEncoder]]
* @deprecated use [[newSequenceEncoder]]
*/
def newStringSeqEncoder: Encoder[Seq[String]] = ExpressionEncoder()

/**
* @since 1.6.1
* @deprecated use [[newProductSequenceEncoder]]
* @deprecated use [[newSequenceEncoder]]
*/
implicit def newProductSeqEncoder[A <: Product : TypeTag]: Encoder[Seq[A]] = ExpressionEncoder()
def newProductSeqEncoder[A <: Product : TypeTag]: Encoder[Seq[A]] = ExpressionEncoder()

/** @since 2.2.0 */
implicit def newIntSequenceEncoder[T <: Seq[Int] : TypeTag]: Encoder[T] =
ExpressionEncoder()

/** @since 2.2.0 */
implicit def newLongSequenceEncoder[T <: Seq[Long] : TypeTag]: Encoder[T] =
ExpressionEncoder()

/** @since 2.2.0 */
implicit def newDoubleSequenceEncoder[T <: Seq[Double] : TypeTag]: Encoder[T] =
ExpressionEncoder()

/** @since 2.2.0 */
implicit def newFloatSequenceEncoder[T <: Seq[Float] : TypeTag]: Encoder[T] =
ExpressionEncoder()

/** @since 2.2.0 */
implicit def newByteSequenceEncoder[T <: Seq[Byte] : TypeTag]: Encoder[T] =
ExpressionEncoder()

/** @since 2.2.0 */
implicit def newShortSequenceEncoder[T <: Seq[Short] : TypeTag]: Encoder[T] =
ExpressionEncoder()

/** @since 2.2.0 */
implicit def newBooleanSequenceEncoder[T <: Seq[Boolean] : TypeTag]: Encoder[T] =
ExpressionEncoder()

/** @since 2.2.0 */
implicit def newStringSequenceEncoder[T <: Seq[String] : TypeTag]: Encoder[T] =
ExpressionEncoder()

/** @since 2.2.0 */
implicit def newProductSequenceEncoder[T <: Seq[Product] : TypeTag]: Encoder[T] =
ExpressionEncoder()
implicit def newSequenceEncoder[T <: Seq[_] : TypeTag]: Encoder[T] = ExpressionEncoder()

// Arrays

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,11 @@ class DatasetPrimitiveSuite extends QueryTest with SharedSQLContext {
ListClass(List(1)) -> Queue("test" -> SeqClass(Seq(2))))
}

test("nested sequences") {
checkDataset(Seq(Seq(Seq(1))).toDS(), Seq(Seq(1)))
checkDataset(Seq(List(Queue(1))).toDS(), List(Queue(1)))
}

test("package objects") {
import packageobject._
checkDataset(Seq(PackageClass(1)).toDS(), PackageClass(1))
Expand Down

0 comments on commit 41d8d21

Please sign in to comment.