forked from apache/spark
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
18 changed files
with
709 additions
and
63 deletions.
There are no files selected for viewing
49 changes: 49 additions & 0 deletions
49
sql/api/src/main/scala/org/apache/spark/sql/streaming/ListState.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.spark.sql.streaming | ||
|
||
import org.apache.spark.annotation.{Evolving, Experimental} | ||
|
||
/**
 * Interface used for arbitrary stateful operations with the v2 API to capture
 * list value state.
 *
 * @tparam S type of the elements stored in the list
 */
@Experimental
@Evolving
trait ListState[S] extends Serializable {

  /** Whether state exists or not. */
  def exists(): Boolean

  /** Get the values of the list as an iterator, if the state exists. */
  def get(): Iterator[S]

  /** Get the list value as an option if it exists and None otherwise. */
  def getOption(): Option[Iterator[S]]

  /**
   * Update the value of the list.
   *
   * @param newState the elements that make up the new list value
   */
  def put(newState: Seq[S]): Unit

  /**
   * Append an entry to the list.
   *
   * @param newState the single element to append
   */
  def appendValue(newState: S): Unit

  /**
   * Append an entire list to the existing value.
   *
   * @param newState the elements to append
   */
  def appendList(newState: Seq[S]): Unit

  /** Remove this state. */
  def remove(): Unit
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
119 changes: 119 additions & 0 deletions
119
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ListStateImpl.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.spark.sql.execution.streaming | ||
|
||
import org.apache.spark.internal.Logging | ||
import org.apache.spark.sql.execution.streaming.state.StateStore | ||
import org.apache.spark.sql.streaming.ListState | ||
|
||
/**
 * Provides concrete implementation for list of values associated with a state variable
 * used in the streaming transformWithState operator.
 *
 * Every operation builds its store key with `StateEncoder.encodeGroupingKey(stateName)`
 * and passes `stateName` through to the underlying store call.
 *
 * @param store - reference to the StateStore instance to be used for storing state
 * @param stateName - name of logical state partition
 * @tparam S - data type of object that will be stored in the list
 */
class ListStateImpl[S](
    store: StateStore,
    stateName: String)
  extends ListState[S] with Logging {

  /** Whether state exists or not, i.e. whether the store returns a non-null value for the key. */
  override def exists(): Boolean = {
    val stateValue = store.get(StateEncoder.encodeGroupingKey(stateName), stateName)
    stateValue != null
  }

  /**
   * Get the state value if it exists. The returned iterator decodes each stored row as it
   * is consumed; it yields no elements when the store's values iterator is empty.
   */
  override def get(): Iterator[S] = {
    val encodedKey = StateEncoder.encodeGroupingKey(stateName)
    val unsafeRowValuesIterator = store.valuesIterator(encodedKey, stateName)
    new Iterator[S] {
      override def hasNext: Boolean = unsafeRowValuesIterator.hasNext

      override def next(): S = {
        val valueUnsafeRow = unsafeRowValuesIterator.next()
        StateEncoder.decodeValue(valueUnsafeRow)
      }
    }
  }

  /**
   * Get the list value as an option if it exists and None otherwise.
   *
   * `get()` always constructs a non-null iterator, so wrapping it in `Option(...)` could
   * never produce `None`; existence is checked explicitly instead.
   */
  override def getOption(): Option[Iterator[S]] = {
    if (exists()) Some(get()) else None
  }

  /**
   * Update the value of the list. An empty `newState` removes the state.
   *
   * @throws IllegalArgumentException if `newState` is null or contains a null element
   */
  override def put(newState: Seq[S]): Unit = {
    validateNewState(newState)

    if (newState.isEmpty) {
      remove()
    } else {
      val encodedKey = StateEncoder.encodeGroupingKey(stateName)
      // The first element is written with `put`; every following element is added
      // with `merge` under the same key.
      store.put(encodedKey, StateEncoder.encodeValue(newState.head), stateName)
      newState.tail.foreach { v =>
        store.merge(encodedKey, StateEncoder.encodeValue(v), stateName)
      }
    }
  }

  /**
   * Append an entry to the list.
   *
   * @throws IllegalArgumentException if `newState` is null
   */
  override def appendValue(newState: S): Unit = {
    if (newState == null) {
      throw new IllegalArgumentException("value added to ListState should be non-null")
    }
    store.merge(
      StateEncoder.encodeGroupingKey(stateName),
      StateEncoder.encodeValue(newState),
      stateName)
  }

  /**
   * Append an entire list to the existing value, merging the elements one by one.
   *
   * @throws IllegalArgumentException if `newState` is null or contains a null element
   */
  override def appendList(newState: Seq[S]): Unit = {
    validateNewState(newState)

    val encodedKey = StateEncoder.encodeGroupingKey(stateName)
    newState.foreach { v =>
      store.merge(encodedKey, StateEncoder.encodeValue(v), stateName)
    }
  }

  /** Remove this state. */
  override def remove(): Unit = {
    store.remove(StateEncoder.encodeGroupingKey(stateName), stateName)
  }

  /** Rejects a null list as well as a list containing a null element. */
  private def validateNewState(newState: Seq[S]): Unit = {
    if (newState == null) {
      throw new IllegalArgumentException("newState list should be non-null")
    }

    if (newState.contains(null)) {
      throw new IllegalArgumentException("value added to ListState should be non-null")
    }
  }
}
60 changes: 60 additions & 0 deletions
60
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StateEncoder.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.execution.streaming | ||
|
||
import org.apache.commons.lang3.SerializationUtils | ||
|
||
import org.apache.spark.sql.catalyst.InternalRow | ||
import org.apache.spark.sql.catalyst.expressions.{UnsafeProjection, UnsafeRow} | ||
import org.apache.spark.sql.types.{BinaryType, StructType} | ||
|
||
/**
 * Helper object providing APIs to encode the grouping key and user provided values.
 *
 * Keys and values are Java-serialized with `SerializationUtils` and stored as the single
 * binary column of an `UnsafeRow`.
 */
object StateEncoder {

  // TODO: validate places that are trying to encode the key and check if we can eliminate/
  // add caching for some of these calls.
  /**
   * Encodes the current implicit grouping key as a single-column binary row.
   *
   * @param stateName name of the state variable; only used in the error message
   * @throws UnsupportedOperationException if no implicit key is currently set
   */
  def encodeGroupingKey(stateName: String): UnsafeRow = {
    val key = ImplicitKeyTracker.getImplicitKeyOption.getOrElse {
      // The original message concatenated two literals without a separating space,
      // producing "...operation onstateName=...".
      throw new UnsupportedOperationException(
        s"Implicit key not found for operation on stateName=$stateName")
    }
    encodeAsBinaryRow("key", key)
  }

  /** Encodes a user provided value as a single-column binary row. */
  def encodeValue[S](value: S): UnsafeRow = {
    encodeAsBinaryRow("value", value)
  }

  /** Decodes a row produced by `encodeValue` by deserializing its first (binary) column. */
  def decodeValue[S](row: UnsafeRow): S = {
    SerializationUtils
      .deserialize(row.getBinary(0))
      .asInstanceOf[S]
  }

  /** Serializes `obj` and wraps the bytes in an UnsafeRow with one binary field `fieldName`. */
  private def encodeAsBinaryRow(fieldName: String, obj: Any): UnsafeRow = {
    val schema: StructType = new StructType().add(fieldName, BinaryType)
    // The cast fails at runtime if the object is not java.io.Serializable.
    val bytes = SerializationUtils.serialize(obj.asInstanceOf[Serializable])
    val projection = UnsafeProjection.create(schema)
    projection(InternalRow(bytes))
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.