Showing 22 changed files with 666 additions and 659 deletions.
...scala/pl/touk/nussknacker/engine/kafka/consumerrecord/ConsumerRecordToJsonFormatter.scala (125 changes: 62 additions & 63 deletions)
@@ -1,88 +1,87 @@
 package pl.touk.nussknacker.engine.kafka.consumerrecord
 
-import java.nio.charset.{Charset, StandardCharsets}
+import java.nio.charset.StandardCharsets
 
 import com.github.ghik.silencer.silent
-import io.circe.Decoder.Result
-import io.circe.{Decoder, Encoder, HCursor, Json}
+import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder}
+import io.circe.{Decoder, Encoder}
+import org.apache.flink.streaming.connectors.kafka.{KafkaDeserializationSchema, KafkaSerializationSchema}
 import org.apache.kafka.clients.consumer.ConsumerRecord
+import org.apache.kafka.common.header.Headers
 import org.apache.kafka.common.record.TimestampType
 import pl.touk.nussknacker.engine.api.CirceUtil
 import pl.touk.nussknacker.engine.api.test.{TestDataSplit, TestParsingUtils}
 import pl.touk.nussknacker.engine.kafka.{ConsumerRecordUtils, RecordFormatter}
-import pl.touk.nussknacker.engine.util.json.BestEffortJsonEncoder
+import pl.touk.nussknacker.engine.kafka.consumerrecord.SerializableConsumerRecord._
 
 import scala.annotation.nowarn
 
 @silent("deprecated")
 @nowarn("cat=deprecation")
-class ConsumerRecordToJsonFormatter extends RecordFormatter {
+class ConsumerRecordToJsonFormatter[K: Encoder: Decoder, V: Encoder: Decoder](deserializationSchema: KafkaDeserializationSchema[ConsumerRecord[K, V]],
+                                                                              serializationSchema: KafkaSerializationSchema[ConsumerRecord[K, V]])
+  extends RecordFormatter {
 
-  private val cs: Charset = StandardCharsets.UTF_8
-
-  // TODO: add better key-value encoding and decoding
-  implicit val consumerRecordEncoder: Encoder[ConsumerRecord[Array[Byte], Array[Byte]]] = {
-    val encode: Encoder[Any] = BestEffortJsonEncoder(failOnUnkown = false).circeEncoder
-    new Encoder[ConsumerRecord[Array[Byte], Array[Byte]]] {
-      override def apply(a: ConsumerRecord[Array[Byte], Array[Byte]]): Json = Json.obj(
-        "topic" -> encode(a.topic()),
-        "partition" -> encode(a.partition()),
-        "offset" -> encode(a.offset()),
-        "timestamp" -> encode(a.timestamp()),
-        "timestampType" -> encode(a.timestampType().name),
-        "serializedKeySize" -> encode(a.serializedKeySize()),
-        "serializedValueSize" -> encode(a.serializedValueSize()),
-        "key" -> encode(Option(a.key()).map(bytes => new String(bytes)).orNull),
-        "value" -> encode(new String(a.value())),
-        "leaderEpoch" -> encode(a.leaderEpoch().orElse(null)),
-        "checksum" -> encode(a.checksum()),
-        "headers" -> encode(ConsumerRecordUtils.toMap(a.headers()))
-      )
-    }
-  }
-
-  implicit val consumerRecordDecoder: Decoder[ConsumerRecord[Array[Byte], Array[Byte]]] = {
-    new Decoder[ConsumerRecord[Array[Byte], Array[Byte]]] {
-      override def apply(c: HCursor): Result[ConsumerRecord[Array[Byte], Array[Byte]]] = {
-        for {
-          topic <- c.downField("topic").as[String].right
-          partition <- c.downField("partition").as[Int].right
-          offset <- c.downField("offset").as[Long].right
-          timestamp <- c.downField("timestamp").as[Option[Long]].right
-          timestampType <- c.downField("timestampType").as[Option[String]].right
-          serializedKeySize <- c.downField("serializedKeySize").as[Option[Int]].right
-          serializedValueSize <- c.downField("serializedValueSize").as[Option[Int]].right
-          key <- c.downField("key").as[Option[String]].right
-          value <- c.downField("value").as[Option[String]].right
-          leaderEpoch <- c.downField("leaderEpoch").as[Option[Int]].right
-          checksum <- c.downField("checksum").as[Option[java.lang.Long]].right
-          headers <- c.downField("headers").as[Map[String, Option[String]]].right
-        } yield new ConsumerRecord[Array[Byte], Array[Byte]](
-          topic,
-          partition,
-          offset,
-          timestamp.getOrElse(ConsumerRecord.NO_TIMESTAMP),
-          timestampType.map(TimestampType.forName).getOrElse(TimestampType.NO_TIMESTAMP_TYPE),
-          checksum.getOrElse(ConsumerRecord.NULL_CHECKSUM.toLong),
-          serializedKeySize.getOrElse(ConsumerRecord.NULL_SIZE),
-          serializedValueSize.getOrElse(ConsumerRecord.NULL_SIZE),
-          key.map(_.getBytes()).orNull,
-          value.map(_.getBytes()).orNull,
-          ConsumerRecordUtils.toHeaders(headers.mapValues(v => v.orNull)),
-          java.util.Optional.ofNullable(leaderEpoch.map(Integer.valueOf).orNull)
-        )
-      }
-    }
-  }
+  implicit val consumerRecordDecoder: Decoder[SerializableConsumerRecord[K, V]] = deriveDecoder
+  implicit val consumerRecordEncoder: Encoder[SerializableConsumerRecord[K, V]] = deriveEncoder
 
   override protected def formatRecord(record: ConsumerRecord[Array[Byte], Array[Byte]]): Array[Byte] = {
-    implicitly[Encoder[ConsumerRecord[Array[Byte], Array[Byte]]]].apply(record).noSpaces.getBytes(cs)
+    val deserializedRecord = deserializationSchema.deserialize(record)
+    val serializableRecord = SerializableConsumerRecord(
+      Option(deserializedRecord.key()),
+      deserializedRecord.value(),
+      Option(deserializedRecord.topic()),
+      Option(deserializedRecord.partition()),
+      Option(deserializedRecord.offset()),
+      Option(deserializedRecord.timestamp()),
+      Option(ConsumerRecordUtils.toMap(deserializedRecord.headers()).mapValues(s => Option(s)))
+    )
+    implicitly[Encoder[SerializableConsumerRecord[K, V]]].apply(serializableRecord).noSpaces.getBytes(StandardCharsets.UTF_8)
   }
 
   override protected def parseRecord(topic: String, bytes: Array[Byte]): ConsumerRecord[Array[Byte], Array[Byte]] = {
-    CirceUtil.decodeJsonUnsafe[ConsumerRecord[Array[Byte], Array[Byte]]](bytes)
+    val serializableRecord = CirceUtil.decodeJsonUnsafe[SerializableConsumerRecord[K, V]](bytes) // decode json in SerializableConsumerRecord[K, V] domain
+    val serializableConsumerRecord = SerializableConsumerRecord.from(topic, serializableRecord) // update with defaults if fields are missing in json
+    // Here serialization schema and ProducerRecord are used to transform key and value to proper Array[Byte].
+    // Other properties are ignored by serializer and are based on values provided by decoded json (or default empty values).
+    val producerRecord = serializationSchema.serialize(serializableConsumerRecord, serializableConsumerRecord.timestamp()) // serialize K and V to Array[Byte]
+    createConsumerRecord(
+      serializableConsumerRecord.topic,
+      serializableConsumerRecord.partition,
+      serializableConsumerRecord.offset,
+      serializableConsumerRecord.timestamp,
+      producerRecord.key(),
+      producerRecord.value(),
+      producerRecord.headers()
+    )
   }
 
   override protected def testDataSplit: TestDataSplit = TestParsingUtils.newLineSplit
 
 }
+
+case class SerializableConsumerRecord[K, V](key: Option[K], value: V, topic: Option[String], partition: Option[Int], offset: Option[Long], timestamp: Option[Long], headers: Option[Map[String, Option[String]]])
+
+object SerializableConsumerRecord {
+
+  def createConsumerRecord[K, V](topic: String, partition: Int, offset: Long, timestamp: Long, key: K, value: V, headers: Headers): ConsumerRecord[K, V] = {
+    new ConsumerRecord(topic, partition, offset, timestamp,
+      TimestampType.NO_TIMESTAMP_TYPE, ConsumerRecord.NULL_CHECKSUM.longValue(),
+      ConsumerRecord.NULL_SIZE, ConsumerRecord.NULL_SIZE,
+      key, value, headers
+    )
+  }
+
+  def from[K, V](topic: String, record: SerializableConsumerRecord[K, V]): ConsumerRecord[K, V] = {
+    createConsumerRecord(
+      record.topic.getOrElse(topic),
+      record.partition.getOrElse(0),
+      record.offset.getOrElse(0L),
+      record.timestamp.getOrElse(ConsumerRecord.NO_TIMESTAMP),
+      record.key.getOrElse(null.asInstanceOf[K]),
+      record.value,
+      ConsumerRecordUtils.toHeaders(record.headers.map(_.mapValues(_.orNull)).getOrElse(Map.empty))
+    )
+  }
+}
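
For orientation, a minimal usage sketch, not part of this commit: the two schema objects below are hypothetical stand-ins that simply treat keys and values as UTF-8 strings, while the KafkaDeserializationSchema/KafkaSerializationSchema interfaces and the Kafka record classes are the ones already imported in the diff above.

    import java.nio.charset.StandardCharsets

    import org.apache.flink.api.common.typeinfo.TypeInformation
    import org.apache.flink.streaming.connectors.kafka.{KafkaDeserializationSchema, KafkaSerializationSchema}
    import org.apache.kafka.clients.consumer.ConsumerRecord
    import org.apache.kafka.clients.producer.ProducerRecord
    import pl.touk.nussknacker.engine.kafka.consumerrecord.ConsumerRecordToJsonFormatter

    // Hypothetical deserialization schema: reads raw key/value bytes as UTF-8 strings.
    object StringDeserializationSchema extends KafkaDeserializationSchema[ConsumerRecord[String, String]] {
      override def isEndOfStream(nextElement: ConsumerRecord[String, String]): Boolean = false

      override def deserialize(record: ConsumerRecord[Array[Byte], Array[Byte]]): ConsumerRecord[String, String] =
        new ConsumerRecord(record.topic(), record.partition(), record.offset(),
          Option(record.key()).map(new String(_, StandardCharsets.UTF_8)).orNull,
          Option(record.value()).map(new String(_, StandardCharsets.UTF_8)).orNull)

      override def getProducedType: TypeInformation[ConsumerRecord[String, String]] =
        TypeInformation.of(classOf[ConsumerRecord[String, String]])
    }

    // Hypothetical serialization schema: writes key/value back as bytes; parseRecord takes
    // topic, partition, offset and timestamp from the decoded json, not from this record.
    object StringSerializationSchema extends KafkaSerializationSchema[ConsumerRecord[String, String]] {
      override def serialize(element: ConsumerRecord[String, String], timestamp: java.lang.Long): ProducerRecord[Array[Byte], Array[Byte]] =
        new ProducerRecord(element.topic(), null, timestamp,
          Option(element.key()).map(_.getBytes(StandardCharsets.UTF_8)).orNull,
          Option(element.value()).map(_.getBytes(StandardCharsets.UTF_8)).orNull,
          element.headers())
    }

    object FormatterExample {
      // String has built-in circe Encoder/Decoder instances, satisfying the context bounds.
      val formatter = new ConsumerRecordToJsonFormatter[String, String](StringDeserializationSchema, StringSerializationSchema)
    }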
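
With the derived codecs, each test record is one JSON line (testDataSplit is newLineSplit), keyed by the SerializableConsumerRecord field names. For illustration only, with made-up values, a formatted record from the String/String setup above could look like:

    {"key":"some-key","value":"some-value","topic":"transactions","partition":0,"offset":42,"timestamp":1623241234567,"headers":{"traceId":"abc"}}

Because every field except value is an Option, a stripped-down line such as {"value":"some-value"} still parses: SerializableConsumerRecord.from falls back to the topic passed to parseRecord, partition 0, offset 0L and ConsumerRecord.NO_TIMESTAMP.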