Skip to content

Commit

Permalink
Merge pull request #178 from twitter/scrooge-serializer
Browse files Browse the repository at this point in the history
Scrooge serializer
  • Loading branch information
johnynek committed Mar 27, 2014
2 parents 1e70e90 + fd7839b commit a47b511
Show file tree
Hide file tree
Showing 2 changed files with 129 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
Copyright 2013 Twitter, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package com.twitter.chill.scrooge

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.Serializer
import com.esotericsoftware.kryo.io.Input
import com.esotericsoftware.kryo.io.Output
import com.twitter.scrooge.{ThriftStructSerializer, ThriftStructCodec, ThriftStruct}
import org.apache.thrift.protocol.{TBinaryProtocol, TProtocolFactory}
import scala.collection.mutable
import scala.util.Try

/**
 * Helpers for the Kryo serializer for Scrooge generated Thrift structs.
 *
 * Resolves the [[ThriftStructCodec]] (the generated companion object) for a
 * struct class via reflection and caches one [[ThriftStructSerializer]] per class.
 *
 * The cache lives in this singleton and is therefore shared by every Kryo
 * instance in the JVM. Kryo instances themselves are not thread safe, but
 * several of them (one per thread) may hit this cache at once, so the cache
 * is a concurrent map.
 */
object ScroogeThriftStructSerializer {
  /* Cache of struct class -> serializer. Reflection-based codec lookup is
   * costly, so it is performed at most a handful of times per class (a racing
   * thread may build a duplicate, but only one instance is ever published).
   */
  private[this] val classToTSS =
    new java.util.concurrent.ConcurrentHashMap[Class[_], ThriftStructSerializer[_]]()

  /** Returns the singleton instance held in the static MODULE$ field of a Scala object's class. */
  private def getObject[T](companionClass: Class[T]): AnyRef =
    companionClass.getField("MODULE$").get(null)

  /**
   * For unions, the codec is the object whose class name is everything up to
   * and including the last '$' of the given class name.
   * This is costly, but only done once per Class.
   * A name without any '$' yields an empty class name, which fails the lookup.
   */
  private[this] def codecForUnion[T <: ThriftStruct](maybeUnion: Class[T]): Try[ThriftStructCodec[T]] =
    Try {
      val name = maybeUnion.getName
      // lastIndexOf returns -1 when there is no '$', giving the empty prefix
      getObject(Class.forName(name.substring(0, name.lastIndexOf('$') + 1)))
    }.map(_.asInstanceOf[ThriftStructCodec[T]])

  /** For ordinary structs, the codec is the companion object: class name + "$". */
  private[this] def codecForNormal[T <: ThriftStruct](thriftStructClass: Class[T]): Try[ThriftStructCodec[T]] =
    Try(getObject(Class.forName(thriftStructClass.getName + "$")))
      .map(_.asInstanceOf[ThriftStructCodec[T]])

  /**
   * The companion to a ThriftStruct generated by scrooge will always be its codec.
   * Tries the plain companion first, then the union lookup; throws the failure
   * of the last attempt if neither resolves.
   */
  private[this] def constructCodec[T <: ThriftStruct](thriftStructClass: Class[T]): ThriftStructCodec[T] =
    codecForNormal(thriftStructClass)
      .orElse(codecForUnion(thriftStructClass))
      .get

  /** Builds a binary-protocol ThriftStructSerializer bound to the codec of the given class. */
  private[this] def constructThriftStructSerializer[T <: ThriftStruct](thriftStructClass: Class[T]): ThriftStructSerializer[T] = {
    // capture the codec here:
    val newCodec = constructCodec(thriftStructClass)
    new ThriftStructSerializer[T] {
      val protocolFactory = new TBinaryProtocol.Factory
      override def codec = newCodec
    }
  }

  /**
   * Returns the cached serializer for the given struct class, creating and
   * publishing it on first use.
   *
   * @param thriftStructClass runtime class of a scrooge generated struct
   * @return the serializer shared by all callers for that class
   */
  def lookupThriftStructSerializer[T <: ThriftStruct](thriftStructClass: Class[_ <: T]): ThriftStructSerializer[T] = {
    val cached: ThriftStructSerializer[_] = classToTSS.get(thriftStructClass)
    val tss: ThriftStructSerializer[_] =
      if (cached != null) cached
      else {
        val created: ThriftStructSerializer[_] = constructThriftStructSerializer(thriftStructClass)
        // putIfAbsent returns the previously published value when another thread won the race
        val raced: ThriftStructSerializer[_] = classToTSS.putIfAbsent(thriftStructClass, created)
        if (raced != null) raced else created
      }
    tss.asInstanceOf[ThriftStructSerializer[T]]
  }

  /** Convenience overload: looks the serializer up by the runtime class of the instance. */
  def lookupThriftStructSerializer[T <: ThriftStruct](thriftStruct: T): ThriftStructSerializer[T] = {
    lookupThriftStructSerializer(thriftStruct.getClass)
  }

}

/**
 * Kryo [[Serializer]] for Scrooge generated Thrift structs.
 *
 * The struct is encoded to bytes with the ThriftStructSerializer looked up for
 * its class; the byte count is written first via `writeInt(length, true)`,
 * followed by the bytes themselves. `read` mirrors that layout.
 */
class ScroogeThriftStructSerializer[T <: ThriftStruct] extends Serializer[T] {
  import ScroogeThriftStructSerializer._

  /** Writes the length-prefixed Thrift encoding of `thriftStruct`; any Exception is rethrown wrapped in a RuntimeException. */
  override def write(kryo: Kryo, output: Output, thriftStruct: T): Unit =
    try {
      val encoder = lookupThriftStructSerializer(thriftStruct)
      val payload = encoder.toBytes(thriftStruct)
      output.writeInt(payload.length, true)
      output.writeBytes(payload)
    } catch {
      case e: Exception =>
        throw new RuntimeException(s"Could not serialize ThriftStruct of type ${thriftStruct.getClass}", e)
    }

  /**
   * Reads a length-prefixed Thrift encoding and decodes it with the serializer
   * registered for `thriftStructClass`; any Exception is rethrown wrapped in a
   * RuntimeException.
   *
   * nb: thriftStructClass doesn't actually have type Class[T], it has type
   * Class[_ <: T]. This lie is courtesy of the Kryo API.
   */
  override def read(kryo: Kryo, input: Input, thriftStructClass: Class[T]): T =
    try {
      val decoder = lookupThriftStructSerializer(thriftStructClass)
      val length = input.readInt(true)
      // readBytes(length) allocates an array of that size and fills it from the input
      val payload = input.readBytes(length)
      decoder.fromBytes(payload)
    } catch {
      case e: Exception =>
        throw new RuntimeException(s"Could not create ThriftStruct $thriftStructClass", e)
    }
}
14 changes: 14 additions & 0 deletions project/Build.scala
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ import scala.collection.JavaConverters._

object ChillBuild extends Build {
val kryoVersion = "2.21"
def withCross(dep: ModuleID) =
dep cross CrossVersion.binaryMapped {
case "2.9.3" => "2.9.2" // TODO: hack because twitter hasn't built things against 2.9.3
case version if version startsWith "2.10" => "2.10" // TODO: hack because sbt is broken
case x => x
}

val sharedSettings = Project.defaultSettings ++ mimaDefaultSettings ++ Seq(

Expand Down Expand Up @@ -87,6 +93,7 @@ object ChillBuild extends Build {
).aggregate(
chill,
chillBijection,
chillScrooge,
chillStorm,
chillJava,
chillHadoop,
Expand Down Expand Up @@ -192,6 +199,13 @@ object ChillBuild extends Build {
)
)

lazy val chillScrooge = module("scrooge").settings(
libraryDependencies ++= Seq(
"org.apache.thrift" % "libthrift" % "0.6.1" % "provided",
withCross("com.twitter" %% "scrooge-serializer" % "3.13.0" % "provided")
)
).dependsOn(chill % "test->test;compile->compile")

// This can only have java deps!
lazy val chillProtobuf = module("protobuf").settings(
crossPaths := false,
Expand Down

0 comments on commit a47b511

Please sign in to comment.