Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Using new Scala 3 language features #682

Merged
merged 11 commits
May 4, 2024
Merged
2 changes: 1 addition & 1 deletion docs/pages/performance/fashion-mnist/plot.b64

Large diffs are not rendered by default.

Binary file modified docs/pages/performance/fashion-mnist/plot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
16 changes: 8 additions & 8 deletions docs/pages/performance/fashion-mnist/results.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
|Model|Parameters|Recall|Queries per Second|
|---|---|---|---|
|eknn-l2lsh|L=100 k=4 w=1024 candidates=500 probes=0|0.378|383.334|
|eknn-l2lsh|L=100 k=4 w=1024 candidates=1000 probes=0|0.446|324.105|
|eknn-l2lsh|L=100 k=4 w=1024 candidates=500 probes=3|0.635|304.168|
|eknn-l2lsh|L=100 k=4 w=1024 candidates=1000 probes=3|0.716|262.273|
|eknn-l2lsh|L=100 k=4 w=2048 candidates=500 probes=0|0.767|339.240|
|eknn-l2lsh|L=100 k=4 w=2048 candidates=1000 probes=0|0.847|291.800|
|eknn-l2lsh|L=100 k=4 w=2048 candidates=500 probes=3|0.921|232.473|
|eknn-l2lsh|L=100 k=4 w=2048 candidates=1000 probes=3|0.960|206.758|
|eknn-l2lsh|L=100 k=4 w=1024 candidates=500 probes=0|0.379|383.178|
|eknn-l2lsh|L=100 k=4 w=1024 candidates=1000 probes=0|0.447|324.668|
|eknn-l2lsh|L=100 k=4 w=1024 candidates=500 probes=3|0.635|293.685|
|eknn-l2lsh|L=100 k=4 w=1024 candidates=1000 probes=3|0.717|261.210|
|eknn-l2lsh|L=100 k=4 w=2048 candidates=500 probes=0|0.767|334.495|
|eknn-l2lsh|L=100 k=4 w=2048 candidates=1000 probes=0|0.847|289.825|
|eknn-l2lsh|L=100 k=4 w=2048 candidates=500 probes=3|0.922|228.624|
|eknn-l2lsh|L=100 k=4 w=2048 candidates=1000 probes=3|0.960|204.063|
Original file line number Diff line number Diff line change
@@ -1,17 +1,5 @@
package com.klibisz.elastiknn.api

/** Supported vector similarity functions (Scala 3 enum).
  *
  * NOTE(review): the previous API exposed `val values: Seq[Similarity] =
  * Vector(Cosine, Jaccard, Hamming, L1, L2)`. The enum's synthesized
  * `Similarity.values` is an `Array` in declaration order
  * (Cosine, Hamming, Jaccard, L1, L2), so both the collection type and the
  * element order differ from the old API — verify no caller depends on the
  * old ordering or on `Seq` semantics.
  */
enum Similarity {
  case Cosine, Hamming, Jaccard, L1, L2
}
19 changes: 7 additions & 12 deletions elastiknn-api4s/src/main/scala/com/klibisz/elastiknn/api/Vec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,7 @@ sealed trait Vec

object Vec {

sealed trait KnownDims {
this: Vec =>
def dims: Int
}

final case class SparseBool(trueIndices: Array[Int], totalIndices: Int) extends Vec with KnownDims {
final case class SparseBool(trueIndices: Array[Int], totalIndices: Int) extends Vec {
def sorted(): SparseBool = copy(trueIndices.sorted)

def isSorted: Boolean = {
Expand All @@ -37,17 +32,17 @@ object Vec {

object SparseBool {

  /** Builds a random sparse boolean vector: each of the `totalIndices`
    * positions is true with probability `bias` (one rng draw per index,
    * in index order).
    */
  def random(totalIndices: Int, bias: Double = 0.5)(using rng: Random): SparseBool = {
    // Single pass avoids the O(n^2) repeated array append (`:+=`) of the
    // original loop while drawing rng values in exactly the same order.
    val trueIndices = (0 until totalIndices).filter(_ => rng.nextDouble() <= bias).toArray
    SparseBool(trueIndices, totalIndices)
  }

  /** Builds `n` independent random sparse boolean vectors. */
  def randoms(totalIndices: Int, n: Int, bias: Double = 0.5)(using rng: Random): Vector[SparseBool] =
    (0 until n).map(_ => random(totalIndices, bias)).toVector
}

final case class DenseFloat(values: Array[Float]) extends Vec with KnownDims {
final case class DenseFloat(values: Array[Float]) extends Vec {
override def equals(other: Any): Boolean = other match {
case other: DenseFloat => other.values sameElements values
case _ => false
Expand All @@ -64,21 +59,21 @@ object Vec {
dp
}

override def dims: Int = values.length
def dims: Int = values.length
}

object DenseFloat {

  /** Varargs convenience constructor. */
  def apply(values: Float*): DenseFloat = DenseFloat(values.toArray)

  /** Builds a random dense float vector of Gaussian entries scaled by `scale`;
    * when `unit` is true the result is normalized to unit L2 norm.
    */
  def random(length: Int, unit: Boolean = false, scale: Int = 1)(using rng: Random): DenseFloat = {
    // Array.fill draws one Gaussian per element, same order as the original
    // `(0 until length).toArray.map(...)` but without the intermediate Int array.
    val v = DenseFloat(Array.fill(length)(rng.nextGaussian().toFloat * scale))
    if (unit) {
      val norm = math.sqrt(v.values.map(x => x * x).sum.toDouble).toFloat
      DenseFloat(v.values.map(_ / norm))
    } else v
  }

  /** Builds `n` independent random dense float vectors. */
  def randoms(length: Int, n: Int, unit: Boolean = false, scale: Int = 1)(using rng: Random): Vector[DenseFloat] =
    (0 until n).map(_ => random(length, unit, scale)).toVector
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,24 +36,24 @@ object XContentCodec {

private val xcJson = XContentType.JSON.xContent()

/** Encodes `t` onto the given builder via its in-scope Encoder. */
private def encodeUnsafe[T](t: T, b: XContentBuilder)(using c: Encoder[T]): Unit =
  c.encodeUnsafe(t, b)

/** Encodes `t` as JSON and returns the raw bytes. */
def encodeUnsafeToByteArray[T](t: T)(using c: Encoder[T]): Array[Byte] = {
  val bos = new ByteArrayOutputStream()
  val b = new XContentBuilder(XContentType.JSON.xContent(), bos)
  encodeUnsafe(t, b)
  b.close() // flushes the builder so bos holds the complete document
  bos.toByteArray
}

/** Encodes `t` as a JSON string.
  * NOTE(review): `new String(bytes)` uses the platform default charset;
  * consider UTF-8 explicitly — confirm against decodeUnsafeFromString.
  */
def encodeUnsafeToString[T](t: T)(using c: Encoder[T]): String =
  new String(encodeUnsafeToByteArray(t))

/** Decodes a `T` from the given parser via its in-scope Decoder. */
def decodeUnsafe[T](p: XContentParser)(using c: Decoder[T]): T =
  c.decodeUnsafe(p)

def decodeUnsafeFromMap[T](m: java.util.Map[String, Object])(implicit c: Decoder[T]): T = {
def decodeUnsafeFromMap[T](m: java.util.Map[String, Object])(using c: Decoder[T]): T = {
val bos = new ByteArrayOutputStream()
val builder = new XContentBuilder(xcJson, bos)
builder.map(m)
Expand All @@ -62,7 +62,7 @@ object XContentCodec {
c.decodeUnsafe(parser)
}

def decodeUnsafeFromList[T](l: java.util.List[Object])(implicit c: Decoder[T]): T = {
def decodeUnsafeFromList[T](l: java.util.List[Object])(using c: Decoder[T]): T = {
val bos = new ByteArrayOutputStream()
val builder = new XContentBuilder(xcJson, bos)
builder.value(l)
Expand All @@ -71,10 +71,10 @@ object XContentCodec {
c.decodeUnsafe(parser)
}

/** Decodes a `T` from a JSON string.
  * NOTE(review): `str.getBytes` uses the platform default charset;
  * consider UTF-8 explicitly — confirm against encodeUnsafeToString.
  */
def decodeUnsafeFromString[T](str: String)(using d: Decoder[T]): T =
  decodeUnsafeFromByteArray(str.getBytes)

/** Decodes a `T` from raw JSON bytes. */
def decodeUnsafeFromByteArray[T](barr: Array[Byte])(using c: Decoder[T]): T = {
  val parser = xcJson.createParser(XContentParserConfiguration.EMPTY, barr)
  c.decodeUnsafe(parser)
}
Expand Down Expand Up @@ -411,8 +411,8 @@ object XContentCodec {
case t => throw new XContentParseException(unexpectedToken(t, SortedSet(START_ARRAY, VALUE_NUMBER)))
}
while (p.nextToken() != END_ARRAY) {
assertToken(p.currentToken(), VALUE_NUMBER)
b.append(p.floatValue())
try b.append(p.floatValue())
catch case _ => assertToken(p.currentToken(), VALUE_NUMBER)
}
b.toArray
}
Expand All @@ -425,8 +425,8 @@ object XContentCodec {
case t => throw new XContentParseException(unexpectedToken(t, SortedSet(START_ARRAY, VALUE_NUMBER)))
}
while (p.nextToken() != END_ARRAY) {
assertToken(p.currentToken(), VALUE_NUMBER)
b.append(p.intValue())
try b.append(p.intValue())
catch case _ => assertToken(p.currentToken(), VALUE_NUMBER)
}
b.toArray
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class XContentCodecSuite extends AnyFreeSpec with Matchers {

import XContentCodec._

private implicit val rng: Random = new Random(0)
private given rng: Random = new Random(0)

private def shuffle(original: Json): Json = original.asObject match {
case Some(obj) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,8 @@ import com.klibisz.elastiknn.api.{NearestNeighborsQuery, XContentCodec}
import com.sksamuel.elastic4s.requests.searches.queries.{Query, RawQuery}

trait Elastic4sCompatibility {

  /** Implicit conversion from an elastiknn query to an elastic4s raw query,
    * serialized via XContentCodec.
    */
  given convertQuery: Conversion[NearestNeighborsQuery, Query] =
    nnq => RawQuery(s"""{"elastiknn_nearest_neighbors":${XContentCodec.encodeUnsafeToString(nnq)}}""")

  /** Backward-compatible replacement for the old implicit class's `.toQuery`,
    * so existing callers using the explicit method keep compiling.
    */
  extension (nnq: NearestNeighborsQuery) def toQuery: Query = convertQuery(nnq)
}

object Elastic4sCompatibility extends Elastic4sCompatibility
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ trait ElastiknnClient[F[_]] extends AutoCloseable {

/** Abstract method for executing a request.
  */
def execute[T, U](request: T)(using handler: Handler[T, U], javaTypeable: JavaTypeable[U]): F[Response[U]]

/** Execute the given request. Alias for [[execute]].
  */
final def apply[T, U](request: T)(using handler: Handler[T, U], javaTypeable: JavaTypeable[U]): F[Response[U]] = execute(request)

/** See ElastiknnRequests.putMapping().
*/
Expand All @@ -41,8 +41,6 @@ trait ElastiknnClient[F[_]] extends AutoCloseable {
* How many shards, 1 by default.
* @param replicas
* How many replicas, 1 by default.
* @param elastiknn
* Value for `index.elastiknn` setting, true by default.
* @return
* CreateIndexResponse
*/
Expand Down Expand Up @@ -79,7 +77,7 @@ trait ElastiknnClient[F[_]] extends AutoCloseable {

// Handler that reads the id from the stored field and places it in the id field.
// Otherwise it will be null since [[ElastiknnRequests.nearestNeighbors]] doesn't return stored fields.
implicit val handler: Handler[SearchRequest, SearchResponse] = new Handler[SearchRequest, SearchResponse] {
given handler: Handler[SearchRequest, SearchResponse] = new Handler[SearchRequest, SearchResponse] {
override def build(t: SearchRequest): ElasticRequest = SearchHandler.build(t)
override def responseHandler: ResponseHandler[SearchResponse] = (response: HttpResponse) => {
val handled: Either[ElasticError, SearchResponse] = SearchHandler.responseHandler.handle(response)
Expand All @@ -94,7 +92,7 @@ trait ElastiknnClient[F[_]] extends AutoCloseable {
}
}
}
execute(ElastiknnRequests.nearestNeighbors(index, query, k, storedIdField))(handler, implicitly[JavaTypeable[SearchResponse]])
execute(ElastiknnRequests.nearestNeighbors(index, query, k, storedIdField))
}

}
Expand All @@ -113,13 +111,13 @@ object ElastiknnClient {
* @return
* [[ElastiknnFutureClient]]
*/
def futureClient(restClient: RestClient, strictFailure: Boolean)(implicit ec: ExecutionContext): ElastiknnFutureClient = {
def futureClient(restClient: RestClient, strictFailure: Boolean)(using ec: ExecutionContext): ElastiknnFutureClient = {
val jc: JavaClient = new JavaClient(restClient)
new ElastiknnFutureClient {
implicit val executor: Executor[Future] = Executor.FutureExecutor(ec)
implicit val functor: Functor[Future] = Functor.FutureFunctor(ec)
given executor: Executor[Future] = Executor.FutureExecutor(ec)
given functor: Functor[Future] = Functor.FutureFunctor(ec)
val elasticClient: ElasticClient = ElasticClient(jc)
override def execute[T, U](req: T)(implicit handler: Handler[T, U], javaTypeable: JavaTypeable[U]): Future[Response[U]] = {
override def execute[T, U](req: T)(using handler: Handler[T, U], javaTypeable: JavaTypeable[U]): Future[Response[U]] = {
val future: Future[Response[U]] = elasticClient.execute(req)
if (strictFailure) future.flatMap { res =>
checkResponse(req, res) match {
Expand Down Expand Up @@ -149,7 +147,7 @@ object ElastiknnClient {
* @return
* [[ElastiknnFutureClient]]
*/
def futureClient(host: String = "localhost", port: Int = 9200, strictFailure: Boolean = true, timeoutMillis: Int = 30000)(implicit
def futureClient(host: String = "localhost", port: Int = 9200, strictFailure: Boolean = true, timeoutMillis: Int = 30000)(using
ec: ExecutionContext
): ElastiknnFutureClient = {
val rc: RestClient = RestClient
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package com.klibisz.elastiknn.client

import com.klibisz.elastiknn.api.{Mapping, NearestNeighborsQuery, Vec, XContentCodec}
import com.klibisz.elastiknn.client.Elastic4sCompatibility._
import com.klibisz.elastiknn.client.Elastic4sCompatibility.given
import com.sksamuel.elastic4s.json.{JacksonBuilder, XContentFactory}
import com.sksamuel.elastic4s.requests.indexes.IndexRequest
import com.sksamuel.elastic4s.requests.mappings.PutMappingRequest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import scala.util.Random

@State(Scope.Benchmark)
class FloatArrayBufferBenchmarksState {
implicit private val rng: Random = new Random(0)
private given rng: Random = new Random(0)
val lst768 = (0 until 768).map(_ => rng.nextFloat()).toList
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import scala.util.Random

@State(Scope.Benchmark)
class FloatVectorOpsBenchmarkState {
implicit private val rng: Random = new Random(0)
private given rng: Random = new Random(0)
val v1: Array[Float] = Vec.DenseFloat.random(999).values
val v2: Array[Float] = Vec.DenseFloat.random(999).values
val panama = new PanamaFloatVectorOps
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import scala.util.Random

@State(Scope.Benchmark)
class VectorSerializationBenchmarksState {
implicit private val rng: Random = new Random(0)
private given rng: Random = new Random(0)
val floatArray = (0 until 1000).map(_ => rng.nextFloat()).toArray
val floatArraySerialized = ByteBufferSerialization.writeFloats(floatArray)
val intArray = (0 until 1000).map(_ => rng.nextInt()).toArray
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import scala.util.Random
class CosineLshModelSuite extends AnyFunSuite with Matchers {

test("model is invariant to vector magnitude") {
implicit val rng: Random = new Random(0)
given rng: Random = new Random(0)
val dims = 10
for {
l <- 1 to 100 by 10
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,17 @@ import org.scalatest.matchers.should.Matchers

class HammingLshModelSuite extends AnyFunSuite with Matchers {

private given rng: util.Random = new util.Random()

test("correct number of hashes when L * k < dims") {
new util.Random()
val vec = Vec.SparseBool.random(10000)(new util.Random(0))
val vec = Vec.SparseBool.random(10000)
val model = new HammingLshModel(vec.dims, 10, 3, new Random(0))
val hashes = model.hash(vec.trueIndices, vec.totalIndices)
hashes should have length 10
}

test("correct number of hashes when L * k >= dims") {
val vec = Vec.SparseBool.random(200)(new util.Random(0))
val vec = Vec.SparseBool.random(200)
val model = new HammingLshModel(vec.dims, 70, 4, new Random(0))
val hashes = model.hash(vec.trueIndices, vec.totalIndices)
hashes should have length 70
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import scala.util.Random

class L2LshSuite extends AnyFunSuite with Matchers {

implicit val rng: Random = new Random(0)
given rng: Random = new Random(0)

test("produces exactly L hashes with probes = 0") {
val vec = Vec.DenseFloat.random(10)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class PermutationLshModelSuite extends AnyFunSuite with Matchers {
}

test("deterministic hashing") {
implicit val rng: Random = new Random(0)
given rng: Random = new Random(0)
val dims = 1024
val mlsh = new PermutationLshModel(128, true)
(0 until 100).foreach { _ =>
Expand All @@ -56,7 +56,7 @@ class PermutationLshModelSuite extends AnyFunSuite with Matchers {
}

test("model is invariant to vector magnitude") {
implicit val rng: Random = new Random(0)
given rng: Random = new Random(0)
val dims = 10
for {
isUnit <- Seq(true, false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class PanamaFloatVectorOpsSpec extends AnyFreeSpec with Matchers {
private val dfvo = new DefaultFloatVectorOps
private val pfvo = new PanamaFloatVectorOps
private val seed = System.currentTimeMillis()
private implicit val rng: Random = new Random(seed)
private given rng: Random = new Random(seed)
info(s"Testing with seed $seed")

private def compare(f1: Double, f2: Double) = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class ApproximateQueryTotalHitsSuite extends AsyncFunSuite with Matchers with El

test("same approximate query should return same total hits") {

implicit val rng: Random = new Random(0)
given rng: Random = new Random(0)
val (index, vecField, idField, dims) = ("issue-240", "vec", "id", 80)
val corpus = Vec.DenseFloat.randoms(dims, 9999)
val ids = corpus.indices.map(i => s"v$i")
Expand All @@ -27,7 +27,7 @@ class ApproximateQueryTotalHitsSuite extends AsyncFunSuite with Matchers with El
val running = shuffled.map { case (q, i) =>
eknn.nearestNeighbors(index, q, k, idField).map(r => (i, r.result.totalHits, r.result.hits.hits.toVector.map(_.id)))
}
Future.sequence(running).map(_.sortBy(_._1))
Future.sequence(running).map(_.sortBy((id, _, _) => id))
}

for {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import scala.util.Random

class CloseIndexRegressionSuite extends AsyncFunSuite with Matchers with ElasticAsyncClient {

implicit val rng: Random = new Random(0)
given rng: Random = new Random(0)

test("close index without elastiknn setting") {
val index = "issue-215-close-no-elastiknn"
Expand Down
Loading
Loading