adding openvino support to all ClassificationForXXX annotators (#14408)
* adding DebertaForXXX support

* Uploading the remaining files

* Delete examples/python/transformers/openvino/HuggingFace_OpenVINO_in_Spark_NLP_CamemBERT.ipynb

* Delete examples/python/transformers/openvino/HuggingFace_OpenVINO_in_Spark_NLP_XlmRoBertaForTokenClassification.ipynb

* Delete examples/python/transformers/openvino/HuggingFace_OpenVINO_in_Spark_NLP_XlmRoBertaForZeroShotClassification.ipynb

* adding openvino support to multiple annotators

* Update Bart.scala
ahmedlone127 authored Dec 9, 2024
1 parent 6653546 commit 4b2aa30
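
For context: with this change, the touched annotators can run on the OpenVINO engine in addition to TensorFlow and ONNX. A minimal usage sketch in Scala follows; it assumes an active SparkSession named spark, a DataFrame named data with a "text" column, and a local directory containing a model already exported to OpenVINO format. The path and the choice of AlbertForSequenceClassification are placeholders for illustration, not part of this commit.

import com.johnsnowlabs.nlp.DocumentAssembler
import com.johnsnowlabs.nlp.annotators.Tokenizer
import com.johnsnowlabs.nlp.annotators.classifier.dl.AlbertForSequenceClassification
import org.apache.spark.ml.Pipeline

// Assemble raw text into Spark NLP documents
val documentAssembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// Split documents into tokens
val tokenizer = new Tokenizer()
  .setInputCols("document")
  .setOutputCol("token")

// "/models/albert_seq_cls_openvino" is a placeholder path to a locally exported model;
// when OpenVINO artifacts are present, the annotator should detect the Openvino engine.
val classifier = AlbertForSequenceClassification
  .loadSavedModel("/models/albert_seq_cls_openvino", spark)
  .setInputCols("document", "token")
  .setOutputCol("class")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, classifier))
val predictions = pipeline.fit(data).transform(data)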
Showing 124 changed files with 89,872 additions and 1,839 deletions.

Large diffs are not rendered by default.

41 changes: 40 additions & 1 deletion src/main/scala/com/johnsnowlabs/ml/ai/Albert.scala
@@ -19,11 +19,13 @@ package com.johnsnowlabs.ml.ai
import ai.onnxruntime.OnnxTensor
import com.johnsnowlabs.ml.ai.util.PrepareEmbeddings
import com.johnsnowlabs.ml.onnx.{OnnxSession, OnnxWrapper}
import com.johnsnowlabs.ml.openvino.OpenvinoWrapper
import com.johnsnowlabs.ml.tensorflow.sentencepiece.{SentencePieceWrapper, SentencepieceEncoder}
import com.johnsnowlabs.ml.tensorflow.sign.{ModelSignatureConstants, ModelSignatureManager}
import com.johnsnowlabs.ml.tensorflow.{TensorResources, TensorflowWrapper}
import com.johnsnowlabs.ml.util.{ONNX, TensorFlow}
import com.johnsnowlabs.ml.util.{ONNX, Openvino, TensorFlow}
import com.johnsnowlabs.nlp.annotators.common._
import org.intel.openvino.Tensor
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.JavaConverters._
@@ -71,6 +73,7 @@ import scala.collection.JavaConverters._
private[johnsnowlabs] class Albert(
val tensorflowWrapper: Option[TensorflowWrapper],
val onnxWrapper: Option[OnnxWrapper],
val openvinoWrapper: Option[OpenvinoWrapper],
val spp: SentencePieceWrapper,
batchSize: Int,
configProtoBytes: Option[Array[Byte]] = None,
@@ -83,6 +86,7 @@ private[johnsnowlabs] class Albert(
val detectedEngine: String =
if (tensorflowWrapper.isDefined) TensorFlow.name
else if (onnxWrapper.isDefined) ONNX.name
else if (openvinoWrapper.isDefined) Openvino.name
else TensorFlow.name
private val onnxSessionOptions: Map[String, String] = new OnnxSession().getSessionOptions

@@ -155,6 +159,41 @@ private[johnsnowlabs] class Albert(
maskTensors.close()
segmentTensors.close()
}

case Openvino.name =>
val batchLength = batch.length
val shape = Array(batchLength, maxSentenceLength)
val (tokenTensors, maskTensors) =
PrepareEmbeddings.prepareOvLongBatchTensors(batch, maxSentenceLength, batchLength)
val segmentTensors = new Tensor(shape, Array.fill(batchLength * maxSentenceLength)(0L))

val inferRequest = openvinoWrapper.get.getCompiledModel().create_infer_request()
inferRequest.set_tensor("input_ids", tokenTensors)
inferRequest.set_tensor("attention_mask", maskTensors)
inferRequest.set_tensor("token_type_ids", segmentTensors)

inferRequest.infer()

try {
// Return the flattened last hidden state as the embedding output
inferRequest
.get_tensor("last_hidden_state")
.data()
} catch {
case e: Exception =>
// Log the error, then rethrow so the failure propagates to the caller
e.printStackTrace()
throw e
}

case _ =>
val tensors = new TensorResources()

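Note that the Albert hunk above delegates input construction to PrepareEmbeddings.prepareOvLongBatchTensors, which is not shown in this diff. A rough, hypothetical sketch of what such a helper does, modeled on the inline tensor construction in BGE.scala below; the name, signature, and padding convention here are assumptions, not the actual implementation.

import org.intel.openvino.Tensor

// Hypothetical helper: flatten the padded token-id batch into OpenVINO tensors and
// derive the attention mask from the padding convention used by the caller.
def prepareOvLongBatchTensorsSketch(
    batch: Seq[Array[Int]],
    maxSentenceLength: Int,
    batchLength: Int): (Tensor, Tensor) = {
  val shape = Array(batchLength, maxSentenceLength)
  // Token ids widened to Long, as expected by the exported model inputs
  val tokenTensors = new Tensor(shape, batch.flatMap(_.map(_.toLong)).toArray)
  // 1 for real tokens, 0 for padding (negative ids mark padding in this sketch)
  val maskTensors = new Tensor(
    shape,
    batch.flatMap(_.map(id => if (id < 0) 0L else 1L)).toArray)
  (tokenTensors, maskTensors)
}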
58 changes: 57 additions & 1 deletion src/main/scala/com/johnsnowlabs/ml/ai/BGE.scala
@@ -17,12 +17,15 @@
package com.johnsnowlabs.ml.ai

import ai.onnxruntime.{OnnxTensor, TensorInfo}
import com.johnsnowlabs.ml.ai.util.PrepareEmbeddings
import com.johnsnowlabs.ml.onnx.{OnnxSession, OnnxWrapper}
import com.johnsnowlabs.ml.openvino.OpenvinoWrapper
import com.johnsnowlabs.ml.tensorflow.sign.{ModelSignatureConstants, ModelSignatureManager}
import com.johnsnowlabs.ml.tensorflow.{TensorResources, TensorflowWrapper}
import com.johnsnowlabs.ml.util.{LinAlg, ONNX, TensorFlow}
import com.johnsnowlabs.ml.util.{LinAlg, ONNX, Openvino, TensorFlow}
import com.johnsnowlabs.nlp.annotators.common._
import com.johnsnowlabs.nlp.{Annotation, AnnotatorType}
import org.intel.openvino.Tensor
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.JavaConverters._
Expand All @@ -42,6 +45,7 @@ import scala.collection.JavaConverters._
private[johnsnowlabs] class BGE(
val tensorflowWrapper: Option[TensorflowWrapper],
val onnxWrapper: Option[OnnxWrapper],
val openvinoWrapper: Option[OpenvinoWrapper],
configProtoBytes: Option[Array[Byte]] = None,
sentenceStartTokenId: Int,
sentenceEndTokenId: Int,
@@ -57,6 +61,7 @@ private[johnsnowlabs] class BGE(
val detectedEngine: String =
if (tensorflowWrapper.isDefined) TensorFlow.name
else if (onnxWrapper.isDefined) ONNX.name
else if (openvinoWrapper.isDefined) Openvino.name
else TensorFlow.name
private val onnxSessionOptions: Map[String, String] = new OnnxSession().getSessionOptions

@@ -72,6 +77,9 @@ private[johnsnowlabs] class BGE(
val embeddings = detectedEngine match {
case ONNX.name =>
getSentenceEmbeddingFromOnnx(paddedBatch, maxSentenceLength)

case Openvino.name =>
getSentenceEmbeddingFromOv(paddedBatch, maxSentenceLength)
case _ =>
getSentenceEmbeddingFromTF(paddedBatch, maxSentenceLength)
}
@@ -160,6 +168,54 @@
sentenceEmbeddingsFloatsArray
}

private def getSentenceEmbeddingFromOv(
batch: Seq[Array[Int]],
maxSentenceLength: Int): Array[Array[Float]] = {
val batchLength = batch.length
val shape = Array(batchLength, maxSentenceLength)
val tokenTensors =
new org.intel.openvino.Tensor(shape, batch.flatMap(x => x.map(xx => xx.toLong)).toArray)
val attentionMask = batch.map(sentence => sentence.map(x => if (x < 0L) 0L else 1L)).toArray

val maskTensors = new org.intel.openvino.Tensor(
shape,
attentionMask.flatten)

val segmentTensors = new Tensor(shape, Array.fill(batchLength * maxSentenceLength)(0L))
val inferRequest = openvinoWrapper.get.getCompiledModel().create_infer_request()
inferRequest.set_tensor("input_ids", tokenTensors)
inferRequest.set_tensor("attention_mask", maskTensors)
inferRequest.set_tensor("token_type_ids", segmentTensors)

inferRequest.infer()

try {
// Mean-pool the last hidden state over the attention mask, then L2-normalize
val lastHiddenState = inferRequest
.get_tensor("last_hidden_state")
val shape = lastHiddenState.get_shape().map(_.toLong)
val flattenEmbeddings = lastHiddenState
.data()
val embeddings = LinAlg.avgPooling(flattenEmbeddings, attentionMask, shape)
val normalizedEmbeddings = LinAlg.l2Normalize(embeddings)
LinAlg.denseMatrixToArray(normalizedEmbeddings)
} catch {
case e: Exception =>
// Log the error, then rethrow so the failure propagates to the caller
e.printStackTrace()
throw e
}

}

private def getSentenceEmbeddingFromOnnx(
batch: Seq[Array[Int]],
maxSentenceLength: Int): Array[Array[Float]] = {
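The OpenVINO branch above reuses LinAlg.avgPooling and LinAlg.l2Normalize, so OpenVINO outputs go through the same pooling and normalization as the ONNX path. As a self-contained illustration of what the masked mean-pooling step computes, here is a sketch; the real LinAlg helper may differ in representation and details.

// Hypothetical sketch of masked mean pooling over a flattened last hidden state.
def maskedMeanPoolingSketch(
    flatHiddenState: Array[Float], // flattened [batch, seqLen, dim] last hidden state
    attentionMask: Array[Array[Long]], // 1L = real token, 0L = padding
    shape: Array[Long]): Array[Array[Float]] = {
  val Array(batch, seqLen, dim) = shape.map(_.toInt)
  Array.tabulate(batch) { b =>
    val pooled = new Array[Float](dim)
    var count = 0f
    for (t <- 0 until seqLen if attentionMask(b)(t) == 1L) {
      count += 1f
      val offset = (b * seqLen + t) * dim
      for (d <- 0 until dim) pooled(d) += flatHiddenState(offset + d)
    }
    // Average only over unmasked positions; guard against all-padding rows
    if (count > 0f) for (d <- 0 until dim) pooled(d) = pooled(d) / count
    pooled
  }
}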