Skip to content

Commit

Permalink
Integrating OpenVINO Runtime in Spark NLP (#14200)
Browse files Browse the repository at this point in the history
* Use OpenVINO model engine for BertEmbeddings

Add OpenVINO model engine wrapper

Add default buffer size for reading weights file

Read OpenVINO IR format models

* Use Long Tensors with XlmRoberta

* Add OpenVINO support for RoBerta and XlmRoBerta embeddings

* Fix data type and formatting

* Add OpenVINO BERT test

- Resole merge conflicts

- Add test for openvino load model

* Update Python APIs to use OpenVINO

* Add param to enable OpenVINO through Python API
* Formatting changes

* Add OpenVINO support for E5 Embeddings

* Enable OpenVINO backend for E5 Embeddings
* Update Python APIs

* Resolve merge issues

* Add OpenVINO support for T5

* Read and write encoder-decoder models with OpenVINO

* OpenVINO Async Inference

* Refactor and cleanup

* Update comments

* Add config to set OpenVINO inference device

* Add OpenVINO support for BERT Sentence Embeddings

* Formatting

* Openvino synchronous inference

* Refactoring: OV Model Conversion

* BertSentenceEmbeddings Python API

* Enable OpenVINO support for Llama2

* Read/write Llama2 Transformer with OpenVINO

* Bugfix: Update saved model filename

* Replace broadcast with addFile for OpenVINO-based annotators

* Add OpenVINO Wrapper tests

* Add suffix to avoid duplication in Spark Files

* Set default OV inference device to CPU

* Use CPU by default for OpenVINO inference due to error loading device config in cluster envs

* Bugfix: Read serialized model from folder
  • Loading branch information
rajatkrishna authored May 21, 2024
1 parent 4419a70 commit fabc4ab
Show file tree
Hide file tree
Showing 36 changed files with 1,507 additions and 124 deletions.
6 changes: 4 additions & 2 deletions python/sparknlp/annotator/embeddings/bert_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def __init__(self, classname="com.johnsnowlabs.nlp.embeddings.BertEmbeddings", j
)

@staticmethod
def loadSavedModel(folder, spark_session):
def loadSavedModel(folder, spark_session, use_openvino=False):
"""Loads a locally saved model.
Parameters
Expand All @@ -173,14 +173,16 @@ def loadSavedModel(folder, spark_session):
Folder of the saved model
spark_session : pyspark.sql.SparkSession
The current SparkSession
use_openvino: bool
Use OpenVINO backend
Returns
-------
BertEmbeddings
The restored model
"""
from sparknlp.internal import _BertLoader
jModel = _BertLoader(folder, spark_session._jsparkSession)._java_obj
jModel = _BertLoader(folder, spark_session._jsparkSession, use_openvino)._java_obj
return BertEmbeddings(java_model=jModel)

@staticmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def __init__(self, classname="com.johnsnowlabs.nlp.embeddings.BertSentenceEmbedd
)

@staticmethod
def loadSavedModel(folder, spark_session):
def loadSavedModel(folder, spark_session, use_openvino=False):
"""Loads a locally saved model.
Parameters
Expand All @@ -189,14 +189,16 @@ def loadSavedModel(folder, spark_session):
Folder of the saved model
spark_session : pyspark.sql.SparkSession
The current SparkSession
use_openvino: bool
Use OpenVINO backend
Returns
-------
BertSentenceEmbeddings
The restored model
"""
from sparknlp.internal import _BertSentenceLoader
jModel = _BertSentenceLoader(folder, spark_session._jsparkSession)._java_obj
jModel = _BertSentenceLoader(folder, spark_session._jsparkSession, use_openvino)._java_obj
return BertSentenceEmbeddings(java_model=jModel)

@staticmethod
Expand Down
6 changes: 4 additions & 2 deletions python/sparknlp/annotator/embeddings/e5_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def __init__(self, classname="com.johnsnowlabs.nlp.embeddings.E5Embeddings", jav
)

@staticmethod
def loadSavedModel(folder, spark_session):
def loadSavedModel(folder, spark_session, use_openvino=False):
"""Loads a locally saved model.
Parameters
Expand All @@ -158,14 +158,16 @@ def loadSavedModel(folder, spark_session):
Folder of the saved model
spark_session : pyspark.sql.SparkSession
The current SparkSession
use_openvino : bool
Use OpenVINO backend
Returns
-------
E5Embeddings
The restored model
"""
from sparknlp.internal import _E5Loader
jModel = _E5Loader(folder, spark_session._jsparkSession)._java_obj
jModel = _E5Loader(folder, spark_session._jsparkSession, use_openvino)._java_obj
return E5Embeddings(java_model=jModel)

@staticmethod
Expand Down
6 changes: 4 additions & 2 deletions python/sparknlp/annotator/embeddings/roberta_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def __init__(self, classname="com.johnsnowlabs.nlp.embeddings.RoBertaEmbeddings"
)

@staticmethod
def loadSavedModel(folder, spark_session):
def loadSavedModel(folder, spark_session, use_openvino=False):
"""Loads a locally saved model.
Parameters
Expand All @@ -190,14 +190,16 @@ def loadSavedModel(folder, spark_session):
Folder of the saved model
spark_session : pyspark.sql.SparkSession
The current SparkSession
use_openvino: bool
Use OpenVINO backend
Returns
-------
RoBertaEmbeddings
The restored model
"""
from sparknlp.internal import _RoBertaLoader
jModel = _RoBertaLoader(folder, spark_session._jsparkSession)._java_obj
jModel = _RoBertaLoader(folder, spark_session._jsparkSession, use_openvino)._java_obj
return RoBertaEmbeddings(java_model=jModel)

@staticmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def __init__(self, classname="com.johnsnowlabs.nlp.embeddings.XlmRoBertaEmbeddin
)

@staticmethod
def loadSavedModel(folder, spark_session):
def loadSavedModel(folder, spark_session, use_openvino=False):
"""Loads a locally saved model.
Parameters
Expand All @@ -190,14 +190,16 @@ def loadSavedModel(folder, spark_session):
Folder of the saved model
spark_session : pyspark.sql.SparkSession
The current SparkSession
use_openvino: bool
Use OpenVINO backend
Returns
-------
XlmRoBertaEmbeddings
The restored model
"""
from sparknlp.internal import _XlmRoBertaLoader
jModel = _XlmRoBertaLoader(folder, spark_session._jsparkSession)._java_obj
jModel = _XlmRoBertaLoader(folder, spark_session._jsparkSession, use_openvino)._java_obj
return XlmRoBertaEmbeddings(java_model=jModel)

@staticmethod
Expand Down
4 changes: 2 additions & 2 deletions python/sparknlp/annotator/seq2seq/llama2_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ def __init__(self, classname="com.johnsnowlabs.nlp.annotators.seq2seq.LLAMA2Tran
)

@staticmethod
def loadSavedModel(folder, spark_session):
def loadSavedModel(folder, spark_session, use_openvino = False):
"""Loads a locally saved model.
Parameters
Expand All @@ -317,7 +317,7 @@ def loadSavedModel(folder, spark_session):
The restored model
"""
from sparknlp.internal import _LLAMA2Loader
jModel = _LLAMA2Loader(folder, spark_session._jsparkSession)._java_obj
jModel = _LLAMA2Loader(folder, spark_session._jsparkSession, use_openvino)._java_obj
return LLAMA2Transformer(java_model=jModel)

@staticmethod
Expand Down
24 changes: 12 additions & 12 deletions python/sparknlp/internal/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,14 @@ def __init__(self, path, jspark):


class _BertLoader(ExtendedJavaWrapper):
def __init__(self, path, jspark):
super(_BertLoader, self).__init__("com.johnsnowlabs.nlp.embeddings.BertEmbeddings.loadSavedModel", path, jspark)
def __init__(self, path, jspark, use_openvino=False):
super(_BertLoader, self).__init__("com.johnsnowlabs.nlp.embeddings.BertEmbeddings.loadSavedModel", path, jspark, use_openvino)


class _BertSentenceLoader(ExtendedJavaWrapper):
def __init__(self, path, jspark):
def __init__(self, path, jspark, use_openvino=False):
super(_BertSentenceLoader, self).__init__(
"com.johnsnowlabs.nlp.embeddings.BertSentenceEmbeddings.loadSavedModel", path, jspark)
"com.johnsnowlabs.nlp.embeddings.BertSentenceEmbeddings.loadSavedModel", path, jspark, use_openvino)


class _BertSequenceClassifierLoader(ExtendedJavaWrapper):
Expand Down Expand Up @@ -144,8 +144,8 @@ def __init__(self, path, jspark):


class _E5Loader(ExtendedJavaWrapper):
def __init__(self, path, jspark):
super(_E5Loader, self).__init__("com.johnsnowlabs.nlp.embeddings.E5Embeddings.loadSavedModel", path, jspark)
def __init__(self, path, jspark, use_openvino=False):
super(_E5Loader, self).__init__("com.johnsnowlabs.nlp.embeddings.E5Embeddings.loadSavedModel", path, jspark, use_openvino)


class _BGELoader(ExtendedJavaWrapper):
Expand All @@ -160,9 +160,9 @@ def __init__(self, path, jspark):


class _LLAMA2Loader(ExtendedJavaWrapper):
def __init__(self, path, jspark):
def __init__(self, path, jspark, use_openvino=False):
super(_LLAMA2Loader, self).__init__(
"com.johnsnowlabs.nlp.annotators.seq2seq.LLAMA2Transformer.loadSavedModel", path, jspark)
"com.johnsnowlabs.nlp.annotators.seq2seq.LLAMA2Transformer.loadSavedModel", path, jspark, use_openvino)


class _LongformerLoader(ExtendedJavaWrapper):
Expand Down Expand Up @@ -212,9 +212,9 @@ def __init__(self, path, jspark):


class _RoBertaLoader(ExtendedJavaWrapper):
def __init__(self, path, jspark):
def __init__(self, path, jspark, use_openvino=False):
super(_RoBertaLoader, self).__init__("com.johnsnowlabs.nlp.embeddings.RoBertaEmbeddings.loadSavedModel", path,
jspark)
jspark, use_openvino)


class _RoBertaSentenceLoader(ExtendedJavaWrapper):
Expand Down Expand Up @@ -261,9 +261,9 @@ def __init__(self, path, jspark, loadsp):


class _XlmRoBertaLoader(ExtendedJavaWrapper):
def __init__(self, path, jspark):
def __init__(self, path, jspark, use_openvino=False):
super(_XlmRoBertaLoader, self).__init__("com.johnsnowlabs.nlp.embeddings.XlmRoBertaEmbeddings.loadSavedModel",
path, jspark)
path, jspark, use_openvino)


class _XlmRoBertaSentenceLoader(ExtendedJavaWrapper):
Expand Down
4 changes: 3 additions & 1 deletion src/main/scala/com/johnsnowlabs/ml/ai/Bart.scala
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import com.johnsnowlabs.ml.tensorflow.{TensorResources, TensorflowWrapper}
import com.johnsnowlabs.nlp.annotators.common.SentenceSplit
import com.johnsnowlabs.nlp.annotators.tokenizer.bpe.{BartTokenizer, BpeTokenizer}
import com.johnsnowlabs.nlp.{Annotation, AnnotatorType}
import org.intel.openvino.InferRequest
import org.tensorflow.{Session, Tensor}

import scala.collection.JavaConverters._
Expand Down Expand Up @@ -366,7 +367,8 @@ private[johnsnowlabs] class Bart(
decoderEncoderStateTensors: Either[Tensor, OnnxTensor],
encoderAttentionMaskTensors: Either[Tensor, OnnxTensor],
maxLength: Int,
session: Either[Session, (OrtEnvironment, OrtSession)]): Array[Array[Float]] = {
session: Either[Session, (OrtEnvironment, OrtSession)],
ovInferRequest: Option[InferRequest]): Array[Array[Float]] = {

// extract decoderEncoderStateTensors, encoderAttentionMaskTensors and Session from LEFT
assert(decoderEncoderStateTensors.isLeft)
Expand Down
43 changes: 42 additions & 1 deletion src/main/scala/com/johnsnowlabs/ml/ai/Bert.scala
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@ package com.johnsnowlabs.ml.ai
import ai.onnxruntime.OnnxTensor
import com.johnsnowlabs.ml.ai.util.PrepareEmbeddings
import com.johnsnowlabs.ml.onnx.{OnnxSession, OnnxWrapper}
import com.johnsnowlabs.ml.openvino.OpenvinoWrapper
import com.johnsnowlabs.ml.tensorflow.sign.{ModelSignatureConstants, ModelSignatureManager}
import com.johnsnowlabs.ml.tensorflow.{TensorResources, TensorflowWrapper}
import com.johnsnowlabs.ml.util.{ModelArch, ONNX, TensorFlow}
import com.johnsnowlabs.ml.util.{ModelArch, ONNX, Openvino, TensorFlow}
import com.johnsnowlabs.nlp.annotators.common._
import com.johnsnowlabs.nlp.{Annotation, AnnotatorType}
import org.slf4j.{Logger, LoggerFactory}
import org.intel.openvino.Tensor

import scala.collection.JavaConverters._

Expand All @@ -40,6 +42,8 @@ import scala.collection.JavaConverters._
* Bert Model wrapper with TensorFlow Wrapper
* @param onnxWrapper
* Bert Model wrapper with ONNX Wrapper
* @param openvinoWrapper
* Bert Model wrapper with OpenVINO Wrapper
* @param sentenceStartTokenId
* Id of sentence start Token
* @param sentenceEndTokenId
Expand All @@ -54,6 +58,7 @@ import scala.collection.JavaConverters._
private[johnsnowlabs] class Bert(
val tensorflowWrapper: Option[TensorflowWrapper],
val onnxWrapper: Option[OnnxWrapper],
val openvinoWrapper: Option[OpenvinoWrapper],
sentenceStartTokenId: Int,
sentenceEndTokenId: Int,
configProtoBytes: Option[Array[Byte]] = None,
Expand All @@ -67,6 +72,7 @@ private[johnsnowlabs] class Bert(
val detectedEngine: String =
if (tensorflowWrapper.isDefined) TensorFlow.name
else if (onnxWrapper.isDefined) ONNX.name
else if (openvinoWrapper.isDefined) Openvino.name
else TensorFlow.name
private val onnxSessionOptions: Map[String, String] = new OnnxSession().getSessionOptions

Expand Down Expand Up @@ -136,6 +142,23 @@ private[johnsnowlabs] class Bert(
maskTensors.close()
segmentTensors.close()
}
case Openvino.name =>
val shape = Array(batchLength, maxSentenceLength)
val (tokenTensors, maskTensors) =
PrepareEmbeddings.prepareOvLongBatchTensors(batch, maxSentenceLength, batchLength)
val segmentTensors = new Tensor(shape, Array.fill(batchLength * maxSentenceLength)(0L))

val inferRequest = openvinoWrapper.get.getCompiledModel().create_infer_request()
inferRequest.set_tensor("input_ids", tokenTensors)
inferRequest.set_tensor("attention_mask", maskTensors)
inferRequest.set_tensor("token_type_ids", segmentTensors)

inferRequest.infer()

val result = inferRequest.get_tensor("last_hidden_state")
val embeddings = result.data()

embeddings
case _ =>
val tensors = new TensorResources()

Expand Down Expand Up @@ -244,6 +267,22 @@ private[johnsnowlabs] class Bert(
// Rethrow the exception to propagate it further
throw e
}
case Openvino.name =>
val shape = Array(batchLength, maxSentenceLength)
val (tokenTensors, maskTensors) =
PrepareEmbeddings.prepareOvLongBatchTensors(batch, maxSentenceLength, batchLength)
val segmentTensors = new Tensor(shape, Array.fill(batchLength * maxSentenceLength)(0L))

val inferRequest = openvinoWrapper.get.getCompiledModel().create_infer_request()
inferRequest.set_tensor("input_ids", tokenTensors)
inferRequest.set_tensor("attention_mask", maskTensors)
inferRequest.set_tensor("token_type_ids", segmentTensors)

inferRequest.infer()

val result = inferRequest.get_tensor("last_hidden_state")
val embeddings = result.data()
embeddings
case _ =>
val tensors = new TensorResources()

Expand Down Expand Up @@ -297,6 +336,8 @@ private[johnsnowlabs] class Bert(

def tagSequenceSBert(batch: Seq[Array[Int]]): Array[Array[Float]] = {
detectedEngine match {
case Openvino.name =>
tagSequence(batch)
case ONNX.name =>
tagSequence(batch)
case TensorFlow.name =>
Expand Down
Loading

0 comments on commit fabc4ab

Please sign in to comment.