Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrating OpenVINO Runtime in Spark NLP #14200

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
7c798f6
Use OpenVINO model engine for BertEmbeddings
rajatkrishna Aug 26, 2023
27d929c
Use Long Tensors with XlmRoberta
rajatkrishna Aug 18, 2023
d6e1439
Add OpenVINO support for RoBerta and XlmRoBerta embeddings
rajatkrishna Aug 18, 2023
6e7ffe1
Fix data type and formatting
rajatkrishna Aug 18, 2023
b025a70
Add OpenVINO BERT test
rajatkrishna Mar 4, 2024
5804be4
Update Python APIs to use OpenVINO
rajatkrishna Nov 7, 2023
b9d92ef
Add OpenVINO support for E5 Embeddings
rajatkrishna Dec 20, 2023
6e9bca4
Resolve merge issues
rajatkrishna Feb 27, 2024
48136fa
Add OpenVINO support for T5
rajatkrishna Feb 29, 2024
8f44ac6
Read and write encoder-decoder models with OpenVINO
rajatkrishna Mar 9, 2024
aa13333
OpenVINO Async Inference
rajatkrishna Mar 9, 2024
981e0e5
Refactor and cleanup
rajatkrishna Mar 9, 2024
cac3368
Update comments
rajatkrishna Mar 9, 2024
a7cd5c8
Add config to set OpenVINO inference device
rajatkrishna Mar 12, 2024
6f48d1f
Add OpenVINO support for BERT Sentence Embeddings
rajatkrishna Mar 21, 2024
4b1c0ba
Formatting
rajatkrishna Mar 21, 2024
8b922e7
Openvino synchronous inference
rajatkrishna Mar 21, 2024
c969fe3
Refactoring: OV Model Conversion
rajatkrishna Mar 23, 2024
1097536
BertSentenceEmbeddings Python API
rajatkrishna Mar 29, 2024
904b492
Enable OpenVINO support for Llama2
rajatkrishna Mar 31, 2024
9eb5b2f
Read/write Llama2 Transformer with OpenVINO
rajatkrishna Apr 5, 2024
d918274
Merge branch 'master' into openvino-integration
rajatkrishna Apr 5, 2024
1a54f36
Bugfix: Update saved model filename
rajatkrishna Apr 5, 2024
9c6c7a9
Replace broadcast with addFile for OpenVINO-based annotators
rajatkrishna Apr 20, 2024
5f73d0b
Add OpenVINO Wrapper tests
rajatkrishna May 2, 2024
fb1fb9a
Add suffix to avoid duplication in Spark Files
rajatkrishna May 3, 2024
7fa98e9
Set default OV inference device to CPU
rajatkrishna May 3, 2024
575846b
Bugfix: Read serialized model from folder
rajatkrishna May 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions python/sparknlp/annotator/embeddings/bert_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def __init__(self, classname="com.johnsnowlabs.nlp.embeddings.BertEmbeddings", j
)

@staticmethod
def loadSavedModel(folder, spark_session):
def loadSavedModel(folder, spark_session, use_openvino=False):
"""Loads a locally saved model.
Parameters
Expand All @@ -173,14 +173,16 @@ def loadSavedModel(folder, spark_session):
Folder of the saved model
spark_session : pyspark.sql.SparkSession
The current SparkSession
use_openvino: bool
Use OpenVINO backend
Returns
-------
BertEmbeddings
The restored model
"""
from sparknlp.internal import _BertLoader
jModel = _BertLoader(folder, spark_session._jsparkSession)._java_obj
jModel = _BertLoader(folder, spark_session._jsparkSession, use_openvino)._java_obj
return BertEmbeddings(java_model=jModel)

@staticmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def __init__(self, classname="com.johnsnowlabs.nlp.embeddings.BertSentenceEmbedd
)

@staticmethod
def loadSavedModel(folder, spark_session):
def loadSavedModel(folder, spark_session, use_openvino=False):
"""Loads a locally saved model.

Parameters
Expand All @@ -189,14 +189,16 @@ def loadSavedModel(folder, spark_session):
Folder of the saved model
spark_session : pyspark.sql.SparkSession
The current SparkSession
use_openvino: bool
Use OpenVINO backend

Returns
-------
BertSentenceEmbeddings
The restored model
"""
from sparknlp.internal import _BertSentenceLoader
jModel = _BertSentenceLoader(folder, spark_session._jsparkSession)._java_obj
jModel = _BertSentenceLoader(folder, spark_session._jsparkSession, use_openvino)._java_obj
return BertSentenceEmbeddings(java_model=jModel)

@staticmethod
Expand Down
6 changes: 4 additions & 2 deletions python/sparknlp/annotator/embeddings/e5_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def __init__(self, classname="com.johnsnowlabs.nlp.embeddings.E5Embeddings", jav
)

@staticmethod
def loadSavedModel(folder, spark_session):
def loadSavedModel(folder, spark_session, use_openvino=False):
"""Loads a locally saved model.
Parameters
Expand All @@ -158,14 +158,16 @@ def loadSavedModel(folder, spark_session):
Folder of the saved model
spark_session : pyspark.sql.SparkSession
The current SparkSession
use_openvino : bool
Use OpenVINO backend
Returns
-------
E5Embeddings
The restored model
"""
from sparknlp.internal import _E5Loader
jModel = _E5Loader(folder, spark_session._jsparkSession)._java_obj
jModel = _E5Loader(folder, spark_session._jsparkSession, use_openvino)._java_obj
return E5Embeddings(java_model=jModel)

@staticmethod
Expand Down
6 changes: 4 additions & 2 deletions python/sparknlp/annotator/embeddings/roberta_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def __init__(self, classname="com.johnsnowlabs.nlp.embeddings.RoBertaEmbeddings"
)

@staticmethod
def loadSavedModel(folder, spark_session):
def loadSavedModel(folder, spark_session, use_openvino=False):
"""Loads a locally saved model.
Parameters
Expand All @@ -190,14 +190,16 @@ def loadSavedModel(folder, spark_session):
Folder of the saved model
spark_session : pyspark.sql.SparkSession
The current SparkSession
use_openvino: bool
Use OpenVINO backend
Returns
-------
RoBertaEmbeddings
The restored model
"""
from sparknlp.internal import _RoBertaLoader
jModel = _RoBertaLoader(folder, spark_session._jsparkSession)._java_obj
jModel = _RoBertaLoader(folder, spark_session._jsparkSession, use_openvino)._java_obj
return RoBertaEmbeddings(java_model=jModel)

@staticmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def __init__(self, classname="com.johnsnowlabs.nlp.embeddings.XlmRoBertaEmbeddin
)

@staticmethod
def loadSavedModel(folder, spark_session):
def loadSavedModel(folder, spark_session, use_openvino=False):
"""Loads a locally saved model.

Parameters
Expand All @@ -190,14 +190,16 @@ def loadSavedModel(folder, spark_session):
Folder of the saved model
spark_session : pyspark.sql.SparkSession
The current SparkSession
use_openvino: bool
Use OpenVINO backend

Returns
-------
XlmRoBertaEmbeddings
The restored model
"""
from sparknlp.internal import _XlmRoBertaLoader
jModel = _XlmRoBertaLoader(folder, spark_session._jsparkSession)._java_obj
jModel = _XlmRoBertaLoader(folder, spark_session._jsparkSession, use_openvino)._java_obj
return XlmRoBertaEmbeddings(java_model=jModel)

@staticmethod
Expand Down
4 changes: 2 additions & 2 deletions python/sparknlp/annotator/seq2seq/llama2_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ def __init__(self, classname="com.johnsnowlabs.nlp.annotators.seq2seq.LLAMA2Tran
)

@staticmethod
def loadSavedModel(folder, spark_session):
def loadSavedModel(folder, spark_session, use_openvino = False):
"""Loads a locally saved model.
Parameters
Expand All @@ -317,7 +317,7 @@ def loadSavedModel(folder, spark_session):
The restored model
"""
from sparknlp.internal import _LLAMA2Loader
jModel = _LLAMA2Loader(folder, spark_session._jsparkSession)._java_obj
jModel = _LLAMA2Loader(folder, spark_session._jsparkSession, use_openvino)._java_obj
return LLAMA2Transformer(java_model=jModel)

@staticmethod
Expand Down
24 changes: 12 additions & 12 deletions python/sparknlp/internal/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,14 @@ def __init__(self, path, jspark):


class _BertLoader(ExtendedJavaWrapper):
def __init__(self, path, jspark):
super(_BertLoader, self).__init__("com.johnsnowlabs.nlp.embeddings.BertEmbeddings.loadSavedModel", path, jspark)
def __init__(self, path, jspark, use_openvino=False):
super(_BertLoader, self).__init__("com.johnsnowlabs.nlp.embeddings.BertEmbeddings.loadSavedModel", path, jspark, use_openvino)


class _BertSentenceLoader(ExtendedJavaWrapper):
def __init__(self, path, jspark):
def __init__(self, path, jspark, use_openvino=False):
super(_BertSentenceLoader, self).__init__(
"com.johnsnowlabs.nlp.embeddings.BertSentenceEmbeddings.loadSavedModel", path, jspark)
"com.johnsnowlabs.nlp.embeddings.BertSentenceEmbeddings.loadSavedModel", path, jspark, use_openvino)


class _BertSequenceClassifierLoader(ExtendedJavaWrapper):
Expand Down Expand Up @@ -144,8 +144,8 @@ def __init__(self, path, jspark):


class _E5Loader(ExtendedJavaWrapper):
def __init__(self, path, jspark):
super(_E5Loader, self).__init__("com.johnsnowlabs.nlp.embeddings.E5Embeddings.loadSavedModel", path, jspark)
def __init__(self, path, jspark, use_openvino=False):
super(_E5Loader, self).__init__("com.johnsnowlabs.nlp.embeddings.E5Embeddings.loadSavedModel", path, jspark, use_openvino)


class _BGELoader(ExtendedJavaWrapper):
Expand All @@ -160,9 +160,9 @@ def __init__(self, path, jspark):


class _LLAMA2Loader(ExtendedJavaWrapper):
def __init__(self, path, jspark):
def __init__(self, path, jspark, use_openvino=False):
super(_LLAMA2Loader, self).__init__(
"com.johnsnowlabs.nlp.annotators.seq2seq.LLAMA2Transformer.loadSavedModel", path, jspark)
"com.johnsnowlabs.nlp.annotators.seq2seq.LLAMA2Transformer.loadSavedModel", path, jspark, use_openvino)


class _LongformerLoader(ExtendedJavaWrapper):
Expand Down Expand Up @@ -212,9 +212,9 @@ def __init__(self, path, jspark):


class _RoBertaLoader(ExtendedJavaWrapper):
def __init__(self, path, jspark):
def __init__(self, path, jspark, use_openvino=False):
super(_RoBertaLoader, self).__init__("com.johnsnowlabs.nlp.embeddings.RoBertaEmbeddings.loadSavedModel", path,
jspark)
jspark, use_openvino)


class _RoBertaSentenceLoader(ExtendedJavaWrapper):
Expand Down Expand Up @@ -261,9 +261,9 @@ def __init__(self, path, jspark, loadsp):


class _XlmRoBertaLoader(ExtendedJavaWrapper):
def __init__(self, path, jspark):
def __init__(self, path, jspark, use_openvino=False):
super(_XlmRoBertaLoader, self).__init__("com.johnsnowlabs.nlp.embeddings.XlmRoBertaEmbeddings.loadSavedModel",
path, jspark)
path, jspark, use_openvino)


class _XlmRoBertaSentenceLoader(ExtendedJavaWrapper):
Expand Down
4 changes: 3 additions & 1 deletion src/main/scala/com/johnsnowlabs/ml/ai/Bart.scala
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import com.johnsnowlabs.ml.tensorflow.{TensorResources, TensorflowWrapper}
import com.johnsnowlabs.nlp.annotators.common.SentenceSplit
import com.johnsnowlabs.nlp.annotators.tokenizer.bpe.{BartTokenizer, BpeTokenizer}
import com.johnsnowlabs.nlp.{Annotation, AnnotatorType}
import org.intel.openvino.InferRequest
import org.tensorflow.{Session, Tensor}

import scala.collection.JavaConverters._
Expand Down Expand Up @@ -366,7 +367,8 @@ private[johnsnowlabs] class Bart(
decoderEncoderStateTensors: Either[Tensor, OnnxTensor],
encoderAttentionMaskTensors: Either[Tensor, OnnxTensor],
maxLength: Int,
session: Either[Session, (OrtEnvironment, OrtSession)]): Array[Array[Float]] = {
session: Either[Session, (OrtEnvironment, OrtSession)],
ovInferRequest: Option[InferRequest]): Array[Array[Float]] = {

// extract decoderEncoderStateTensors, encoderAttentionMaskTensors and Session from LEFT
assert(decoderEncoderStateTensors.isLeft)
Expand Down
43 changes: 42 additions & 1 deletion src/main/scala/com/johnsnowlabs/ml/ai/Bert.scala
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@ package com.johnsnowlabs.ml.ai
import ai.onnxruntime.OnnxTensor
import com.johnsnowlabs.ml.ai.util.PrepareEmbeddings
import com.johnsnowlabs.ml.onnx.{OnnxSession, OnnxWrapper}
import com.johnsnowlabs.ml.openvino.OpenvinoWrapper
import com.johnsnowlabs.ml.tensorflow.sign.{ModelSignatureConstants, ModelSignatureManager}
import com.johnsnowlabs.ml.tensorflow.{TensorResources, TensorflowWrapper}
import com.johnsnowlabs.ml.util.{ModelArch, ONNX, TensorFlow}
import com.johnsnowlabs.ml.util.{ModelArch, ONNX, Openvino, TensorFlow}
import com.johnsnowlabs.nlp.annotators.common._
import com.johnsnowlabs.nlp.{Annotation, AnnotatorType}
import org.slf4j.{Logger, LoggerFactory}
import org.intel.openvino.Tensor

import scala.collection.JavaConverters._

Expand All @@ -40,6 +42,8 @@ import scala.collection.JavaConverters._
* Bert Model wrapper with TensorFlow Wrapper
* @param onnxWrapper
* Bert Model wrapper with ONNX Wrapper
* @param openvinoWrapper
* Bert Model wrapper with OpenVINO Wrapper
* @param sentenceStartTokenId
* Id of sentence start Token
* @param sentenceEndTokenId
Expand All @@ -54,6 +58,7 @@ import scala.collection.JavaConverters._
private[johnsnowlabs] class Bert(
val tensorflowWrapper: Option[TensorflowWrapper],
val onnxWrapper: Option[OnnxWrapper],
val openvinoWrapper: Option[OpenvinoWrapper],
sentenceStartTokenId: Int,
sentenceEndTokenId: Int,
configProtoBytes: Option[Array[Byte]] = None,
Expand All @@ -67,6 +72,7 @@ private[johnsnowlabs] class Bert(
val detectedEngine: String =
if (tensorflowWrapper.isDefined) TensorFlow.name
else if (onnxWrapper.isDefined) ONNX.name
else if (openvinoWrapper.isDefined) Openvino.name
else TensorFlow.name
private val onnxSessionOptions: Map[String, String] = new OnnxSession().getSessionOptions

Expand Down Expand Up @@ -136,6 +142,23 @@ private[johnsnowlabs] class Bert(
maskTensors.close()
segmentTensors.close()
}
case Openvino.name =>
val shape = Array(batchLength, maxSentenceLength)
val (tokenTensors, maskTensors) =
PrepareEmbeddings.prepareOvLongBatchTensors(batch, maxSentenceLength, batchLength)
val segmentTensors = new Tensor(shape, Array.fill(batchLength * maxSentenceLength)(0L))

val inferRequest = openvinoWrapper.get.getCompiledModel().create_infer_request()
inferRequest.set_tensor("input_ids", tokenTensors)
inferRequest.set_tensor("attention_mask", maskTensors)
inferRequest.set_tensor("token_type_ids", segmentTensors)

inferRequest.infer()

val result = inferRequest.get_tensor("last_hidden_state")
val embeddings = result.data()

embeddings
case _ =>
val tensors = new TensorResources()

Expand Down Expand Up @@ -244,6 +267,22 @@ private[johnsnowlabs] class Bert(
// Rethrow the exception to propagate it further
throw e
}
case Openvino.name =>
val shape = Array(batchLength, maxSentenceLength)
val (tokenTensors, maskTensors) =
PrepareEmbeddings.prepareOvLongBatchTensors(batch, maxSentenceLength, batchLength)
val segmentTensors = new Tensor(shape, Array.fill(batchLength * maxSentenceLength)(0L))

val inferRequest = openvinoWrapper.get.getCompiledModel().create_infer_request()
inferRequest.set_tensor("input_ids", tokenTensors)
inferRequest.set_tensor("attention_mask", maskTensors)
inferRequest.set_tensor("token_type_ids", segmentTensors)

inferRequest.infer()

val result = inferRequest.get_tensor("last_hidden_state")
val embeddings = result.data()
embeddings
case _ =>
val tensors = new TensorResources()

Expand Down Expand Up @@ -297,6 +336,8 @@ private[johnsnowlabs] class Bert(

def tagSequenceSBert(batch: Seq[Array[Int]]): Array[Array[Float]] = {
detectedEngine match {
case Openvino.name =>
tagSequence(batch)
case ONNX.name =>
tagSequence(batch)
case TensorFlow.name =>
Expand Down
Loading
Loading