Skip to content

Commit

Permalink
Sparknlp 967: Add ONNX support to XLM-RoBERTa classifiers (#14130)
Browse files Browse the repository at this point in the history
* fixing the method-name typo (getRowScoresWithOnnx -> getRawScoresWithOnnx) + adding support for ONNX to XLM-RoBERTa

* adding conversion notebooks
  • Loading branch information
ahmedlone127 authored Jan 18, 2024
1 parent 8585b7e commit 818de8d
Show file tree
Hide file tree
Showing 14 changed files with 7,116 additions and 219 deletions.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ private[johnsnowlabs] class AlbertClassification(
val maxSentenceLength = batch.map(encodedSentence => encodedSentence.length).max

val rawScores = detectedEngine match {
case ONNX.name => getRowScoresWithOnnx(batch, maxSentenceLength, sequence = true)
case ONNX.name => getRawScoresWithOnnx(batch, maxSentenceLength, sequence = true)
case _ => getRawScoresWithTF(batch, maxSentenceLength)
}

Expand All @@ -128,7 +128,7 @@ private[johnsnowlabs] class AlbertClassification(
val maxSentenceLength = batch.map(encodedSentence => encodedSentence.length).max

val rawScores = detectedEngine match {
case ONNX.name => getRowScoresWithOnnx(batch, maxSentenceLength, sequence = true)
case ONNX.name => getRawScoresWithOnnx(batch, maxSentenceLength, sequence = true)
case _ => getRawScoresWithTF(batch, maxSentenceLength)
}

Expand Down Expand Up @@ -203,7 +203,7 @@ private[johnsnowlabs] class AlbertClassification(
rawScores
}

private def getRowScoresWithOnnx(
private def getRawScoresWithOnnx(
batch: Seq[Array[Int]],
maxSentenceLength: Int,
sequence: Boolean): Array[Float] = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ private[johnsnowlabs] class BertClassification(

val rawScores = detectedEngine match {
case ONNX.name =>
getRowScoresWithOnnx(batch, maxSentenceLength)
getRawScoresWithOnnx(batch, maxSentenceLength)
case _ => getRawScoresWithTF(batch, maxSentenceLength)
}

Expand Down Expand Up @@ -218,7 +218,7 @@ private[johnsnowlabs] class BertClassification(
rawScores
}

private def getRowScoresWithOnnx(
private def getRawScoresWithOnnx(
batch: Seq[Array[Int]],
maxSentenceLength: Int): Array[Float] = {

Expand Down Expand Up @@ -265,7 +265,7 @@ private[johnsnowlabs] class BertClassification(
val maxSentenceLength = batch.map(encodedSentence => encodedSentence.length).max
val rawScores = detectedEngine match {
case ONNX.name =>
getRowScoresWithOnnx(batch, maxSentenceLength)
getRawScoresWithOnnx(batch, maxSentenceLength)
case _ => getRawScoresWithTF(batch, maxSentenceLength)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ private[johnsnowlabs] class CamemBertClassification(
val maxSentenceLength = batch.map(encodedSentence => encodedSentence.length).max

val rawScores = detectedEngine match {
case ONNX.name => getRowScoresWithOnnx(batch)
case ONNX.name => getRawScoresWithOnnx(batch)
case _ => getRawScoresWithTF(batch, maxSentenceLength)
}

Expand Down Expand Up @@ -189,7 +189,7 @@ private[johnsnowlabs] class CamemBertClassification(
rawScores
}

private def getRowScoresWithOnnx(batch: Seq[Array[Int]]): Array[Float] = {
private def getRawScoresWithOnnx(batch: Seq[Array[Int]]): Array[Float] = {

// [nb of encoded sentences , maxSentenceLength]
val (runner, env) = onnxWrapper.get.getSession(onnxSessionOptions)
Expand Down Expand Up @@ -227,7 +227,7 @@ private[johnsnowlabs] class CamemBertClassification(
val batchLength = batch.length

val rawScores = detectedEngine match {
case ONNX.name => getRowScoresWithOnnx(batch)
case ONNX.name => getRawScoresWithOnnx(batch)
case _ => getRawScoresWithTF(batch, maxSentenceLength)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ private[johnsnowlabs] class DeBertaClassification(
val batchLength = batch.length

val rawScores = detectedEngine match {
case ONNX.name => getRowScoresWithOnnx(batch)
case ONNX.name => getRawScoresWithOnnx(batch)
case _ => getRawScoresWithTF(batch)
}

Expand Down Expand Up @@ -182,7 +182,7 @@ private[johnsnowlabs] class DeBertaClassification(
rawScores
}

private def getRowScoresWithOnnx(batch: Seq[Array[Int]]): Array[Float] = {
private def getRawScoresWithOnnx(batch: Seq[Array[Int]]): Array[Float] = {

// [nb of encoded sentences , maxSentenceLength]
val (runner, env) = onnxWrapper.get.getSession(onnxSessionOptions)
Expand Down Expand Up @@ -219,7 +219,7 @@ private[johnsnowlabs] class DeBertaClassification(
val batchLength = batch.length

val rawScores = detectedEngine match {
case ONNX.name => getRowScoresWithOnnx(batch)
case ONNX.name => getRawScoresWithOnnx(batch)
case _ => getRawScoresWithTF(batch)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ private[johnsnowlabs] class DistilBertClassification(
val maxSentenceLength = batch.map(encodedSentence => encodedSentence.length).max

val rawScores = detectedEngine match {
case ONNX.name => getRowScoresWithOnnx(batch)
case ONNX.name => getRawScoresWithOnnx(batch)
case _ => getRawScoresWithTF(batch, maxSentenceLength)
}

Expand Down Expand Up @@ -211,7 +211,7 @@ private[johnsnowlabs] class DistilBertClassification(
rawScores
}

private def getRowScoresWithOnnx(batch: Seq[Array[Int]]): Array[Float] = {
private def getRawScoresWithOnnx(batch: Seq[Array[Int]]): Array[Float] = {

val (runner, env) = onnxWrapper.get.getSession(onnxSessionOptions)

Expand Down Expand Up @@ -247,7 +247,7 @@ private[johnsnowlabs] class DistilBertClassification(
val maxSentenceLength = batch.map(encodedSentence => encodedSentence.length).max

val rawScores = detectedEngine match {
case ONNX.name => getRowScoresWithOnnx(batch)
case ONNX.name => getRawScoresWithOnnx(batch)
case _ => getRawScoresWithTF(batch, maxSentenceLength)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ private[johnsnowlabs] class RoBertaClassification(
val maxSentenceLength = batch.map(encodedSentence => encodedSentence.length).max

val rawScores = detectedEngine match {
case ONNX.name => getRowScoresWithOnnx(batch)
case ONNX.name => getRawScoresWithOnnx(batch)
case _ => getRawScoresWithTF(batch, maxSentenceLength)
}

Expand Down Expand Up @@ -207,7 +207,7 @@ private[johnsnowlabs] class RoBertaClassification(
rawScores
}

private def getRowScoresWithOnnx(batch: Seq[Array[Int]]): Array[Float] = {
private def getRawScoresWithOnnx(batch: Seq[Array[Int]]): Array[Float] = {

// [nb of encoded sentences , maxSentenceLength]
val (runner, env) = onnxWrapper.get.getSession(onnxSessionOptions)
Expand Down Expand Up @@ -244,7 +244,7 @@ private[johnsnowlabs] class RoBertaClassification(
val maxSentenceLength = batch.map(encodedSentence => encodedSentence.length).max

val rawScores = detectedEngine match {
case ONNX.name => getRowScoresWithOnnx(batch)
case ONNX.name => getRawScoresWithOnnx(batch)
case _ => getRawScoresWithTF(batch, maxSentenceLength)
}

Expand Down
Loading

0 comments on commit 818de8d

Please sign in to comment.