From 7adf658f1e407d847a6e11bf4402c05470c1e835 Mon Sep 17 00:00:00 2001 From: Danilo Burbano Date: Wed, 2 Oct 2024 17:30:14 -0500 Subject: [PATCH 1/6] [SPARKNLP-1068] Introducing BLIPForQuestionAnswering transformer --- python/sparknlp/annotator/cv/__init__.py | 1 + .../cv/blip_for_question_answering.py | 107 +++++++ python/sparknlp/base/image_assembler.py | 11 + python/sparknlp/base/light_pipeline.py | 29 +- python/sparknlp/internal/__init__.py | 8 + .../cv/blip_for_question_answering_test.py | 80 +++++ .../johnsnowlabs/ml/ai/BLIPClassifier.scala | 215 +++++++++++++ .../johnsnowlabs/nlp/AnnotationImage.scala | 24 +- .../nlp/HasBatchedAnnotateImage.scala | 3 +- .../com/johnsnowlabs/nlp/ImageAssembler.scala | 40 ++- .../com/johnsnowlabs/nlp/LightPipeline.scala | 83 +++-- .../cv/BLIPForQuestionAnswering.scala | 301 ++++++++++++++++++ .../tokenizer/bpe/BertTokenizer.scala | 81 +++++ .../tokenizer/bpe/BpeSpecialTokens.scala | 8 + .../nlp/pretrained/PretrainedPipeline.scala | 11 +- .../johnsnowlabs/nlp/AssertAnnotations.scala | 9 +- .../johnsnowlabs/nlp/ImageAssemblerTest.scala | 29 +- .../cv/BLIPForQuestionAnsweringTest.scala | 186 +++++++++++ ...LIPForZeroShotClassificationTestSpec.scala | 2 +- .../cv/ViTImageClassificationTestSpec.scala | 6 +- ...derDecoderForImageCaptioningTestSpec.scala | 2 +- 21 files changed, 1175 insertions(+), 61 deletions(-) create mode 100644 python/sparknlp/annotator/cv/blip_for_question_answering.py create mode 100644 python/test/annotator/cv/blip_for_question_answering_test.py create mode 100644 src/main/scala/com/johnsnowlabs/ml/ai/BLIPClassifier.scala create mode 100644 src/main/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnswering.scala create mode 100644 src/main/scala/com/johnsnowlabs/nlp/annotators/tokenizer/bpe/BertTokenizer.scala create mode 100644 src/test/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnsweringTest.scala diff --git a/python/sparknlp/annotator/cv/__init__.py 
b/python/sparknlp/annotator/cv/__init__.py index 7c89437989600b..37eeaf696bb2a8 100644 --- a/python/sparknlp/annotator/cv/__init__.py +++ b/python/sparknlp/annotator/cv/__init__.py @@ -16,3 +16,4 @@ from sparknlp.annotator.cv.convnext_for_image_classification import * from sparknlp.annotator.cv.vision_encoder_decoder_for_image_captioning import * from sparknlp.annotator.cv.clip_for_zero_shot_classification import * +from sparknlp.annotator.cv.blip_for_question_answering import * \ No newline at end of file diff --git a/python/sparknlp/annotator/cv/blip_for_question_answering.py b/python/sparknlp/annotator/cv/blip_for_question_answering.py new file mode 100644 index 00000000000000..b861449e27d862 --- /dev/null +++ b/python/sparknlp/annotator/cv/blip_for_question_answering.py @@ -0,0 +1,107 @@ +# Copyright 2017-2024 John Snow Labs +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from sparknlp.common import * + +class BLIPForQuestionAnswering(AnnotatorModel, + HasBatchedAnnotateImage, + HasImageFeatureProperties, + HasEngine, + HasCandidateLabelsProperties, + HasRescaleFactor): + + name = "BLIPForQuestionAnswering" + + inputAnnotatorTypes = [AnnotatorType.IMAGE] + + outputAnnotatorType = AnnotatorType.DOCUMENT + + configProtoBytes = Param(Params._dummy(), + "configProtoBytes", + "ConfigProto from tensorflow, serialized into byte array. 
Get with " + "config_proto.SerializeToString()", + TypeConverters.toListInt) + + maxSentenceLength = Param(Params._dummy(), + "maxSentenceLength", + "Maximum sentence length that the annotator will process. Above this, the sentence is skipped", + typeConverter=TypeConverters.toInt) + + def setMaxSentenceSize(self, value): + """Sets Maximum sentence length that the annotator will process, by + default 50. + + Parameters + ---------- + value : int + Maximum sentence length that the annotator will process + """ + return self._set(maxSentenceLength=value) + + + @keyword_only + def __init__(self, classname="com.johnsnowlabs.nlp.annotators.cv.BLIPForQuestionAnswering", + java_model=None): + super(BLIPForQuestionAnswering, self).__init__( + classname=classname, + java_model=java_model + ) + self._setDefault( + batchSize=2, + size=224, + maxSentenceLength=50 + ) + + @staticmethod + def loadSavedModel(folder, spark_session): + """Loads a locally saved model. + + Parameters + ---------- + folder : str + Folder of the saved model + spark_session : pyspark.sql.SparkSession + The current SparkSession + + Returns + ------- + BLIPForQuestionAnswering + The restored model + """ + from sparknlp.internal import _BLIPForQuestionAnswering + jModel = _BLIPForQuestionAnswering(folder, spark_session._jsparkSession)._java_obj + return BLIPForQuestionAnswering(java_model=jModel) + + @staticmethod + def pretrained(name="blip_vqa_tf", lang="en", remote_loc=None): + """Downloads and loads a pretrained model. + + Parameters + ---------- + name : str, optional + Name of the pretrained model, by default + "blip_vqa_tf" + lang : str, optional + Language of the pretrained model, by default "en" + remote_loc : str, optional + Optional remote address of the resource, by default None. Will use + Spark NLPs repositories otherwise. 
+ + Returns + ------- + BLIPForQuestionAnswering + The restored model + """ + from sparknlp.pretrained import ResourceDownloader + return ResourceDownloader.downloadModel(BLIPForQuestionAnswering, name, lang, remote_loc) \ No newline at end of file diff --git a/python/sparknlp/base/image_assembler.py b/python/sparknlp/base/image_assembler.py index 3214ff37324172..cc8a9eb8c91253 100644 --- a/python/sparknlp/base/image_assembler.py +++ b/python/sparknlp/base/image_assembler.py @@ -65,6 +65,7 @@ class ImageAssembler(AnnotatorTransformer): outputAnnotatorType = AnnotatorType.IMAGE inputCol = Param(Params._dummy(), "inputCol", "input column name", typeConverter=TypeConverters.toString) + textCol = Param(Params._dummy(), "textCol", "text column name", typeConverter=TypeConverters.toString) outputCol = Param(Params._dummy(), "outputCol", "output column name", typeConverter=TypeConverters.toString) name = 'ImageAssembler' @@ -101,3 +102,13 @@ def setOutputCol(self, value): def getOutputCol(self): """Gets output column name of annotations.""" return self.getOrDefault(self.outputCol) + + def setTextCol(self, value): + """Sets an optional text column name. + + Parameters + ---------- + value : str + Name of an optional input text column + """ + return self._set(textCol=value) diff --git a/python/sparknlp/base/light_pipeline.py b/python/sparknlp/base/light_pipeline.py index 0622652fc01a42..4dd4f9128622ad 100644 --- a/python/sparknlp/base/light_pipeline.py +++ b/python/sparknlp/base/light_pipeline.py @@ -277,7 +277,7 @@ def __fullAnnotateQuestionAnswering(self, question, context): return result - def fullAnnotateImage(self, path_to_image): + def fullAnnotateImage(self, path_to_image, text=None): """Annotates the data provided into `Annotation` type results. The data should be either a list or a str. 
@@ -287,27 +287,38 @@ def fullAnnotateImage(self, path_to_image): path_to_image : list or str Source path of image, list of paths to images + text: list or str, optional + Optional list or str of texts. If None, defaults to empty list if path_to_image is a list, or empty string if path_to_image is a string. + Returns ------- List[AnnotationImage] The result of the annotation """ + if not isinstance(path_to_image, (str, list)): + raise TypeError("argument for path_to_image must be 'str' or 'list[str]'") + + if text is None: + text = "" if isinstance(path_to_image, str) else [] + + if type(path_to_image) != type(text): + raise ValueError("`path_to_image` and `text` must be of the same type") + stages = self.pipeline_model.stages if not self._skipPipelineValidation(stages): self._validateStagesInputCols(stages) - if type(path_to_image) is str: + if isinstance(path_to_image, str): path_to_image = [path_to_image] + text = [text] - if type(path_to_image) is list: - result = [] + result = [] - for image_result in self._lightPipeline.fullAnnotateImageJava(path_to_image): - result.append(self.__buildStages(image_result)) + for image_result in self._lightPipeline.fullAnnotateImageJava(path_to_image, text): + result.append(self.__buildStages(image_result)) + + return result - return result - else: - raise TypeError("argument for annotation may be 'str' or list[str]") def __buildStages(self, annotations_result): stages = {} diff --git a/python/sparknlp/internal/__init__.py b/python/sparknlp/internal/__init__.py index c8732ef3ecb4e5..0386e5201968e4 100644 --- a/python/sparknlp/internal/__init__.py +++ b/python/sparknlp/internal/__init__.py @@ -999,3 +999,11 @@ def __init__(self, path, jspark): super(_SnowFlakeEmbeddingsLoader, self).__init__( "com.johnsnowlabs.nlp.embeddings.SnowFlakeEmbeddings.loadSavedModel", path, jspark ) + +class _BLIPForQuestionAnswering(ExtendedJavaWrapper): + def __init__(self, path, jspark): + super(_BLIPForQuestionAnswering, self).__init__( + 
"com.johnsnowlabs.nlp.annotators.cv.BLIPForQuestionAnswering.loadSavedModel", + path, + jspark, + ) \ No newline at end of file diff --git a/python/test/annotator/cv/blip_for_question_answering_test.py b/python/test/annotator/cv/blip_for_question_answering_test.py new file mode 100644 index 00000000000000..8eb0dbae3e70ae --- /dev/null +++ b/python/test/annotator/cv/blip_for_question_answering_test.py @@ -0,0 +1,80 @@ +# Copyright 2017-2024 John Snow Labs +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import unittest +import pytest +import os + +from sparknlp.annotator import * +from sparknlp.base import * +from pyspark.sql.functions import lit +from test.util import SparkSessionForTest + + +class BLIPForQuestionAnsweringTestSetup(unittest.TestCase): + + def setUp(self): + self.images_path = os.getcwd() + "/../src/test/resources/image/" + image_df = SparkSessionForTest.spark.read.format("image").load( + path=self.images_path + ) + + self.test_df = image_df.withColumn("text", lit("What's this picture about?")) + + image_assembler = ImageAssembler().setInputCol("image").setOutputCol("image_assembler") + + imageClassifier = BLIPForQuestionAnswering.pretrained() \ + .setInputCols("image_assembler") \ + .setOutputCol("answer") \ + .setSize(384) + + self.pipeline = Pipeline( + stages=[ + image_assembler, + imageClassifier, + ] + ) + + self.model = self.pipeline.fit(self.test_df) + +@pytest.mark.slow +class BLIPForQuestionAnsweringTest(BLIPForQuestionAnsweringTestSetup, unittest.TestCase): + + def setUp(self): + super().setUp() + + def runTest(self): + result = self.model.transform(self.test_df).collect() + + for row in result: + self.assertTrue(row["answer"] != "") + + +@pytest.mark.slow +class LightBLIPForQuestionAnsweringTest(BLIPForQuestionAnsweringTestSetup, unittest.TestCase): + + def setUp(self): + super().setUp() + + def runTest(self): + light_pipeline = LightPipeline(self.model) + image_path = self.images_path + "bluetick.jpg" + print("image_path: " + image_path) + annotations_result = light_pipeline.fullAnnotateImage( + image_path, + "What's this picture about?" 
+ ) + + for result in annotations_result: + self.assertTrue(len(result["image_assembler"]) > 0) + self.assertTrue(len(result["answer"]) > 0) \ No newline at end of file diff --git a/src/main/scala/com/johnsnowlabs/ml/ai/BLIPClassifier.scala b/src/main/scala/com/johnsnowlabs/ml/ai/BLIPClassifier.scala new file mode 100644 index 00000000000000..3182d6dd0fdf92 --- /dev/null +++ b/src/main/scala/com/johnsnowlabs/ml/ai/BLIPClassifier.scala @@ -0,0 +1,215 @@ +/* + * Copyright 2017-2024 John Snow Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.johnsnowlabs.ml.ai + +import com.johnsnowlabs.ml.tensorflow.sign.{ModelSignatureConstants, ModelSignatureManager} +import com.johnsnowlabs.ml.tensorflow.{TensorResources, TensorflowWrapper} +import com.johnsnowlabs.nlp.annotators.common._ +import com.johnsnowlabs.nlp.annotators.cv.feature_extractor.Preprocessor +import com.johnsnowlabs.nlp.annotators.cv.util.io.ImageIOUtils +import com.johnsnowlabs.nlp.annotators.cv.util.transform.ImageResizeUtils +import com.johnsnowlabs.nlp.annotators.tokenizer.bpe.BertTokenizer +import com.johnsnowlabs.nlp.annotators.tokenizer.wordpiece.WordpieceEncoder +import com.johnsnowlabs.nlp.{Annotation, AnnotationImage} +import org.tensorflow.ndarray.buffer.{IntDataBuffer, LongDataBuffer} + +import scala.collection.JavaConverters._ + +private[johnsnowlabs] class BLIPClassifier( + val tensorflowWrapper: TensorflowWrapper, + configProtoBytes: Option[Array[Byte]] = None, + tokenizer: BertTokenizer, + preprocessor: Preprocessor, + signatures: Option[Map[String, String]] = None, + vocabulary: Map[String, Int]) + extends Serializable { + + private val _tfBLIPSignatures: Map[String, String] = + signatures.getOrElse(ModelSignatureManager.apply()) + + def predict( + images: Array[AnnotationImage], + questions: Seq[Annotation], + maxSentenceLength: Int, + batchSize: Int): Seq[Annotation] = { + + val sentences = SentenceSplit.unpack(questions).toArray + val tokenizedSentences = TokenizedWithSentence.unpack(questions).toArray + val inputIds = encodeTokenizedSentence( + tokenizedSentences, + sentences, + batchSize, + maxSentenceLength, + caseSensitive = false) + + val pixelValues = images + .grouped(batchSize) + .flatMap { batch => + encodeImage(batch, preprocessor) + } + .toArray + + val outputs = generate(pixelValues, inputIds, maxSentenceLength) + val decodedOutput = tokenizer.decodeTokens(outputs) + Seq(Annotation(decodedOutput)) + } + + def generate( + imagesBatch: Array[Array[Array[Array[Float]]]], + inputsBatch: 
Array[Array[Int]], + maxSentenceLength: Int): Array[Int] = { + val tensors = new TensorResources() + val imageTensors = tensors.createTensor(imagesBatch) + + val batchLength = inputsBatch.length + // [nb of encoded sentences , maxSentenceLength] + val shape = Array(imagesBatch.length.toLong, maxSentenceLength) + + val tokenBuffers: IntDataBuffer = tensors.createIntBuffer(batchLength * maxSentenceLength) + val maskBuffers: LongDataBuffer = tensors.createLongBuffer(batchLength * maxSentenceLength) + + inputsBatch.zipWithIndex + .foreach { case (sentence, idx) => + val offset = idx * maxSentenceLength + tokenBuffers.offset(offset).write(sentence) + maskBuffers.offset(offset).write(sentence.map(x => if (x == 0) 0L else 1L)) + } + + val tokenTensors = tensors.createIntBufferTensor(shape, tokenBuffers) + val maskTensors = tensors.createLongBufferTensor(shape, maskBuffers) + + val runner = tensorflowWrapper + .getTFSessionWithSignature(configProtoBytes = configProtoBytes, initAllTables = false) + .runner + + runner + .feed( + _tfBLIPSignatures + .getOrElse(ModelSignatureConstants.InputIds.key, "missing_input_ids"), + tokenTensors) + .feed( + _tfBLIPSignatures + .getOrElse(ModelSignatureConstants.AttentionMask.key, "missing_input_mask_key"), + maskTensors) + .feed( + _tfBLIPSignatures + .getOrElse(ModelSignatureConstants.PixelValuesInput.key, "missing_pixel_values"), + imageTensors) + .fetch(_tfBLIPSignatures + .getOrElse(ModelSignatureConstants.DecoderOutput.key, "missing_output")) + + val outs = runner.run().asScala + val output = TensorResources.extractInts(outs.head) + + tensors.clearSession(outs) + tensors.clearTensors() + imageTensors.close() + + output + } + + /** Calculate softmax from returned logits + * @param scores + * logits output from output layer + * @return + */ + def calculateSoftmax(scores: Array[Float]): Array[Float] = { + val exp = scores.map(x => math.exp(x)) + exp.map(x => x / exp.sum).map(_.toFloat) + } + + private def encodeImage( + annotations: 
Array[AnnotationImage], + preprocessor: Preprocessor): Array[Array[Array[Array[Float]]]] = { + + val batchProcessedImages = annotations.map { annot => + val bufferedImage = ImageIOUtils.byteToBufferedImage( + bytes = annot.result, + w = annot.width, + h = annot.height, + nChannels = annot.nChannels) + + val resizedImage = if (preprocessor.do_resize) { + ImageResizeUtils.resizeBufferedImage( + width = preprocessor.size, + height = preprocessor.size, + preprocessor.resample)(bufferedImage) + } else bufferedImage + + val normalizedImage = + ImageResizeUtils.normalizeAndConvertBufferedImage( + img = resizedImage, + mean = preprocessor.image_mean, + std = preprocessor.image_std, + doNormalize = preprocessor.do_normalize, + doRescale = preprocessor.do_rescale, + rescaleFactor = preprocessor.rescale_factor) + + normalizedImage + } + + batchProcessedImages + + } + + def encodeTokenizedSentence( + tokenizedSentences: Seq[TokenizedSentence], + sentences: Seq[Sentence], + batchSize: Int, + maxSentenceLength: Int, + caseSensitive: Boolean): Array[Array[Int]] = { + val wordPieceTokenizedSentences = + tokenizeWithAlignment(tokenizedSentences, maxSentenceLength, caseSensitive) + + /*Run calculation by batches*/ + wordPieceTokenizedSentences + .zip(sentences) + .zipWithIndex + .grouped(batchSize) + .flatMap { batch => + val tokensBatch = batch.map(x => (x._1._1, x._2)) + tokenizer.encode(tokensBatch, maxSentenceLength) + } + .toArray + } + + def tokenizeWithAlignment( + sentences: Seq[TokenizedSentence], + maxSeqLength: Int, + caseSensitive: Boolean): Seq[WordpieceTokenizedSentence] = { + + val encoder = new WordpieceEncoder(vocabulary) + + sentences.map { tokenIndex => + // filter empty and only whitespace tokens + val bertTokens = + tokenIndex.indexedTokens.filter(x => x.token.nonEmpty && !x.token.equals(" ")).map { + token => + val content = if (caseSensitive) token.token else token.token.toLowerCase() + val sentenceBegin = token.begin + val sentenceEnd = token.end + val 
sentenceIndex = tokenIndex.sentenceIndex + val result = + tokenizer.tokenize(Sentence(content, sentenceBegin, sentenceEnd, sentenceIndex)) + if (result.nonEmpty) result.head else IndexedToken("") + } + val wordpieceTokens = bertTokens.flatMap(token => encoder.encode(token)).take(maxSeqLength) + WordpieceTokenizedSentence(wordpieceTokens) + } + } + +} diff --git a/src/main/scala/com/johnsnowlabs/nlp/AnnotationImage.scala b/src/main/scala/com/johnsnowlabs/nlp/AnnotationImage.scala index 72ef1c6d73a123..b566c3c5ccb7ea 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/AnnotationImage.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/AnnotationImage.scala @@ -48,7 +48,8 @@ case class AnnotationImage( nChannels: Int, mode: Int, result: Array[Byte], - metadata: Map[String, String]) + metadata: Map[String, String], + text: String = "") extends IAnnotation { override def equals(obj: Any): Boolean = { @@ -61,7 +62,8 @@ case class AnnotationImage( this.nChannels == annotation.nChannels && this.mode == annotation.mode && this.result.sameElements(annotation.result) && - this.metadata == annotation.metadata + this.metadata == annotation.metadata && + this.text == annotation.text case _ => false } } @@ -94,6 +96,10 @@ case class AnnotationImage( metadata } + def getText: String = { + text + } + } object AnnotationImage { @@ -112,7 +118,8 @@ object AnnotationImage { StructField("mode", IntegerType, nullable = false), // Bytes in OpenCV-compatible order: row-wise BGR in most cases StructField("result", BinaryType, nullable = false), - StructField("metadata", MapType(StringType, StringType), nullable = true))) + StructField("metadata", MapType(StringType, StringType), nullable = true), + StructField("text", StringType, nullable = true))) val arrayType = new ArrayType(dataType, true) @@ -122,7 +129,8 @@ object AnnotationImage { width: Int, nChannels: Int, mode: Int, - result: Array[Byte]) + result: Array[Byte], + text: String) /** This method converts a [[org.apache.spark.sql.Row]] into 
an [[AnnotationImage]] * @@ -140,7 +148,8 @@ object AnnotationImage { row.getInt(4), row.getInt(5), row.getAs[Array[Byte]](6), - row.getMap[String, String](7)) + row.getMap[String, String](7), + row.getString(8)) } def apply(image: ImageFields): AnnotationImage = @@ -152,6 +161,6 @@ object AnnotationImage { nChannels = image.nChannels, mode = image.mode, result = Array.emptyByteArray, - Map.empty[String, String]) - + metadata = Map.empty[String, String], + text = image.text) } diff --git a/src/main/scala/com/johnsnowlabs/nlp/HasBatchedAnnotateImage.scala b/src/main/scala/com/johnsnowlabs/nlp/HasBatchedAnnotateImage.scala index ded31e5e59cb51..d105c879143fbb 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/HasBatchedAnnotateImage.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/HasBatchedAnnotateImage.scala @@ -65,7 +65,8 @@ trait HasBatchedAnnotateImage[M <: Model[M]] { r.getInt(4), r.getInt(5), r.getAs(6), - r.getMap[String, String](7))) + r.getMap[String, String](7), + r.getString(8))) }) }) val outputAnnotations = batchAnnotate(inputAnnotations) diff --git a/src/main/scala/com/johnsnowlabs/nlp/ImageAssembler.scala b/src/main/scala/com/johnsnowlabs/nlp/ImageAssembler.scala index 3ef7ccd67d9803..73b08bae40d695 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/ImageAssembler.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/ImageAssembler.scala @@ -110,7 +110,26 @@ class ImageAssembler(override val uid: String) */ def getInputCol: String = $(inputCol) - setDefault(inputCol -> IMAGE, outputCol -> "image_assembler") + /** Input text column for processing + * + * @group param + */ + val textCol: Param[String] = + new Param[String](this, "textCol", "input text column for processing") + + /** Input text column for processing + * + * @group setParam + */ + def 
setTextCol(value: String): this.type = set(textCol, value) + + /** Input text column for processing + * + * @group getParam + */ + def getTextCol: String = $(textCol) + + setDefault(inputCol -> IMAGE, outputCol -> "image_assembler", textCol -> "text") def this() = this(Identifiable.randomUID("ImageAssembler")) @@ -118,7 +137,8 @@ class ImageAssembler(override val uid: String) private[nlp] def assemble( image: Option[ImageFields], - metadata: Map[String, String]): Seq[AnnotationImage] = { + metadata: Map[String, String], + text: Option[String] = None): Seq[AnnotationImage] = { if (image.isDefined) { Seq( @@ -130,14 +150,21 @@ class ImageAssembler(override val uid: String) nChannels = image.get.nChannels, mode = image.get.mode, result = image.get.data, - metadata = metadata)) + metadata = metadata, + text = text.getOrElse(""))) } else Seq.empty } private[nlp] def dfAssemble: UserDefinedFunction = udf { (image: ImageFields) => // Apache Spark has only 1 image per row - assemble(Some(image), Map("image" -> "0")) + assemble(Some(image), Map("image" -> "0"), None) + } + + private[nlp] def dfAssembleWithText: UserDefinedFunction = udf { + (image: ImageFields, text: String) => + // Apache Spark has only 1 image per row + assemble(Some(image), Map("image" -> "0"), Some(text)) } /** requirement for pipeline transformation validation. It is called on fit() */ @@ -163,7 +190,10 @@ class ImageAssembler(override val uid: String) ImageSchemaUtils.isImage(dataset.schema(getInputCol)), s"column $getInputCol doesn't have Apache Spark ImageSchema. 
Make sure you read your images via spark.read.format(image).load(PATH)") - val imageAnnotations = { + val textColExists = dataset.schema.fields.exists(_.name == getTextCol) + val imageAnnotations = if (textColExists) { + dfAssembleWithText(dataset.col($(inputCol)), dataset.col($(textCol))) + } else { dfAssemble(dataset($(inputCol))) } diff --git a/src/main/scala/com/johnsnowlabs/nlp/LightPipeline.scala b/src/main/scala/com/johnsnowlabs/nlp/LightPipeline.scala index 2271bd945c64b5..d6793fdba19e8e 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/LightPipeline.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/LightPipeline.scala @@ -44,7 +44,7 @@ class LightPipeline(val pipelineModel: PipelineModel, parseEmbeddings: Boolean = def fullAnnotate(target: String, optionalTarget: String = ""): Map[String, Seq[IAnnotation]] = { if (target.contains("/") && ResourceHelper.validFile(target)) { - fullAnnotateImage(target) + fullAnnotateImage(target, optionalTarget) } else { fullAnnotateInternal(target, optionalTarget) } @@ -60,7 +60,7 @@ class LightPipeline(val pipelineModel: PipelineModel, parseEmbeddings: Boolean = } if (targets.head.contains("/") && ResourceHelper.validFile(targets.head)) { - targets.par.map(target => fullAnnotateImage(target)).toArray + fullAnnotateImages(targets, optionalTargets) } else { (targets zip optionalTargets).par.map { case (target, optionalTarget) => fullAnnotate(target, optionalTarget) @@ -68,14 +68,20 @@ class LightPipeline(val pipelineModel: PipelineModel, parseEmbeddings: Boolean = } } - def fullAnnotateImage(pathToImages: Array[String]): Array[Map[String, Seq[IAnnotation]]] = { - pathToImages.par - .map(imageFilePath => fullAnnotateInternal(imageFilePath)) - .toArray + def fullAnnotateImages( + pathToImages: Array[String], + texts: Array[String] = Array.empty): Array[Map[String, Seq[IAnnotation]]] = { + val safeTexts = if (texts.isEmpty) Array.fill(pathToImages.length)("") else texts + (pathToImages zip safeTexts).par.map { case (imageFilePath, 
text) => + fullAnnotateImage(imageFilePath, text) + }.toArray } - def fullAnnotateImage(pathToImage: String): Map[String, Seq[IAnnotation]] = { - fullAnnotateInternal(pathToImage) + def fullAnnotateImage(pathToImage: String, text: String = ""): Map[String, Seq[IAnnotation]] = { + val isValidFile = ResourceHelper.validFile(pathToImage) + if (!isValidFile) { + Map() + } else fullAnnotateInternal(pathToImage, text) } def fullAnnotate(audio: Array[Double]): Map[String, Seq[IAnnotation]] = { @@ -108,7 +114,7 @@ class LightPipeline(val pipelineModel: PipelineModel, parseEmbeddings: Boolean = optionalTarget, annotations) case imageAssembler: ImageAssembler => - processImageAssembler(target, imageAssembler, annotations) + processImageAssembler(target, optionalTarget, imageAssembler, annotations) case audioAssembler: AudioAssembler => processAudioAssembler(audio, audioAssembler, annotations) case lazyAnnotator: AnnotatorModel[_] if lazyAnnotator.getLazyAnnotator => annotations @@ -157,12 +163,13 @@ class LightPipeline(val pipelineModel: PipelineModel, parseEmbeddings: Boolean = private def processImageAssembler( target: String, + text: String, imageAssembler: ImageAssembler, annotations: Map[String, Seq[IAnnotation]]): Map[String, Seq[IAnnotation]] = { val currentImageFields = ImageIOUtils.imagePathToImageFields(target) annotations.updated( imageAssembler.getOutputCol, - imageAssembler.assemble(currentImageFields, Map.empty[String, String])) + imageAssembler.assemble(currentImageFields, Map.empty[String, String], Some(text))) } private def processAudioAssembler( @@ -209,9 +216,9 @@ class LightPipeline(val pipelineModel: PipelineModel, parseEmbeddings: Boolean = getCombinedAnnotations(batchedAnnotatorImage.getInputCols, annotations) val batchedAnnotations = Seq(combinedAnnotations.map(_.asInstanceOf[AnnotationImage])) - annotations.updated( - batchedAnnotatorImage.getOutputCol, - batchedAnnotatorImage.batchAnnotate(batchedAnnotations).head) + 
val outputCol = batchedAnnotatorImage.getOutputCol + val annotateResult = batchedAnnotatorImage.batchAnnotate(batchedAnnotations) + annotations.updated(outputCol, annotateResult.head) } private def processBatchedAnnotatorAudio( @@ -361,15 +368,35 @@ class LightPipeline(val pipelineModel: PipelineModel, parseEmbeddings: Boolean = fullAnnotateImage(pathToImage).mapValues(_.asJava).asJava } - def fullAnnotateImageJava(pathToImages: java.util.ArrayList[String]) + import scala.collection.JavaConverters._ + + def fullAnnotateImageJava( + pathToImages: java.util.ArrayList[String], + texts: java.util.ArrayList[String]) : java.util.List[java.util.Map[String, java.util.List[IAnnotation]]] = { - pathToImages.asScala.par - .map { imageFilePath => - fullAnnotateInternal(imageFilePath).mapValues(_.asJava).asJava + if (texts.isEmpty) { + pathToImages.asScala.par + .map { imageFilePath => + fullAnnotateInternal(imageFilePath).mapValues(_.asJava).asJava + } + .toList + .asJava + } else { + + if (pathToImages.size != texts.size) { + throw new IllegalArgumentException( + "pathToImages and texts must have the same number of elements.") } - .toList - .asJava + val imageTextPairs = pathToImages.asScala.zip(texts.asScala).par + + imageTextPairs + .map { case (imageFilePath, text) => + fullAnnotateImage(imageFilePath, text).mapValues(_.asJava).asJava + } + .toList + .asJava + } } def fullAnnotateSingleAudioJava( @@ -394,14 +421,16 @@ class LightPipeline(val pipelineModel: PipelineModel, parseEmbeddings: Boolean = } def annotate(target: String, optionalTarget: String = ""): Map[String, Seq[String]] = { - fullAnnotate(target, optionalTarget).mapValues(_.map { iAnnotation => - val annotation = iAnnotation.asInstanceOf[Annotation] - annotation.annotatorType match { - case AnnotatorType.WORD_EMBEDDINGS | AnnotatorType.SENTENCE_EMBEDDINGS - if parseEmbeddings => - annotation.embeddings.mkString(" ") - case _ => annotation.result - } + val annotations = fullAnnotate(target, optionalTarget) + 
annotations.mapValues(_.map { + case annotation: Annotation => + annotation.annotatorType match { + case AnnotatorType.WORD_EMBEDDINGS | AnnotatorType.SENTENCE_EMBEDDINGS + if parseEmbeddings => + annotation.embeddings.mkString(" ") + case _ => annotation.result + } + case _ => "" }) } diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnswering.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnswering.scala new file mode 100644 index 00000000000000..9cd5bca6ff9e35 --- /dev/null +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnswering.scala @@ -0,0 +1,301 @@ +/* + * Copyright 2017-2024 John Snow Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.johnsnowlabs.nlp.annotators.cv + +import com.johnsnowlabs.ml.ai.BLIPClassifier +import com.johnsnowlabs.ml.tensorflow.{ + ReadTensorflowModel, + TensorflowWrapper, + WriteTensorflowModel +} +import com.johnsnowlabs.ml.util.LoadExternalModel.{ + loadJsonStringAsset, + loadTextAsset, + modelSanityCheck, + notSupportedEngineError +} +import com.johnsnowlabs.ml.util.TensorFlow +import com.johnsnowlabs.nlp.AnnotatorType.{DOCUMENT, IMAGE} +import com.johnsnowlabs.nlp._ +import com.johnsnowlabs.nlp.annotators.{RegexTokenizer, Tokenizer, TokenizerModel} +import com.johnsnowlabs.nlp.annotators.cv.feature_extractor.Preprocessor +import com.johnsnowlabs.nlp.annotators.sbd.pragmatic.SentenceDetector +import com.johnsnowlabs.nlp.annotators.tokenizer.bpe.{BertTokenizer, SpecialTokens} +import com.johnsnowlabs.nlp.annotators.tokenizer.wordpiece.BasicTokenizer +import com.johnsnowlabs.nlp.serialization.MapFeature +import org.apache.spark.broadcast.Broadcast +import org.apache.spark.ml.param.{IntArrayParam, IntParam} +import org.apache.spark.ml.util.Identifiable +import org.apache.spark.sql.SparkSession + +class BLIPForQuestionAnswering(override val uid: String) + extends AnnotatorModel[BLIPForQuestionAnswering] + with HasBatchedAnnotateImage[BLIPForQuestionAnswering] + with HasImageFeatureProperties + with WriteTensorflowModel + with HasEngine { + + /** Annotator reference id. Used to identify elements in metadata or to refer to this annotator + * type + */ + def this() = this(Identifiable.randomUID("BLIPForQuestionAnswering")) + + /** Annotator reference id. Used to identify elements in metadata or to refer to this annotator + * type + */ + override val inputAnnotatorTypes: Array[AnnotatorType] = Array(IMAGE) + override val outputAnnotatorType: AnnotatorType = DOCUMENT + + /** ConfigProto from tensorflow, serialized into byte array. 
Get with + * config_proto.SerializeToString() + * + * @group param + */ + val configProtoBytes = new IntArrayParam( + this, + "configProtoBytes", + "ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString()") + + /** ConfigProto from tensorflow, serialized into byte array. Get with + * config_proto.SerializeToString() + * + * @group setParam + */ + def setConfigProtoBytes(bytes: Array[Int]): BLIPForQuestionAnswering.this.type = + set(this.configProtoBytes, bytes) + + /** ConfigProto from tensorflow, serialized into byte array. Get with + * config_proto.SerializeToString() + * + * @group getParam + */ + def getConfigProtoBytes: Option[Array[Byte]] = + get(this.configProtoBytes).map(_.map(_.toByte)) + + /** It contains TF model signatures for the loaded saved model + * + * @group param + */ + val signatures = + new MapFeature[String, String](model = this, name = "signatures").setProtected() + + /** @group setParam */ + def setSignatures(value: Map[String, String]): this.type = { + set(signatures, value) + this + } + + /** @group getParam */ + def getSignatures: Option[Map[String, String]] = get(this.signatures) + + /** Vocabulary used to encode the words to ids with WordPieceEncoder + * + * @group param + */ + val vocabulary: MapFeature[String, Int] = new MapFeature(this, "vocabulary").setProtected() + + /** @group setParam */ + def setVocabulary(value: Map[String, Int]): this.type = set(vocabulary, value) + + /** @group getParam */ + protected[nlp] def getVocabulary: Map[String, Int] = $$(vocabulary) + + /** Max sentence length to process (Default: `50`) + * + * @group param + */ + val maxSentenceLength = + new IntParam(this, "maxSentenceLength", "Max sentence length to process") + + /** @group setParam */ + def setMaxSentenceLength(value: Int): this.type = { + set(maxSentenceLength, value) + this + } + + /** @group getParam */ + def getMaxSentenceLength: Int = $(maxSentenceLength) + + private var _model: 
Option[Broadcast[BLIPClassifier]] = None + + /** @group setParam */ + def setModelIfNotSet( + spark: SparkSession, + preprocessor: Preprocessor, + tensorflow: TensorflowWrapper): this.type = { + if (_model.isEmpty) { + + val specialTokens = SpecialTokens.getSpecialTokensForModel("bert", getVocabulary) + val bertTokenizer = new BertTokenizer(getVocabulary, specialTokens) + + _model = Some( + spark.sparkContext.broadcast( + new BLIPClassifier( + tensorflow, + configProtoBytes = getConfigProtoBytes, + tokenizer = bertTokenizer, + preprocessor = preprocessor, + signatures = getSignatures, + vocabulary = $$(vocabulary)))) + } + this + } + + /** @group getParam */ + def getModelIfNotSet: BLIPClassifier = _model.get.value + + setDefault(batchSize -> 8, size -> 384, maxSentenceLength -> 50) + + /** takes a document and annotations and produces new annotations of this annotator's annotation + * type + * + * @param batchedAnnotations + * Annotations in batches that correspond to inputAnnotationCols generated by previous + * annotators if any + * @return + * any number of annotations processed for every batch of input annotations. 
Not necessary + * one to one relationship + */ + override def batchAnnotate( + batchedAnnotations: Seq[Array[AnnotationImage]]): Seq[Seq[Annotation]] = { + + batchedAnnotations + .filter { annotationImages => + annotationImages.exists(_.text.nonEmpty) + } + .map { cleanAnnotationImages => + val validImages = cleanAnnotationImages.filter(_.result.nonEmpty) + val questionAnnotations = extractInputAnnotation(validImages) + + getModelIfNotSet.predict( + validImages, + questionAnnotations, + $(batchSize), + $(maxSentenceLength)) + } + } + + private def extractInputAnnotation( + annotationImages: Array[AnnotationImage]): Seq[Annotation] = { + val questions = annotationImages.map(annotationImage => Annotation(annotationImage.text)) + val sentenceAnnotations = + new SentenceDetector().setInputCols("document").setOutputCol("sentence") + val sentencesQuestions = sentenceAnnotations.annotate(questions) + + val tokenizerAnnotation = new RegexTokenizer().setInputCols("sentence").setOutputCol("token") + val tokenQuestions = tokenizerAnnotation.annotate(sentencesQuestions) + + sentencesQuestions ++ tokenQuestions + } + + override def onWrite(path: String, spark: SparkSession): Unit = { + super.onWrite(path, spark) + writeTensorflowModelV2( + path, + spark, + getModelIfNotSet.tensorflowWrapper, + "_image_qa", + BLIPForQuestionAnswering.tfFile, + configProtoBytes = getConfigProtoBytes) + } + +} + +trait ReadablePretrainedBLIPForQuestionAnswering + extends ParamsAndFeaturesReadable[BLIPForQuestionAnswering] + with HasPretrained[BLIPForQuestionAnswering] { + + override val defaultModelName: Some[String] = Some("blip_vqa_tf") + + /** Java compliant-overrides */ + override def pretrained(): BLIPForQuestionAnswering = super.pretrained() + + override def pretrained(name: String): BLIPForQuestionAnswering = + super.pretrained(name) + + override def pretrained(name: String, lang: String): BLIPForQuestionAnswering = + super.pretrained(name, lang) + + override def pretrained( + name: String, 
+ lang: String, + remoteLoc: String): BLIPForQuestionAnswering = + super.pretrained(name, lang, remoteLoc) + +} + +trait ReadBLIPForQuestionAnsweringDLModel extends ReadTensorflowModel { + this: ParamsAndFeaturesReadable[BLIPForQuestionAnswering] => + override val tfFile: String = "blip_vqa_tensorflow" + + def readModel(instance: BLIPForQuestionAnswering, path: String, spark: SparkSession): Unit = { + val tf = readTensorflowModel(path, spark, "_blip_vqa_tf", initAllTables = false) + + val preprocessor = Preprocessor( + do_normalize = true, + do_resize = true, + "BLIPFeatureExtractor", + instance.getImageMean, + instance.getImageStd, + instance.getResample, + instance.getSize) + + instance.setModelIfNotSet(spark, preprocessor, tf) + } + + addReader(readModel) + + def loadSavedModel(modelPath: String, spark: SparkSession): BLIPForQuestionAnswering = { + val (localModelPath, detectedEngine) = modelSanityCheck(modelPath) + val preprocessorConfigJsonContent = + loadJsonStringAsset(localModelPath, "preprocessor_config.json") + val preprocessorConfig = Preprocessor.loadPreprocessorConfig(preprocessorConfigJsonContent) + val vocabs = loadTextAsset(localModelPath, "vocab.txt").zipWithIndex.toMap + + val annotatorModel = new BLIPForQuestionAnswering() + annotatorModel.set(annotatorModel.engine, detectedEngine) + + detectedEngine match { + case TensorFlow.name => + val (wrapper, signatures) = + TensorflowWrapper.read(localModelPath, zipped = false, useBundle = true) + + val _signatures = signatures match { + case Some(s) => s + case None => throw new Exception("Cannot load signature definitions from model!") + } + + /** the order of setSignatures is important if we use getSignatures inside + * setModelIfNotSet + */ + annotatorModel + .setVocabulary(vocabs) + .setSignatures(_signatures) + .setModelIfNotSet(spark, preprocessorConfig, wrapper) + .setSize(384) + + case _ => + throw new Exception(notSupportedEngineError) + } + + annotatorModel + } +} + +object 
BLIPForQuestionAnswering + extends ReadablePretrainedBLIPForQuestionAnswering + with ReadBLIPForQuestionAnsweringDLModel diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/tokenizer/bpe/BertTokenizer.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/tokenizer/bpe/BertTokenizer.scala new file mode 100644 index 00000000000000..d3650367bbe1cf --- /dev/null +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/tokenizer/bpe/BertTokenizer.scala @@ -0,0 +1,81 @@ +/* + * Copyright 2017-2024 John Snow Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.johnsnowlabs.nlp.annotators.tokenizer.bpe + +import com.johnsnowlabs.nlp.annotators.common.WordpieceTokenizedSentence +import com.johnsnowlabs.nlp.annotators.tokenizer.wordpiece.BasicTokenizer + +import java.nio.charset.Charset +import scala.collection.mutable.ListBuffer + +class BertTokenizer(val vocab: Map[String, Int], val specialTokens: SpecialTokens) + extends BasicTokenizer { + + /** Encode the input sequence to indexes IDs adding padding where necessary */ + def encode( + sentences: Seq[(WordpieceTokenizedSentence, Int)], + maxSequenceLength: Int): Seq[Array[Int]] = { + val maxSentenceLength = + Array( + maxSequenceLength - 2, + sentences.map { case (wpTokSentence, _) => + wpTokSentence.tokens.length + }.max).min + + sentences + .map { case (wpTokSentence, _) => + val tokenPieceIds = wpTokSentence.tokens.map(t => t.pieceId) + val padding = Array.fill(maxSentenceLength - tokenPieceIds.length)(specialTokens.pad.id) + + Array(specialTokens.sentenceStart.id) ++ tokenPieceIds.take(maxSentenceLength) ++ Array( + specialTokens.sentenceEnd.id) ++ padding + } + } + + def decodeTokens(tokens: Array[Int]): String = { + val specialTokens = SpecialTokens.getSpecialTokensForModel("bert", vocab) + val decoderVocab: Map[Int, String] = vocab.map(x => (x._2, x._1)) + val unicodeToByteMapping: Map[String, Int] = + bytesToUnicodeMapping.map(x => (x._2, x._1)) + val text = tokens + .map(token => decoderVocab.getOrElse(token, "")) + .filter(x => !specialTokens.contains(x)) + .mkString("") + val bytes = text.map(x => unicodeToByteMapping(x.toString)).map(x => x.toByte).toArray + new String(bytes, Charset.forName("UTF-8")) + } + + /** Mapping for bytes to a different set of unicode characters (especially white spaces). 
This + * improved model performance for gpt-2 + */ + protected val bytesToUnicodeMapping: Map[Int, String] = { + val bytes: ListBuffer[Int] = + ListBuffer.range('!', '~' + 1) ++ ListBuffer.range('¡', '¬' + 1) ++ ListBuffer + .range('®', 'ÿ' + 1) + val characters: ListBuffer[Int] = bytes.clone + var n = 0 + for (b <- 0 to 256) { + if (!bytes.contains(b)) { + bytes += b + characters += (256 + n) + n += 1 + } + } + (bytes zip characters.map(_.toChar.toString)).toMap + } + +} diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/tokenizer/bpe/BpeSpecialTokens.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/tokenizer/bpe/BpeSpecialTokens.scala index eb2769a4ad7458..4afb1d5b9bf18c 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/tokenizer/bpe/BpeSpecialTokens.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/tokenizer/bpe/BpeSpecialTokens.scala @@ -170,6 +170,14 @@ private[johnsnowlabs] object SpecialTokens { unkTokenString = "<|endoftext|>", maskTokenString = "<|endoftext|>", padTokenString = "<|endoftext|>") + case "bert" => + SpecialTokens( + vocab, + startTokenString = "[CLS]", + endTokenString = "[SEP]", + unkTokenString = "[UNK]", + maskTokenString = "[MASK]", + padTokenString = "[PAD]") } } diff --git a/src/main/scala/com/johnsnowlabs/nlp/pretrained/PretrainedPipeline.scala b/src/main/scala/com/johnsnowlabs/nlp/pretrained/PretrainedPipeline.scala index 59747ec2c14f21..53ab187d6eca16 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/pretrained/PretrainedPipeline.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/pretrained/PretrainedPipeline.scala @@ -119,7 +119,7 @@ case class PretrainedPipeline( } def fullAnnotateImage(pathToImages: Array[String]): Array[Map[String, Seq[IAnnotation]]] = { - lightModel.fullAnnotateImage(pathToImages) + lightModel.fullAnnotateImages(pathToImages) } def fullAnnotate(audio: Array[Float]): Map[String, Seq[IAnnotation]] = { @@ -157,9 +157,14 @@ case class PretrainedPipeline( 
lightModel.fullAnnotateImageJava(pathToImage) } - def fullAnnotateImageJava(pathToImages: java.util.ArrayList[String]) + def fullAnnotateImageJava( + pathToImages: java.util.ArrayList[String], + texts: java.util.ArrayList[String]) : java.util.List[java.util.Map[String, java.util.List[IAnnotation]]] = { - lightModel.fullAnnotateJava(pathToImages) + if (texts.isEmpty) { + lightModel.fullAnnotateJava(pathToImages) + } else lightModel.fullAnnotateImageJava(pathToImages, texts) + } def fullAnnotateSingleAudioJava( diff --git a/src/test/scala/com/johnsnowlabs/nlp/AssertAnnotations.scala b/src/test/scala/com/johnsnowlabs/nlp/AssertAnnotations.scala index d1991a8c5db95a..423cb03f8929ed 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/AssertAnnotations.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/AssertAnnotations.scala @@ -105,9 +105,10 @@ object AssertAnnotations { val mode = columnName + ".mode" val result = columnName + ".result" val metadata = columnName + ".metadata" + val text = columnName + ".text" dataSet - .select(annotatorType, origin, height, width, nChannels, mode, result, metadata) + .select(annotatorType, origin, height, width, nChannels, mode, result, metadata, text) .rdd .map { row => val annotatorTypeSeq: Seq[String] = row @@ -134,6 +135,9 @@ object AssertAnnotations { val metadataSeq: Seq[Map[String, String]] = row .getAs[Map[String, String]]("metadata") .asInstanceOf[mutable.WrappedArray[Map[String, String]]] + val textSeq: Seq[String] = row + .getAs[String]("text") + .asInstanceOf[mutable.WrappedArray[String]] originSeq.zipWithIndex.map { case (origin, index) => AnnotationImage( @@ -144,7 +148,8 @@ object AssertAnnotations { nChannelsSeq(index), modeSeq(index), resultSeq(index).asInstanceOf[Array[Byte]], - metadataSeq(index)) + metadataSeq(index), + textSeq(index)) } } .collect() diff --git a/src/test/scala/com/johnsnowlabs/nlp/ImageAssemblerTest.scala b/src/test/scala/com/johnsnowlabs/nlp/ImageAssemblerTest.scala index d9baaf6fa38a82..d48686bafe9c4f 
100644 --- a/src/test/scala/com/johnsnowlabs/nlp/ImageAssemblerTest.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/ImageAssemblerTest.scala @@ -21,6 +21,7 @@ import com.johnsnowlabs.nlp.util.io.ResourceHelper import com.johnsnowlabs.tags.{FastTest, SlowTest} import org.apache.spark.ml.Pipeline import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.functions.lit import org.scalatest.flatspec.AnyFlatSpec class ImageAssemblerTest extends AnyFlatSpec { @@ -42,9 +43,32 @@ class ImageAssemblerTest extends AnyFlatSpec { val assembled = imageAssembler.transform(dataFrame) val result = AssertAnnotations.getActualImageResult(assembled, "image_assembler") - assert(result.nonEmpty) + result.foreach(annotationImages => + annotationImages.foreach { annotationImage => + assert(annotationImage.annotatorType == IMAGE) + assert(annotationImage.origin.contains(imagesPath)) + assert(annotationImage.height >= 0) + assert(annotationImage.width >= 0) + assert(annotationImage.nChannels >= 0) + assert(annotationImage.mode >= 0) + assert(annotationImage.result.nonEmpty) + assert(annotationImage.metadata.nonEmpty) + assert(annotationImage.text.isEmpty) + }) + } + + it should "work with text column" in { + + val testDF: DataFrame = dataFrame.withColumn("text", lit("What's this picture about?")) + val imageAssembler: ImageAssembler = new ImageAssembler() + .setInputCol("image") + .setOutputCol("image_assembler") + + val assembled = imageAssembler.transform(testDF) + val result = AssertAnnotations.getActualImageResult(assembled, "image_assembler") + assert(result.nonEmpty) result.foreach(annotationImages => annotationImages.foreach { annotationImage => assert(annotationImage.annotatorType == IMAGE) @@ -55,6 +79,7 @@ class ImageAssemblerTest extends AnyFlatSpec { assert(annotationImage.mode >= 0) assert(annotationImage.result.nonEmpty) assert(annotationImage.metadata.nonEmpty) + assert(annotationImage.text.nonEmpty) }) } @@ -82,7 +107,7 @@ class ImageAssemblerTest extends 
AnyFlatSpec { val pipeline: Pipeline = new Pipeline().setStages(Array(imageAssembler)) val pipelineModel = pipeline.fit(emptyDF) val lightPipeline = new LightPipeline(pipelineModel) - val result = lightPipeline.fullAnnotateImage(images) + val result = lightPipeline.fullAnnotateImages(images) assert(result.length == images.length) result.foreach(annotation => assert(annotation("image_assembler").nonEmpty)) diff --git a/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnsweringTest.scala b/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnsweringTest.scala new file mode 100644 index 00000000000000..3b068b6e47a5c9 --- /dev/null +++ b/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnsweringTest.scala @@ -0,0 +1,186 @@ +/* + * Copyright 2017-2024 John Snow Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.johnsnowlabs.nlp.annotators.cv + +import com.johnsnowlabs.nlp.base.LightPipeline +import com.johnsnowlabs.nlp.util.io.ResourceHelper +import com.johnsnowlabs.nlp.{Annotation, AssertAnnotations, ImageAssembler} +import com.johnsnowlabs.tags.SlowTest +import org.apache.spark.ml.Pipeline +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.functions.lit +import org.scalatest.flatspec.AnyFlatSpec + +class BLIPForQuestionAnsweringTest extends AnyFlatSpec { + + private val modelsPath = "/models/transformers" + val tfModelPath = s"$modelsPath/tf/blip-vqa-tf/Salesforce/blip-vqa-base/saved_model/1" + val sparkNLPModelPath = s"$modelsPath/spark-nlp/tf/blip-vqa" + + val model = getBLIPForQuestionAnsweringPipelineModel + + "BLIP" should "load and save model" ignore { + val blipForQuestionAnswering = BLIPForQuestionAnswering + .loadSavedModel(tfModelPath, ResourceHelper.spark) + .setSize(384) + + blipForQuestionAnswering.write.overwrite().save(sparkNLPModelPath) + } + + "BLIP" should "answer a question for a given image" taggedAs SlowTest in { + + val testDF = getTestDF + val result = model.transform(testDF) + + val answerAnnotation = AssertAnnotations.getActualResult(result, "answer") + + answerAnnotation.foreach { annotation => + annotation.foreach(a => assert(a.result.nonEmpty)) + } + } + + it should "work with light pipeline annotate" taggedAs SlowTest in { + val lightPipeline = new LightPipeline(model) + val imagePath = "src/test/resources/image/egyptian_cat.jpeg" + val resultAnnotate = lightPipeline.annotate(imagePath, "What's this picture about?") + println(s"resultAnnotate: $resultAnnotate") + + assert(resultAnnotate("answer").head.contains("cat")) + } + + it should "work with light pipeline full annotate" taggedAs SlowTest in { + val lightPipeline = new LightPipeline(model) + val imagePath = "src/test/resources/image/bluetick.jpg" + val resultFullAnnotate = + lightPipeline.fullAnnotateImage(imagePath, "What's this picture about?") + + 
val answerAnnotation = resultFullAnnotate("answer").head.asInstanceOf[Annotation] + + println(s"imageName.result: ${answerAnnotation.result}") + assert(answerAnnotation.result.nonEmpty) + } + + it should "fullAnnotate with empty Map when a text is empty" taggedAs SlowTest in { + val lightPipeline = new LightPipeline(model) + val imagesPath = Array( + "src/test/resources/image/bluetick.jpg", + "src/test/resources/image/chihuahua.jpg", + "src/test/resources/image/egyptian_cat.jpeg") + val question = "What's this picture about?" + val questions = Array(question, "", question) + + val resultFullAnnotate = lightPipeline.fullAnnotateImages(imagesPath, questions) + + resultFullAnnotate.zip(imagesPath).foreach { case (annotateMap, imagePath) => + imagePath match { + case "src/test/resources/image/chihuahua.jpg" => + // For the chihuahua image, the annotateMap should be empty because the question is empty + assert( + annotateMap.isEmpty, + s"Expected empty map for image: $imagePath, but got: $annotateMap") + + case _ => + assert(annotateMap.nonEmpty, s"Expected non-empty map for image: $imagePath") + + annotateMap.get("answer") match { + case Some(annotations) => + annotations.foreach { iAnnotation => + val annotation = iAnnotation.asInstanceOf[Annotation] + assert( + annotation.result.nonEmpty, + s"Expected non-empty result for image: $imagePath, but got empty result") + } + case None => + fail(s"'answer' key not found in annotateMap for image: $imagePath") + } + } + } + } + + it should "annotate with empty Map when a text is empty" taggedAs SlowTest in { + val lightPipeline = new LightPipeline(model) + val imagesPath = Array( + "src/test/resources/image/bluetick.jpg", + "src/test/resources/image/chihuahua.jpg", + "src/test/resources/image/egyptian_cat.jpeg") + val question = "What's this picture about?" 
+ val questions = Array(question, "", question) + + val resultAnnotate = lightPipeline.annotate(imagesPath, questions) + + resultAnnotate.foreach { annotate => + println(s"annotate: $annotate") + } + + resultAnnotate.zip(imagesPath).foreach { case (annotateMap, imagePath) => + imagePath match { + case "src/test/resources/image/chihuahua.jpg" => + // For the chihuahua image, the annotateMap should be empty because the question is empty + assert( + annotateMap.isEmpty, + s"Expected empty map for image: $imagePath, but got: $annotateMap") + + case _ => + assert(annotateMap.nonEmpty, s"Expected non-empty map for image: $imagePath") + + annotateMap.get("answer") match { + case Some(annotations) => + annotations.foreach { annotation => + assert( + annotation.nonEmpty, + s"Expected non-empty result for image: $imagePath, but got empty result") + } + case None => + fail(s"'answer' key not found in annotateMap for image: $imagePath") + } + } + } + + } + + private def getBLIPForQuestionAnsweringPipelineModel = { + val testDF = getTestDF + + val imageAssembler: ImageAssembler = new ImageAssembler() + .setInputCol("image") + .setOutputCol("image_assembler") + + val loadModel = BLIPForQuestionAnswering + .pretrained() + .setInputCols("image_assembler") + .setOutputCol("answer") + .setSize(384) + + val newPipeline: Pipeline = + new Pipeline().setStages(Array(imageAssembler, loadModel)) + + newPipeline.fit(testDF) + } + + private def getTestDF: DataFrame = { + val imageFolder = "src/test/resources/image/" + val imageDF: DataFrame = ResourceHelper.spark.read + .format("image") + .option("dropInvalid", value = true) + .load(imageFolder) + + val testDF: DataFrame = imageDF.withColumn("text", lit("What's this picture about?")) + + testDF + } + +} diff --git a/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/CLIPForZeroShotClassificationTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/CLIPForZeroShotClassificationTestSpec.scala index 85b43a790634ab..92491fc1abddac 
100644 --- a/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/CLIPForZeroShotClassificationTestSpec.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/CLIPForZeroShotClassificationTestSpec.scala @@ -74,7 +74,7 @@ class CLIPForZeroShotClassificationTestSpec extends AnyFlatSpec { val pipelineModel = pipeline.fit(imageDF) val lightPipeline = new LightPipeline(pipelineModel) val images = expected.keys.map(imageFolder + _).toArray - val result = lightPipeline.fullAnnotateImage(images) + val result = lightPipeline.fullAnnotateImages(images) result.foreach { row: Map[String, Seq[IAnnotation]] => val imageName = diff --git a/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/ViTImageClassificationTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/ViTImageClassificationTestSpec.scala index fdf2e43b574a81..0eacd5378bde6f 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/ViTImageClassificationTestSpec.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/ViTImageClassificationTestSpec.scala @@ -159,7 +159,7 @@ trait ViTForImageClassificationBehaviors { this: AnyFlatSpec => val images = Array("src/test/resources/image/hen.JPEG", "src/test/resources/image/missing_file.mf") - val predictions = lightPipeline.fullAnnotateImage(images) + val predictions = lightPipeline.fullAnnotateImages(images) assert(predictions(0)("image_assembler").nonEmpty) assert(predictions(0)("class").nonEmpty) @@ -185,7 +185,7 @@ trait ViTForImageClassificationBehaviors { this: AnyFlatSpec => val images = Array("src/test/resources/image/hen.JPEG", "this is a text") - val predictions = lightPipeline.fullAnnotateImage(images) + val predictions = lightPipeline.fullAnnotateImages(images) assert(predictions(0)("image_assembler").nonEmpty) assert(predictions(0)("class").nonEmpty) @@ -232,7 +232,7 @@ class ViTImageClassificationTestSpec extends AnyFlatSpec with ViTForImageClassif "tractor.JPEG" -> "tractor", "ox.JPEG" -> "ox") - private lazy val model: 
ViTForImageClassification = ViTForImageClassification.pretrained() + private val model: ViTForImageClassification = ViTForImageClassification.pretrained() it should behave like behaviorsViTForImageClassification[ViTForImageClassification]( diff --git a/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/VisionEncoderDecoderForImageCaptioningTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/VisionEncoderDecoderForImageCaptioningTestSpec.scala index 64aae2c9d330b9..b67e2684ea432a 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/VisionEncoderDecoderForImageCaptioningTestSpec.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/VisionEncoderDecoderForImageCaptioningTestSpec.scala @@ -88,7 +88,7 @@ class VisionEncoderDecoderForImageCaptioningTestSpec extends AnyFlatSpec { val pipelineModel = pipeline.fit(imageDF) val lightPipeline = new LightPipeline(pipelineModel) val image = imageFolder + "egyptian_cat.jpeg" - val results = lightPipeline.fullAnnotateImage(Array(image, image)) + val results = lightPipeline.fullAnnotateImages(Array(image, image)) results.foreach { result => assert(result("image_assembler").nonEmpty) From af0c319331829bb7e982dd077b3670b1a229509c Mon Sep 17 00:00:00 2001 From: Danilo Burbano Date: Wed, 2 Oct 2024 18:09:26 -0500 Subject: [PATCH 2/6] [SPARKNLP-1068] Adding BLIPForQuestionAnswering import notebook example --- ...n_Spark_NLP_BLIPForQuestionAnswering.ipynb | 3425 +++++++++++++++++ 1 file changed, 3425 insertions(+) create mode 100644 examples/python/transformers/HuggingFace_in_Spark_NLP_BLIPForQuestionAnswering.ipynb diff --git a/examples/python/transformers/HuggingFace_in_Spark_NLP_BLIPForQuestionAnswering.ipynb b/examples/python/transformers/HuggingFace_in_Spark_NLP_BLIPForQuestionAnswering.ipynb new file mode 100644 index 00000000000000..c1e15d7d45bf1f --- /dev/null +++ b/examples/python/transformers/HuggingFace_in_Spark_NLP_BLIPForQuestionAnswering.ipynb @@ -0,0 +1,3425 @@ +{ + "cells": [ + { + 
"cell_type": "markdown", + "metadata": { + "id": "UiBTGTRfSCQh" + }, + "source": [ + "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/HuggingFace_in_Spark_NLP_BLIPForQuestionAnswering.ipynb)\n", + "\n", + "# Import TensorFlow BLIP models from HuggingFace 🤗 into Spark NLP 🚀\n", + "\n", + "Let's keep in mind a few things before we start 😊\n", + "\n", + "- This feature is only in `Spark NLP 5.5.1` and after. So please make sure you have upgraded to the latest Spark NLP release\n", + "- You can import BLIP models trained/fine-tuned for question answering via `TFBlipForQuestionAnswering`.\n", + "- Reference: [TFBlipForQuestionAnswering](https://huggingface.co/docs/transformers/en/model_doc/blip#transformers.TFBlipForQuestionAnswering)\n", + "- Some [example models](https://huggingface.co/models?pipeline_tag=visual-question-answering&sort=trending&search=BLIP)\n", + "- To execute this notebook on Google Colab you will need an A100 or similar instance" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vkGbcTagUK4P" + }, + "source": [ + "## Export and Save HuggingFace model\n", + "\n", + "- We lock TensorFlow on `2.11.0` version and Transformers on `4.39.3`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "N9RXtKzHaEvi", + "outputId": "5631c0ca-0f5f-4f38-c9ab-9a5591906067" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m40.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m46.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m77.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m22.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m86.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.3/781.3 kB\u001b[0m \u001b[31m41.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "cudf-cu12 24.4.1 requires protobuf<5,>=3.20, but you have protobuf 3.19.6 which is incompatible.\n", + "google-cloud-aiplatform 1.67.1 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.2, but you have protobuf 3.19.6 which is incompatible.\n", + "google-cloud-bigquery-connection 1.15.5 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.2, but you have protobuf 3.19.6 which is incompatible.\n", + "google-cloud-bigquery-storage 2.26.0 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.2, but you have protobuf 3.19.6 which is incompatible.\n", + "google-cloud-bigtable 2.26.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.2, but you have protobuf 3.19.6 which is incompatible.\n", + "google-cloud-functions 1.16.5 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.2, but you have protobuf 3.19.6 which is incompatible.\n", + "google-cloud-iam 2.15.2 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.2, but you have protobuf 3.19.6 which is incompatible.\n", + "google-cloud-language 2.13.4 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.2, but you have protobuf 3.19.6 which is incompatible.\n", + "google-cloud-pubsub 2.23.1 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.2, but you have protobuf 3.19.6 which is incompatible.\n", + "google-cloud-resource-manager 1.12.5 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.2, but you have protobuf 3.19.6 which is incompatible.\n", + "google-cloud-translate 3.15.5 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.2, but you have protobuf 3.19.6 which is 
incompatible.\n", + "googleapis-common-protos 1.65.0 requires protobuf!=3.20.0,!=3.20.1,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0.dev0,>=3.20.2, but you have protobuf 3.19.6 which is incompatible.\n", + "grpc-google-iam-v1 0.13.1 requires protobuf!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.2, but you have protobuf 3.19.6 which is incompatible.\n", + "pandas-gbq 0.23.1 requires google-auth-oauthlib>=0.7.0, but you have google-auth-oauthlib 0.4.6 which is incompatible.\n", + "tensorflow-datasets 4.9.6 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n", + "tensorflow-metadata 1.15.0 requires protobuf<4.21,>=3.20.3; python_version < \"3.11\", but you have protobuf 3.19.6 which is incompatible.\n", + "tf-keras 2.17.0 requires tensorflow<2.18,>=2.17, but you have tensorflow 2.11.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0m" + ] + } + ], + "source": [ + "!pip install -q tensorflow==2.11.0" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fIGek4zAUVM9" + }, + "source": [ + "- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. We will use that to save it as TF `SavedModel`.\n", + "- We'll use [Salesforce/blip-vqa-base](https://huggingface.co/Salesforce/blip-vqa-base) model from HuggingFace as an example\n", + "- In addition to `TFBlipForQuestionAnswering` we also need to save the `BlipProcessor`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "n1tqMsNXK5lN" + }, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + "from transformers import BlipProcessor, TFBlipForQuestionAnswering\n", + "import tensorflow as tf" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "PiEKBy42ezX7" + }, + "outputs": [], + "source": [ + "MODEL_NAME = \"Salesforce/blip-vqa-base\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 353, + "referenced_widgets": [ + "a8fc97ee9a5646268761e3362eb07ccd", + "0bf25fe03bcb4c9f9c0c2556d7a1ea99", + "58cac0f27ae347debd32014c34b37a1e", + "4e7a8a4a4bef4012bb7c8d3f31056ac2", + "bfbe18f452db43bea36212209eceac60", + "427370f1a81246fd85323abba58483ac", + "158c854e5e744216b485e8e0eaf33d14", + "d07cf17e58214062be88f5da1c55221b", + "2ea6b3a04c274905b5cdb76a4d1d197a", + "b03cae4fb10a47b5ac4b69cdaaa913d0", + "55e8c34dfbbb48f6b00a16762f107787", + "800ef838b66343659fffc789449c0a9f", + "22215a25c1f04cf3bc994b91716ecd91", + "a572bc9c98bb49598735bd4af9cef841", + "9c4125362fc44efea531faf2d48e6e04", + "a93f052249df447481ecf3531e52dcb2", + "ebf1f217cdef4024a9aecd90c2471986", + "98adb63f15664ac88046d941690cf13c", + "a2d6850c56e04bc08633717c569a6393", + "749cdc9d728e4ff18ec8192eb0062789", + "569e4bb367274c37bab0a314cd998e23", + "228cdee565d545f9a35b7bcbeafd29e7", + "cb4387e38cfb462ab8d53466ad9c69c8", + "26f1c75dbc8d4faab3c5874c1fbc9802", + "04e16cc0b237449299e3858c9db4295f", + "39a19e2bca9c4c1cb057cb225e90f0cf", + "9dfb9fa922954e2fac9867039e35a8bd", + "98f5799ac2314802a4d5565c05b93597", + "6331f40bb5394cb9b0ca9c5dfb104d6c", + "76f07bae7301446280b973486572e9fa", + "252ed515f22a48e2b97857e453945fb5", + "9717a812f3f84fc9ae100f9915f680df", + "22b606b09395484aaea3946d02319eca", + "2264d7fdc4a14032b4704c0caa64d8fb", + "b8c1b72a53ca4b14b7ff874942819011", + 
"c1048df076c946db8909c7091b82fcfa", + "6ee8baa1c4624a74835f0a434da22ce6", + "c375f592a3ab4dbbb2ff2dd98817dc1c", + "b71dcd5229a9409b83a45c561cd57489", + "9a0d0ec79a8142c3b5113bce264adeb9", + "3c2c91312ae146f8b1e95d3e81ad0056", + "ad23ef6e0c64424bb28127a9bf6b4951", + "7a99d35b201b45ceb9f18bb21bbf5cee", + "dfbd503e8f31449fa7c2358001fc77cb", + "151a916c65ee4196ae7cb53406365c45", + "33e4be1c2ce040baae33e3f100dad4f6", + "f71322f009844d02830f45b40632dc6a", + "58baacaa12b840ef9fb48bdd797ed498", + "ff0bd78c11b34f92a861029aeb3c9d3a", + "4f71c03378fc4ede80dd4c07b319df8d", + "4e345925052f464fb4aaaa92a1bd4fc7", + "e167c4bf6725441d89edcd705ba032be", + "eca99f2c5400456d92948305189d66a6", + "aebced9d65414171a2b8bc0602be1993", + "9c4c3703c5ed48c9a753797ee56b00fc" + ] + }, + "id": "NgLAnDuhexzT", + "outputId": "0612907f-81f6-4526-e16a-25822771db73" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a8fc97ee9a5646268761e3362eb07ccd", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "preprocessor_config.json: 0%| | 0.00/445 [00:00> and will run it as-is.\n", + "Please report this to the TensorFlow team. 
When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.\n", + "Cause: 'NoneType' object has no attribute '_fields'\n", + "To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING: AutoGraph could not transform > and will run it as-is.\n", + "Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.\n", + "Cause: 'NoneType' object has no attribute '_fields'\n", + "To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.\n", + "WARNING:tensorflow:AutoGraph could not transform > and will run it as-is.\n", + "Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.\n", + "Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method\n", + "To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING: AutoGraph could not transform > and will run it as-is.\n", + "Please report this to the TensorFlow team. 
When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.\n", + "Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method\n", + "To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/tensorflow/python/autograph/impl/api.py:371: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. recommend setting `max_new_tokens` to control the maximum length of the generation.\n", + " return py_builtins.overload_of(f)(*args)\n", + "WARNING:absl:Found untraced functions such as serving, serving, serving, serving, patch_embedding_layer_call_fn while saving (showing 5 of 1569). These functions will not be directly callable after loading.\n" + ] + } + ], + "source": [ + "# Define TF Signature\n", + "@tf.function(\n", + " input_signature=[\n", + " {\n", + " \"pixel_values\": tf.TensorSpec((1, None, None, None), tf.float32, name=\"pixel_values\"),\n", + " \"input_ids\": tf.TensorSpec((1, None), tf.int32, name=\"input_ids\"),\n", + " \"attention_mask\": tf.TensorSpec((1, None), tf.int64, name=\"attention_mask\")\n", + " }\n", + " ]\n", + ")\n", + "def serving_fn(inputs):\n", + " # Unpack the input dictionary and pass it to the model's generate function\n", + " return model.generate(\n", + " input_ids=inputs[\"input_ids\"],\n", + " pixel_values=inputs[\"pixel_values\"],\n", + " attention_mask=inputs.get(\"attention_mask\", None)\n", + " )\n", + "\n", + "model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True, signatures={\"serving_default\": serving_fn.get_concrete_function()})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FYF-xt3HWEr0" + }, + "source": [ + "Let's have a look inside these two directories and see what we are dealing with:" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "oTlKokmrsVDR", + "outputId": "b56b637b-76a8-4471-f908-908dc44bd117" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 936\n", + "-rw-r--r-- 1 root root 471 Oct 2 18:10 preprocessor_config.json\n", + "-rw-r--r-- 1 root root 695 Oct 2 18:10 special_tokens_map.json\n", + "-rw-r--r-- 1 root root 1348 Oct 2 18:10 tokenizer_config.json\n", + "-rw-r--r-- 1 root root 711396 Oct 2 18:10 tokenizer.json\n", + "-rw-r--r-- 1 root root 231508 Oct 2 18:10 vocab.txt\n" + ] + } + ], + "source": [ + "!ls -l {MODEL_NAME}_blip_processor" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hVzKx5bUWGny", + "outputId": "b4d9ae80-f865-4e1e-825c-a02a68ce9958" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 1503636\n", + "-rw-r--r-- 1 root root 664 Oct 2 18:18 config.json\n", + "-rw-r--r-- 1 root root 136 Oct 2 18:18 generation_config.json\n", + "drwxr-xr-x 3 root root 4096 Oct 2 18:14 saved_model\n", + "-rw-r--r-- 1 root root 1539703504 Oct 2 18:18 tf_model.h5\n" + ] + } + ], + "source": [ + "!ls -l {MODEL_NAME}" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JcEP4XF9WXYb", + "outputId": "2952576f-b7a6-411f-9487-605be09b654c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 61764\n", + "drwxr-xr-x 2 root root 4096 Oct 2 18:14 assets\n", + "-rw-r--r-- 1 root root 55 Oct 2 18:18 fingerprint.pb\n", + "-rw-r--r-- 1 root root 604021 Oct 2 18:18 keras_metadata.pb\n", + "-rw-r--r-- 1 root root 62626669 Oct 2 18:18 saved_model.pb\n", + "drwxr-xr-x 2 root root 4096 Oct 2 18:17 variables\n" + ] + } + ], + "source": [ + "!ls -l 
{MODEL_NAME}/saved_model/1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WQ0yckQRsYCx" + }, + "source": [ + "So we need to move the files `preprocessor_config.json`, `tokenizer.json` and `vocab.txt` from processor to assets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HWaeOrl6UDOI" + }, + "source": [ + "- As you can see, we need the SavedModel from `saved_model/1/` path\n", + "- We will also need `preprocessor_config.json`, `tokenizer.json` and `vocab.txt` from processor\n", + "- All we need is to just copy those files to `saved_model/1/assets` which Spark NLP will look for" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "xiuyWqlLs4OL" + }, + "outputs": [], + "source": [ + "!mv {MODEL_NAME}_blip_processor/preprocessor_config.json {MODEL_NAME}/saved_model/1/assets\n", + "!mv {MODEL_NAME}_blip_processor/tokenizer.json {MODEL_NAME}/saved_model/1/assets\n", + "!mv {MODEL_NAME}_blip_processor/vocab.txt {MODEL_NAME}/saved_model/1/assets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wa1yVpATVrZv" + }, + "source": [ + "Voila! 
We have our `preprocessor_config.json`, `tokenizer.json` and `vocab.txt` inside assets directory" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ljkBpPTftE8G", + "outputId": "e5922df7-f2be-409e-e395-83e2974a5750" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 928\n", + "-rw-r--r-- 1 root root 471 Oct 2 18:10 preprocessor_config.json\n", + "-rw-r--r-- 1 root root 711396 Oct 2 18:10 tokenizer.json\n", + "-rw-r--r-- 1 root root 231508 Oct 2 18:10 vocab.txt\n" + ] + } + ], + "source": [ + "!ls -l {MODEL_NAME}/saved_model/1/assets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7NdEMMiXTQbn" + }, + "source": [ + "## Import and Save BLIPForQuestionAnswering in Spark NLP" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YumDH6zHV1af" + }, + "source": [ + "Let's install and set up Spark NLP in Google Colab\n", + "This part is pretty easy via our simple script" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "Qb994CB80vU-" + }, + "outputs": [], + "source": [ + "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "klO_mqUs1WgE", + "outputId": "ff8b25e6-ea0c-4d59-fded-db93e3213d97" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/lib/python3.10/subprocess.py:1796: RuntimeWarning: os.fork() was called. 
os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", + " self.pid = _posixsubprocess.fork_exec(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Apache Spark version: 3.4.0\n" + ] + } + ], + "source": [ + "import sparknlp\n", + "# let's start Spark with Spark NLP\n", + "spark = sparknlp.start()\n", + "\n", + "print(\"Apache Spark version: {}\".format(spark.version))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Yj1LrqgXSp22" + }, + "source": [ + "- Let's use `loadSavedModel` function in `BLIPForQuestionAnswering` which allows us to load TensorFlow model in SavedModel format\n", + "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n", + "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file system natively." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "id": "s0IKr6l21dmt" + }, + "outputs": [], + "source": [ + "from sparknlp.annotator import *\n", + "from sparknlp.base import *\n", + "\n", + "blip_for_question_answering = BLIPForQuestionAnswering.loadSavedModel(\n", + " '{}/saved_model/1'.format(MODEL_NAME),\n", + " spark\n", + " )\\\n", + " .setSize(384)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "S2SXFXqqV7io" + }, + "source": [ + "Let's save it on disk so it is easier to be moved around and also be used later via .load function" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "id": "O_WLb5WTV-sI" + }, + "outputs": [], + "source": [ + "blip_for_question_answering.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8c-9B3fXWDqi" + }, + "source": [ + "Let's clean up stuff we don't need anymore" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "id": "qNTTflXjWELp" + }, + "outputs": [], + "source": [ + "!rm -rf {MODEL_NAME}_blip_processor {MODEL_NAME}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bMNZ2gdcWPJI" + }, + "source": [ + "Awesome 😎 !\n", + "\n", + "This is your BLIPForQuestionAnswering model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JPoiZrbg-agf", + "outputId": "e8be56dd-f998-499c-f8e5-b738ce81a989" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 1563412\n", + "-rw-r--r-- 1 root root 1600921187 Oct 2 18:42 blip_vqa_tensorflow\n", + "drwxr-xr-x 4 root root 4096 Oct 2 18:41 fields\n", + "drwxr-xr-x 2 root root 4096 Oct 2 18:41 metadata\n" + ] + } + ], + "source": [ + "! 
ls -l {MODEL_NAME}_spark_nlp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Oizr-BZYWVmj" + }, + "source": [ + "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny BLIPForQuestionAnswering model in Spark NLP 🚀 pipeline!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kfXocFvjWbOq" + }, + "source": [ + "Let's try with a public image of cats" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "qNGGZSbxAkSp", + "outputId": "70c64f2f-3347-460e-8df2-d02fb036ff32" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2024-10-02 18:42:30-- http://images.cocodataset.org/val2017/000000039769.jpg\n", + "Resolving images.cocodataset.org (images.cocodataset.org)... 3.5.27.152, 3.5.29.161, 16.182.34.49, ...\n", + "Connecting to images.cocodataset.org (images.cocodataset.org)|3.5.27.152|:80... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 173131 (169K) [image/jpeg]\n", + "Saving to: ‘/content/cat_image.jpg’\n", + "\n", + "/content/cat_image. 
100%[===================>] 169.07K 312KB/s in 0.5s \n", + "\n", + "2024-10-02 18:42:31 (312 KB/s) - ‘/content/cat_image.jpg’ saved [173131/173131]\n", + "\n" + ] + } + ], + "source": [ + "!wget -O /content/cat_image.jpg \"http://images.cocodataset.org/val2017/000000039769.jpg\"" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "id": "MDeYB-PGAvgA" + }, + "outputs": [], + "source": [ + "!mkdir images\n", + "!mv cat_image.jpg images" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l6Ii_rwDWn3J" + }, + "source": [ + "To proceed, please create a DataFrame with two columns:\n", + "\n", + "- An `image` column that contains the file path for each image in the directory.\n", + "- A `text` column where you can input the specific question you would like to ask about each image." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GlJRrn7NA5_3", + "outputId": "13703fbb-0085-49dd-9909-212bc45624f1" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+--------------------+\n", + "| image| text|\n", + "+--------------------+--------------------+\n", + "|{file:///content/...|What's this pictu...|\n", + "+--------------------+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "from pyspark.sql.functions import lit\n", + "\n", + "images_path = \"./images/\"\n", + "image_df = spark.read.format(\"image\").load(path=images_path)\n", + "\n", + "test_df = image_df.withColumn(\"text\", lit(\"What's this picture about?\"))\n", + "test_df.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XO8RXVifXNbZ" + }, + "source": [ + "Now let's build our `BLIPForQuestionAnswering` pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "id": "00MxfP2KBKpW" + }, + "outputs": [], + "source": [ + "imageAssembler = ImageAssembler() \\\n", + " 
.setInputCol(\"image\") \\\n", + " .setOutputCol(\"image_assembler\") \\\n", + "\n", + "imageClassifier = BLIPForQuestionAnswering.load(\"./{}_spark_nlp\".format(MODEL_NAME)) \\\n", + " .setInputCols(\"image_assembler\") \\\n", + " .setOutputCol(\"answer\") \\\n", + " .setSize(384)\n", + "\n", + "pipeline = Pipeline(\n", + " stages=[\n", + " imageAssembler,\n", + " imageClassifier,\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "id": "m3z6twXbBhw4" + }, + "outputs": [], + "source": [ + "model = pipeline.fit(test_df)\n", + "result = model.transform(test_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_8NQhgilCGDO", + "outputId": "ed295952-9553-4780-f3fd-9a6adea89fe7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------------------------+------+\n", + "|origin |result|\n", + "+--------------------------------------+------+\n", + "|[file:///content/images/cat_image.jpg]|[cats]|\n", + "+--------------------------------------+------+\n", + "\n" + ] + } + ], + "source": [ + "result.select(\"image_assembler.origin\", \"answer.result\").show(truncate = False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YDvCiVP3XXPd" + }, + "source": [ + "That's it! 
You can now go wild and use hundreds of `BLIPForQuestionAnswering` models from HuggingFace 🤗 in Spark NLP 🚀\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "A100", + "machine_shape": "hm", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "04e16cc0b237449299e3858c9db4295f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_76f07bae7301446280b973486572e9fa", + "max": 231508, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_252ed515f22a48e2b97857e453945fb5", + "value": 231508 + } + }, + "0b1ed81f489c4fd09ab7bb1d1ad938fb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "0bf25fe03bcb4c9f9c0c2556d7a1ea99": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_427370f1a81246fd85323abba58483ac", + "placeholder": "​", + "style": "IPY_MODEL_158c854e5e744216b485e8e0eaf33d14", + "value": "preprocessor_config.json: 100%" + } + }, + "0e3e739b6a5c4e4aaec788974ef551b5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7c4edf1f672042e68e6a15e7da5a0127", + "placeholder": "​", + "style": "IPY_MODEL_21951a3e1c6a4650851d4ee31cd2387f", + "value": " 1.54G/1.54G [00:51<00:00, 29.4MB/s]" + } + }, + "111f56022b3c4737a9f643143673c6b5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, 
+ "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "151a916c65ee4196ae7cb53406365c45": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_33e4be1c2ce040baae33e3f100dad4f6", + "IPY_MODEL_f71322f009844d02830f45b40632dc6a", + "IPY_MODEL_58baacaa12b840ef9fb48bdd797ed498" + ], + "layout": "IPY_MODEL_ff0bd78c11b34f92a861029aeb3c9d3a" + } + }, + "158c854e5e744216b485e8e0eaf33d14": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "16ddbd3fcb7f4dba8e8b48d6f6962046": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_111f56022b3c4737a9f643143673c6b5", + "placeholder": "​", + "style": "IPY_MODEL_af46ebc1d3d84a8589920ee7338936cf", + "value": "config.json: 100%" + } + }, + "18317efb0631479bbbd6f373942c7349": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "21951a3e1c6a4650851d4ee31cd2387f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "22215a25c1f04cf3bc994b91716ecd91": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ebf1f217cdef4024a9aecd90c2471986", + "placeholder": "​", + "style": "IPY_MODEL_98adb63f15664ac88046d941690cf13c", + "value": "tokenizer_config.json: 100%" + } + }, + "2264d7fdc4a14032b4704c0caa64d8fb": { + 
"model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b8c1b72a53ca4b14b7ff874942819011", + "IPY_MODEL_c1048df076c946db8909c7091b82fcfa", + "IPY_MODEL_6ee8baa1c4624a74835f0a434da22ce6" + ], + "layout": "IPY_MODEL_c375f592a3ab4dbbb2ff2dd98817dc1c" + } + }, + "228cdee565d545f9a35b7bcbeafd29e7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "22b606b09395484aaea3946d02319eca": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "252ed515f22a48e2b97857e453945fb5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", 
+ "bar_color": null, + "description_width": "" + } + }, + "26f1c75dbc8d4faab3c5874c1fbc9802": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_98f5799ac2314802a4d5565c05b93597", + "placeholder": "​", + "style": "IPY_MODEL_6331f40bb5394cb9b0ca9c5dfb104d6c", + "value": "vocab.txt: 100%" + } + }, + "2ea6b3a04c274905b5cdb76a4d1d197a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "33e4be1c2ce040baae33e3f100dad4f6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4f71c03378fc4ede80dd4c07b319df8d", + "placeholder": "​", + "style": "IPY_MODEL_4e345925052f464fb4aaaa92a1bd4fc7", + "value": "special_tokens_map.json: 100%" + } + }, + "39202d00e08f49d196159bdd16c29f6f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + 
"model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6fe7e0e408d54752ae71d47a58f31469", + "placeholder": "​", + "style": "IPY_MODEL_ddddfea881df4a7b89845fb4485edf0d", + "value": "model.safetensors: 100%" + } + }, + "39a19e2bca9c4c1cb057cb225e90f0cf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9717a812f3f84fc9ae100f9915f680df", + "placeholder": "​", + "style": "IPY_MODEL_22b606b09395484aaea3946d02319eca", + "value": " 232k/232k [00:00<00:00, 668kB/s]" + } + }, + "3c2c91312ae146f8b1e95d3e81ad0056": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + 
"grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "427370f1a81246fd85323abba58483ac": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4e345925052f464fb4aaaa92a1bd4fc7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4e7a8a4a4bef4012bb7c8d3f31056ac2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b03cae4fb10a47b5ac4b69cdaaa913d0", + "placeholder": "​", + "style": "IPY_MODEL_55e8c34dfbbb48f6b00a16762f107787", + "value": " 445/445 [00:00<00:00, 32.3kB/s]" + } + }, + "4f5e6c1c45794f03aed2dd7223dd3255": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_16ddbd3fcb7f4dba8e8b48d6f6962046", + "IPY_MODEL_d11879914a854d8a91a4872ef4afc942", + "IPY_MODEL_ec039adb3b1f4522a7dac4386040590a" + ], + "layout": "IPY_MODEL_f7de63cc1da94daf9dc83406301873a3" + } + }, + "4f71c03378fc4ede80dd4c07b319df8d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": 
null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "559b67a1bb9240a887a34c9eafda45eb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "55e8c34dfbbb48f6b00a16762f107787": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "569e4bb367274c37bab0a314cd998e23": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + 
"_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "58baacaa12b840ef9fb48bdd797ed498": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_aebced9d65414171a2b8bc0602be1993", + "placeholder": "​", + "style": "IPY_MODEL_9c4c3703c5ed48c9a753797ee56b00fc", + "value": " 125/125 [00:00<00:00, 11.2kB/s]" + } + }, + "58cac0f27ae347debd32014c34b37a1e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d07cf17e58214062be88f5da1c55221b", + "max": 445, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_2ea6b3a04c274905b5cdb76a4d1d197a", + "value": 445 + } + }, + "5ce925ad60054d518453a6c6ae8d1707": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "626dcbd9418949b0b7e5dc8680f9b19b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_704723b61c674d3d9c322f6b31c9830a", + "max": 1538800584, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_559b67a1bb9240a887a34c9eafda45eb", + "value": 1538800584 + } + }, + "6331f40bb5394cb9b0ca9c5dfb104d6c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "6ee8baa1c4624a74835f0a434da22ce6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7a99d35b201b45ceb9f18bb21bbf5cee", + "placeholder": "​", + "style": "IPY_MODEL_dfbd503e8f31449fa7c2358001fc77cb", + "value": " 711k/711k [00:00<00:00, 1.37MB/s]" + } + }, + "6fe7e0e408d54752ae71d47a58f31469": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, 
+ "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "704723b61c674d3d9c322f6b31c9830a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + 
"visibility": null, + "width": null + } + }, + "749cdc9d728e4ff18ec8192eb0062789": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "763498ed74e6446a972930ab96d5d4d8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "76f07bae7301446280b973486572e9fa": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + 
"_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7a99d35b201b45ceb9f18bb21bbf5cee": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + 
"max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7c4edf1f672042e68e6a15e7da5a0127": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "800ef838b66343659fffc789449c0a9f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": 
[ + "IPY_MODEL_22215a25c1f04cf3bc994b91716ecd91", + "IPY_MODEL_a572bc9c98bb49598735bd4af9cef841", + "IPY_MODEL_9c4125362fc44efea531faf2d48e6e04" + ], + "layout": "IPY_MODEL_a93f052249df447481ecf3531e52dcb2" + } + }, + "9717a812f3f84fc9ae100f9915f680df": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "98adb63f15664ac88046d941690cf13c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "98f5799ac2314802a4d5565c05b93597": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9a0d0ec79a8142c3b5113bce264adeb9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9c4125362fc44efea531faf2d48e6e04": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_569e4bb367274c37bab0a314cd998e23", + "placeholder": "​", + "style": "IPY_MODEL_228cdee565d545f9a35b7bcbeafd29e7", + "value": " 592/592 [00:00<00:00, 53.5kB/s]" + } + }, + "9c4c3703c5ed48c9a753797ee56b00fc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9dfb9fa922954e2fac9867039e35a8bd": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, 
+ "visibility": null, + "width": null + } + }, + "a2d6850c56e04bc08633717c569a6393": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a572bc9c98bb49598735bd4af9cef841": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a2d6850c56e04bc08633717c569a6393", + "max": 592, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_749cdc9d728e4ff18ec8192eb0062789", + 
"value": 592 + } + }, + "a8fc97ee9a5646268761e3362eb07ccd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_0bf25fe03bcb4c9f9c0c2556d7a1ea99", + "IPY_MODEL_58cac0f27ae347debd32014c34b37a1e", + "IPY_MODEL_4e7a8a4a4bef4012bb7c8d3f31056ac2" + ], + "layout": "IPY_MODEL_bfbe18f452db43bea36212209eceac60" + } + }, + "a9265e8b56b14330a51ac0e07faab189": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_39202d00e08f49d196159bdd16c29f6f", + "IPY_MODEL_626dcbd9418949b0b7e5dc8680f9b19b", + "IPY_MODEL_0e3e739b6a5c4e4aaec788974ef551b5" + ], + "layout": "IPY_MODEL_5ce925ad60054d518453a6c6ae8d1707" + } + }, + "a93f052249df447481ecf3531e52dcb2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": 
null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ad23ef6e0c64424bb28127a9bf6b4951": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "aebced9d65414171a2b8bc0602be1993": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + 
"justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "af46ebc1d3d84a8589920ee7338936cf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b03cae4fb10a47b5ac4b69cdaaa913d0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": 
null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b71dcd5229a9409b83a45c561cd57489": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b8c1b72a53ca4b14b7ff874942819011": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b71dcd5229a9409b83a45c561cd57489", + "placeholder": "​", + "style": "IPY_MODEL_9a0d0ec79a8142c3b5113bce264adeb9", + "value": "tokenizer.json: 100%" + } + }, + 
"bfbe18f452db43bea36212209eceac60": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c1048df076c946db8909c7091b82fcfa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3c2c91312ae146f8b1e95d3e81ad0056", + "max": 711396, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_ad23ef6e0c64424bb28127a9bf6b4951", + "value": 711396 + } + }, + 
"c375f592a3ab4dbbb2ff2dd98817dc1c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "caa25abd3df346da806da3659070ae87": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": 
null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cb4387e38cfb462ab8d53466ad9c69c8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_26f1c75dbc8d4faab3c5874c1fbc9802", + "IPY_MODEL_04e16cc0b237449299e3858c9db4295f", + "IPY_MODEL_39a19e2bca9c4c1cb057cb225e90f0cf" + ], + "layout": "IPY_MODEL_9dfb9fa922954e2fac9867039e35a8bd" + } + }, + "d07cf17e58214062be88f5da1c55221b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + 
"height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d11879914a854d8a91a4872ef4afc942": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_caa25abd3df346da806da3659070ae87", + "max": 4559, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_0b1ed81f489c4fd09ab7bb1d1ad938fb", + "value": 4559 + } + }, + "ddddfea881df4a7b89845fb4485edf0d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "dfbd503e8f31449fa7c2358001fc77cb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e167c4bf6725441d89edcd705ba032be": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ebf1f217cdef4024a9aecd90c2471986": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + 
"grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ec039adb3b1f4522a7dac4386040590a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_763498ed74e6446a972930ab96d5d4d8", + "placeholder": "​", + "style": "IPY_MODEL_18317efb0631479bbbd6f373942c7349", + "value": " 4.56k/4.56k [00:00<00:00, 378kB/s]" + } + }, + "eca99f2c5400456d92948305189d66a6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f71322f009844d02830f45b40632dc6a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e167c4bf6725441d89edcd705ba032be", + "max": 125, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_eca99f2c5400456d92948305189d66a6", + "value": 125 + } + }, + "f7de63cc1da94daf9dc83406301873a3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ff0bd78c11b34f92a861029aeb3c9d3a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": 
"1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} From c256e1648af4addf0e5bb663e8f4c93ede986258 Mon Sep 17 00:00:00 2001 From: Danilo Burbano Date: Thu, 3 Oct 2024 12:57:23 -0500 Subject: [PATCH 3/6] [SPARKNLP-1068] Fix fullAnnotateImage validation --- src/main/scala/com/johnsnowlabs/nlp/LightPipeline.scala | 4 +--- src/test/scala/com/johnsnowlabs/nlp/ImageAssemblerTest.scala | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/main/scala/com/johnsnowlabs/nlp/LightPipeline.scala b/src/main/scala/com/johnsnowlabs/nlp/LightPipeline.scala index d6793fdba19e8e..20236a5732f3fd 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/LightPipeline.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/LightPipeline.scala @@ -78,8 +78,7 @@ class LightPipeline(val pipelineModel: PipelineModel, parseEmbeddings: Boolean = } def fullAnnotateImage(pathToImage: String, text: String = ""): Map[String, Seq[IAnnotation]] = { - val isValidFile = 
ResourceHelper.validFile(pathToImage) - if (!isValidFile || isValidFile && text.isEmpty) { + if (!ResourceHelper.validFile(pathToImage)) { Map() } else fullAnnotateInternal(pathToImage, text) } @@ -374,7 +373,6 @@ class LightPipeline(val pipelineModel: PipelineModel, parseEmbeddings: Boolean = pathToImages: java.util.ArrayList[String], texts: java.util.ArrayList[String]) : java.util.List[java.util.Map[String, java.util.List[IAnnotation]]] = { - if (texts.isEmpty) { pathToImages.asScala.par .map { imageFilePath => diff --git a/src/test/scala/com/johnsnowlabs/nlp/ImageAssemblerTest.scala b/src/test/scala/com/johnsnowlabs/nlp/ImageAssemblerTest.scala index d48686bafe9c4f..0161fbdff4e35c 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/ImageAssemblerTest.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/ImageAssemblerTest.scala @@ -58,7 +58,7 @@ class ImageAssemblerTest extends AnyFlatSpec { }) } - it should "work with text column" in { + it should "work with text column" taggedAs FastTest in { val testDF: DataFrame = dataFrame.withColumn("text", lit("What's this picture about?")) val imageAssembler: ImageAssembler = new ImageAssembler() From 7c46662a6e7e78d53b41ed2d27c4a02a38243677 Mon Sep 17 00:00:00 2001 From: Danilo Burbano Date: Thu, 3 Oct 2024 13:48:27 -0500 Subject: [PATCH 4/6] [SPARKNLP-1068] Solves BLIPForQuestionAnsweringTest issue --- .../cv/BLIPForQuestionAnsweringTest.scala | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnsweringTest.scala b/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnsweringTest.scala index 3b068b6e47a5c9..d511151316ce96 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnsweringTest.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnsweringTest.scala @@ -27,19 +27,7 @@ import org.scalatest.flatspec.AnyFlatSpec class BLIPForQuestionAnsweringTest extends AnyFlatSpec 
{ - private val modelsPath = "/models/transformers" - val tfModelPath = s"$modelsPath/tf/blip-vqa-tf/Salesforce/blip-vqa-base/saved_model/1" - val sparkNLPModelPath = s"$modelsPath/spark-nlp/tf/blip-vqa" - - val model = getBLIPForQuestionAnsweringPipelineModel - - "BLIP" should "load and save model" ignore { - val blipForQuestionAnswering = BLIPForQuestionAnswering - .loadSavedModel(tfModelPath, ResourceHelper.spark) - .setSize(384) - - blipForQuestionAnswering.write.overwrite().save(sparkNLPModelPath) - } + lazy val model = getBLIPForQuestionAnsweringPipelineModel "BLIP" should "answer a question for a given image" taggedAs SlowTest in { From 1b4b29d1e0c53dfd934c9d6ecf6df6082932e163 Mon Sep 17 00:00:00 2001 From: Danilo Burbano Date: Fri, 4 Oct 2024 07:49:32 -0500 Subject: [PATCH 5/6] [SPARKNLP-1068] Updates default BLIPForQuestionAnswering model name --- python/sparknlp/annotator/cv/blip_for_question_answering.py | 2 +- .../nlp/annotators/cv/BLIPForQuestionAnswering.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/sparknlp/annotator/cv/blip_for_question_answering.py b/python/sparknlp/annotator/cv/blip_for_question_answering.py index b861449e27d862..6153ddd61fe302 100644 --- a/python/sparknlp/annotator/cv/blip_for_question_answering.py +++ b/python/sparknlp/annotator/cv/blip_for_question_answering.py @@ -84,7 +84,7 @@ def loadSavedModel(folder, spark_session): return BLIPForQuestionAnswering(java_model=jModel) @staticmethod - def pretrained(name="blip_vqa_tf", lang="en", remote_loc=None): + def pretrained(name="blip_vqa_base", lang="en", remote_loc=None): """Downloads and loads a pretrained model. 
Parameters diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnswering.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnswering.scala index 9cd5bca6ff9e35..88c21943b06928 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnswering.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnswering.scala @@ -219,7 +219,7 @@ trait ReadablePretrainedBLIPForQuestionAnswering extends ParamsAndFeaturesReadable[BLIPForQuestionAnswering] with HasPretrained[BLIPForQuestionAnswering] { - override val defaultModelName: Some[String] = Some("blip_vqa_tf") + override val defaultModelName: Some[String] = Some("blip_vqa_base") /** Java compliant-overrides */ override def pretrained(): BLIPForQuestionAnswering = super.pretrained() From e121763255b2296234858699a95d9d417fbb4c89 Mon Sep 17 00:00:00 2001 From: Danilo Burbano Date: Wed, 9 Oct 2024 12:44:23 -0500 Subject: [PATCH 6/6] [SPARKNLP-1068] [skip test] Adding documentation to BLIPForQuestionAnswering --- .../cv/blip_for_question_answering.py | 67 +++++++++++++- .../cv/BLIPForQuestionAnswering.scala | 87 ++++++++++++++++++- 2 files changed, 151 insertions(+), 3 deletions(-) diff --git a/python/sparknlp/annotator/cv/blip_for_question_answering.py b/python/sparknlp/annotator/cv/blip_for_question_answering.py index 6153ddd61fe302..fe018c0e683bf2 100644 --- a/python/sparknlp/annotator/cv/blip_for_question_answering.py +++ b/python/sparknlp/annotator/cv/blip_for_question_answering.py @@ -20,6 +20,71 @@ class BLIPForQuestionAnswering(AnnotatorModel, HasEngine, HasCandidateLabelsProperties, HasRescaleFactor): + """BLIPForQuestionAnswering can load BLIP models for visual question answering. + The model consists of a vision encoder, a text encoder as well as a text decoder. 
+ The vision encoder will encode the input image, the text encoder will encode the input question together + with the encoding of the image, and the text decoder will output the answer to the question. + + Pretrained models can be loaded with :meth:`.pretrained` of the companion + object: + + >>> visualQAClassifier = BLIPForQuestionAnswering.pretrained() \\ + ... .setInputCols(["image_assembler"]) \\ + ... .setOutputCol("answer") + + The default model is ``"blip_vqa_base"``, if no name is + provided. + + For available pretrained models please see the `Models Hub + <https://sparknlp.org/models?task=Question+Answering>`__. + + To see which models are compatible and how to import them see + `Import Transformers into Spark NLP 🚀 + <https://github.com/JohnSnowLabs/spark-nlp/discussions/5669>`_. + + ====================== ====================== + Input Annotation types Output Annotation type + ====================== ====================== + ``IMAGE`` ``DOCUMENT`` + ====================== ====================== + + Parameters + ---------- + batchSize + Batch size. Large values allow faster processing but require more + memory, by default 2 + configProtoBytes + ConfigProto from tensorflow, serialized into byte array. + maxSentenceLength + Max sentence length to process, by default 50 + + Examples + -------- + >>> import sparknlp + >>> from sparknlp.base import * + >>> from sparknlp.annotator import * + >>> from pyspark.ml import Pipeline + >>> image_df = SparkSessionForTest.spark.read.format("image").load(path=images_path) + >>> test_df = image_df.withColumn("text", lit("What's this picture about?")) + >>> imageAssembler = ImageAssembler() \\ + ... .setInputCol("image") \\ + ... .setOutputCol("image_assembler") + >>> visualQAClassifier = BLIPForQuestionAnswering.pretrained() \\ + ... .setInputCols("image_assembler") \\ + ... .setOutputCol("answer") \\ + ... .setSize(384) + >>> pipeline = Pipeline().setStages([ + ... imageAssembler, + ... visualQAClassifier + ...
]) + >>> result = pipeline.fit(test_df).transform(test_df) + >>> result.select("image_assembler.origin", "answer.result").show(truncate=False) + +--------------------------------------+------+ + |origin |result| + +--------------------------------------+------+ + |[file:///content/images/cat_image.jpg]|[cats]| + +--------------------------------------+------+ + """ name = "BLIPForQuestionAnswering" @@ -59,7 +124,7 @@ def __init__(self, classname="com.johnsnowlabs.nlp.annotators.cv.BLIPForQuestion ) self._setDefault( batchSize=2, - size=224, + size=384, maxSentenceLength=50 ) diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnswering.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnswering.scala index 88c21943b06928..a0f15de929cafb 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnswering.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnswering.scala @@ -31,17 +31,100 @@ import com.johnsnowlabs.ml.util.LoadExternalModel.{ import com.johnsnowlabs.ml.util.TensorFlow import com.johnsnowlabs.nlp.AnnotatorType.{DOCUMENT, IMAGE} import com.johnsnowlabs.nlp._ -import com.johnsnowlabs.nlp.annotators.{RegexTokenizer, Tokenizer, TokenizerModel} +import com.johnsnowlabs.nlp.annotators.RegexTokenizer import com.johnsnowlabs.nlp.annotators.cv.feature_extractor.Preprocessor import com.johnsnowlabs.nlp.annotators.sbd.pragmatic.SentenceDetector import com.johnsnowlabs.nlp.annotators.tokenizer.bpe.{BertTokenizer, SpecialTokens} -import com.johnsnowlabs.nlp.annotators.tokenizer.wordpiece.BasicTokenizer import com.johnsnowlabs.nlp.serialization.MapFeature import org.apache.spark.broadcast.Broadcast import org.apache.spark.ml.param.{IntArrayParam, IntParam} import org.apache.spark.ml.util.Identifiable import org.apache.spark.sql.SparkSession +/** BLIPForQuestionAnswering can load BLIP models for visual question answering.
The model + * consists of a vision encoder, a text encoder as well as a text decoder. The vision encoder + * will encode the input image, the text encoder will encode the input question together with the + * encoding of the image, and the text decoder will output the answer to the question. + * + * Pretrained models can be loaded with `pretrained` of the companion object: + * {{{ + * val visualQAClassifier = BLIPForQuestionAnswering.pretrained() + * .setInputCols("image_assembler") + * .setOutputCol("answer") + * }}} + * The default model is `"blip_vqa_base"`, if no name is provided. + * + * For available pretrained models please see the + * [[https://sparknlp.org/models?task=Question+Answering Models Hub]]. + * + * Models from the HuggingFace 🤗 Transformers library are also compatible with Spark NLP 🚀. To + * see which models are compatible and how to import them see + * [[https://github.com/JohnSnowLabs/spark-nlp/discussions/5669]] and to see more extended + * examples, see + * [[https://github.com/JohnSnowLabs/spark-nlp/blob/master/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnsweringTest.scala]]. 
+ * + * ==Example== + * {{{ + * import spark.implicits._ + * import com.johnsnowlabs.nlp.base._ + * import com.johnsnowlabs.nlp.annotator._ + * import org.apache.spark.ml.Pipeline + * + * val imageDF: DataFrame = ResourceHelper.spark.read + * .format("image") + * .option("dropInvalid", value = true) + * .load(imageFolder) + * + * val testDF: DataFrame = imageDF.withColumn("text", lit("What's this picture about?")) + * + * val imageAssembler: ImageAssembler = new ImageAssembler() + * .setInputCol("image") + * .setOutputCol("image_assembler") + * + * val visualQAClassifier = BLIPForQuestionAnswering.pretrained() + * .setInputCols("image_assembler") + * .setOutputCol("answer") + * + * val pipeline = new Pipeline().setStages(Array( + * imageAssembler, + * visualQAClassifier + * )) + * + * val result = pipeline.fit(testDF).transform(testDF) + * + * result.select("image_assembler.origin", "answer.result").show(false) + * +--------------------------------------+------+ + * |origin |result| + * +--------------------------------------+------+ + * |[file:///content/images/cat_image.jpg]|[cats]| + * +--------------------------------------+------+ + * }}} + * + * @see + * [[CLIPForZeroShotClassification]] for Zero Shot Image Classifier + * @see + * [[https://sparknlp.org/docs/en/annotators Annotators Main Page]] for a list of transformer + * based classifiers + * @param uid + * required uid for storing annotator to disk + * @groupname anno Annotator types + * @groupdesc anno + * Required input and expected output annotator types + * @groupname Ungrouped Members + * @groupname param Parameters + * @groupname setParam Parameter setters + * @groupname getParam Parameter getters + * @groupname Ungrouped Members + * @groupprio param 1 + * @groupprio anno 2 + * @groupprio Ungrouped 3 + * @groupprio setParam 4 + * @groupprio getParam 5 + * @groupdesc param + * A list of (hyper-)parameter keys this annotator can take. 
Users can set and get the + * parameter values through setters and getters, respectively. + */ + class BLIPForQuestionAnswering(override val uid: String) extends AnnotatorModel[BLIPForQuestionAnswering] with HasBatchedAnnotateImage[BLIPForQuestionAnswering]