Commit
[SPARKNLP-1068] [skip test] Adding documentation to BLIPForQuestionAnswering
danilojsl committed Oct 9, 2024
1 parent 1b4b29d commit e121763
Showing 2 changed files with 151 additions and 3 deletions.
67 changes: 66 additions & 1 deletion python/sparknlp/annotator/cv/blip_for_question_answering.py
@@ -20,6 +20,71 @@ class BLIPForQuestionAnswering(AnnotatorModel,
HasEngine,
HasCandidateLabelsProperties,
HasRescaleFactor):
"""BLIPForQuestionAnswering can load BLIP models for visual question answering.
The model consists of a vision encoder, a text encoder as well as a text decoder.
The vision encoder will encode the input image, the text encoder will encode the input question together
with the encoding of the image, and the text decoder will output the answer to the question.
Pretrained models can be loaded with :meth:`.pretrained` of the companion
object:
>>> visualQAClassifier = BLIPForQuestionAnswering.pretrained() \\
... .setInputCols(["image_assembler"]) \\
... .setOutputCol("answer")
The default model is ``"blip_vqa_base"``, if no name is
provided.
For available pretrained models please see the `Models Hub
<https://sparknlp.org/models?task=Question+Answering>`__.
To see which models are compatible and how to import them see
`Import Transformers into Spark NLP 🚀
<https://github.com/JohnSnowLabs/spark-nlp/discussions/5669>`_.
====================== ======================
Input Annotation types Output Annotation type
====================== ======================
``IMAGE`` ``DOCUMENT``
====================== ======================
Parameters
----------
batchSize
    Batch size. Larger values allow faster processing but require more
    memory, by default 2
configProtoBytes
    ConfigProto from tensorflow, serialized into byte array.
maxSentenceLength
    Max sentence length to process, by default 50
size
    Image size (height and width) the input image is resized to, by default 384
Examples
--------
>>> import sparknlp
>>> from sparknlp.base import *
>>> from sparknlp.annotator import *
>>> from pyspark.ml import Pipeline
>>> from pyspark.sql.functions import lit
>>> spark = sparknlp.start()
>>> image_df = spark.read.format("image").load(path=images_path)
>>> test_df = image_df.withColumn("text", lit("What's this picture about?"))
>>> imageAssembler = ImageAssembler() \\
... .setInputCol("image") \\
... .setOutputCol("image_assembler")
>>> visualQAClassifier = BLIPForQuestionAnswering.pretrained() \\
... .setInputCols("image_assembler") \\
... .setOutputCol("answer") \\
... .setSize(384)
>>> pipeline = Pipeline().setStages([
... imageAssembler,
... visualQAClassifier
... ])
>>> result = pipeline.fit(test_df).transform(test_df)
>>> result.select("image_assembler.origin", "answer.result").show(truncate=False)
+--------------------------------------+------+
|origin |result|
+--------------------------------------+------+
|[file:///content/images/cat_image.jpg]|[cats]|
+--------------------------------------+------+
"""

name = "BLIPForQuestionAnswering"

@@ -59,7 +124,7 @@ def __init__(self, classname="com.johnsnowlabs.nlp.annotators.cv.BLIPForQuestion
)
self._setDefault(
batchSize=2,
- size=224,
+ size=384,
maxSentenceLength=50
)

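A minimal usage sketch (not part of this commit) showing how the defaults above can be overridden through the annotator's setters; it assumes the default "blip_vqa_base" pretrained model is available from the Models Hub and that the setter names mirror the documented parameters (batchSize, size, maxSentenceLength).

import sparknlp
from sparknlp.annotator import BLIPForQuestionAnswering

spark = sparknlp.start()

# Override the defaults from _setDefault; each setter mirrors a documented parameter.
visual_qa = (
    BLIPForQuestionAnswering.pretrained()  # "blip_vqa_base" by default
    .setInputCols(["image_assembler"])
    .setOutputCol("answer")
    .setBatchSize(4)            # default: 2
    .setSize(384)               # default raised from 224 to 384 in this commit
    .setMaxSentenceLength(50)   # default: 50
)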
87 changes: 85 additions & 2 deletions src/main/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnswering.scala
@@ -31,17 +31,100 @@ import com.johnsnowlabs.ml.util.LoadExternalModel.{
import com.johnsnowlabs.ml.util.TensorFlow
import com.johnsnowlabs.nlp.AnnotatorType.{DOCUMENT, IMAGE}
import com.johnsnowlabs.nlp._
- import com.johnsnowlabs.nlp.annotators.{RegexTokenizer, Tokenizer, TokenizerModel}
+ import com.johnsnowlabs.nlp.annotators.RegexTokenizer
import com.johnsnowlabs.nlp.annotators.cv.feature_extractor.Preprocessor
import com.johnsnowlabs.nlp.annotators.sbd.pragmatic.SentenceDetector
import com.johnsnowlabs.nlp.annotators.tokenizer.bpe.{BertTokenizer, SpecialTokens}
import com.johnsnowlabs.nlp.annotators.tokenizer.wordpiece.BasicTokenizer
import com.johnsnowlabs.nlp.serialization.MapFeature
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.ml.param.{IntArrayParam, IntParam}
import org.apache.spark.ml.util.Identifiable
import org.apache.spark.sql.SparkSession

/** BLIPForQuestionAnswering can load BLIP models for visual question answering. The model
* consists of a vision encoder, a text encoder, and a text decoder. The vision encoder
* encodes the input image, the text encoder encodes the input question together with the
* image encoding, and the text decoder outputs the answer to the question.
*
* Pretrained models can be loaded with `pretrained` of the companion object:
* {{{
* val visualQAClassifier = BLIPForQuestionAnswering.pretrained()
* .setInputCols("image_assembler")
* .setOutputCol("answer")
* }}}
* The default model is `"blip_vqa_base"`, if no name is provided.
*
* For available pretrained models please see the
* [[https://sparknlp.org/models?task=Question+Answering Models Hub]].
*
* Models from the HuggingFace 🤗 Transformers library are also compatible with Spark NLP 🚀. To
* see which models are compatible and how to import them see
* [[https://github.com/JohnSnowLabs/spark-nlp/discussions/5669]] and to see more extended
* examples, see
* [[https://github.com/JohnSnowLabs/spark-nlp/blob/master/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/BLIPForQuestionAnsweringTest.scala]].
*
* ==Example==
* {{{
* import spark.implicits._
* import com.johnsnowlabs.nlp.base._
* import com.johnsnowlabs.nlp.annotator._
* import org.apache.spark.sql.functions.lit
* import org.apache.spark.ml.Pipeline
*
* val imageDF: DataFrame = ResourceHelper.spark.read
* .format("image")
* .option("dropInvalid", value = true)
* .load(imageFolder)
*
* val testDF: DataFrame = imageDF.withColumn("text", lit("What's this picture about?"))
*
* val imageAssembler: ImageAssembler = new ImageAssembler()
* .setInputCol("image")
* .setOutputCol("image_assembler")
*
* val visualQAClassifier = BLIPForQuestionAnswering.pretrained()
* .setInputCols("image_assembler")
* .setOutputCol("answer")
*
* val pipeline = new Pipeline().setStages(Array(
* imageAssembler,
* visualQAClassifier
* ))
*
* val result = pipeline.fit(testDF).transform(testDF)
*
* result.select("image_assembler.origin", "answer.result").show(false)
* +--------------------------------------+------+
* |origin |result|
* +--------------------------------------+------+
* |[file:///content/images/cat_image.jpg]|[cats]|
* +--------------------------------------+------+
* }}}
*
* @see
* [[CLIPForZeroShotClassification]] for Zero Shot Image Classifier
* @see
* [[https://sparknlp.org/docs/en/annotators Annotators Main Page]] for a list of transformer
* based classifiers
* @param uid
* required uid for storing annotator to disk
* @groupname anno Annotator types
* @groupdesc anno
* Required input and expected output annotator types
* @groupname Ungrouped Members
* @groupname param Parameters
* @groupname setParam Parameter setters
* @groupname getParam Parameter getters
* @groupprio param 1
* @groupprio anno 2
* @groupprio Ungrouped 3
* @groupprio setParam 4
* @groupprio getParam 5
* @groupdesc param
* A list of (hyper-)parameter keys this annotator can take. Users can set and get the
* parameter values through setters and getters, respectively.
*/

class BLIPForQuestionAnswering(override val uid: String)
extends AnnotatorModel[BLIPForQuestionAnswering]
with HasBatchedAnnotateImage[BLIPForQuestionAnswering]
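The annotator maps IMAGE annotations to DOCUMENT annotations (see the annotation-type table in the docstring above), so each answer comes back as an annotation struct. Below is a minimal sketch (not part of the diff) for pulling the answer text and metadata out of the "result" DataFrame produced in the examples; it assumes the pipeline has already been fit and run as shown.

from pyspark.sql.functions import explode

# `result` comes from pipeline.fit(test_df).transform(test_df) in the examples above.
# Each element of the "answer" column is a DOCUMENT annotation; the predicted text
# lives in the struct's `result` field.
answers = (
    result.select(explode("answer").alias("ann"))
          .select("ann.result", "ann.metadata")
)
answers.show(truncate=False)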
