From 5b6112394586ad74e8d8b49af8ccb2aa000a1af4 Mon Sep 17 00:00:00 2001 From: Devin Ha <33089471+DevinTDHa@users.noreply.github.com> Date: Thu, 8 Jun 2023 10:53:23 +0200 Subject: [PATCH 01/11] Resolve saving bug with multilabel parameter (#13842) --- .../johnsnowlabs/nlp/HasClassifierActivationProperties.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/com/johnsnowlabs/nlp/HasClassifierActivationProperties.scala b/src/main/scala/com/johnsnowlabs/nlp/HasClassifierActivationProperties.scala index 1f1dc4bdf48ce3..9d92e7a02482e6 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/HasClassifierActivationProperties.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/HasClassifierActivationProperties.scala @@ -16,7 +16,7 @@ package com.johnsnowlabs.nlp -import org.apache.spark.ml.param.{FloatParam, Param} +import org.apache.spark.ml.param.{BooleanParam, FloatParam, Param} trait HasClassifierActivationProperties extends ParamsAndFeaturesWritable { @@ -47,7 +47,7 @@ trait HasClassifierActivationProperties extends ParamsAndFeaturesWritable { * * @group param */ - val multilabel: Param[Boolean] = new Param( + val multilabel: BooleanParam = new BooleanParam( this, "multilabel", "Whether or not the result should be multi-class (the sum of all probabilities is 1.0) or multi-label (each label has a probability between 0.0 to 1.0). Default is False i.e. 
multi-class") From 09922099fa42a1b74337032ac700bc4663d836a5 Mon Sep 17 00:00:00 2001 From: Devin Ha <33089471+DevinTDHa@users.noreply.github.com> Date: Thu, 8 Jun 2023 11:03:22 +0200 Subject: [PATCH 02/11] SPARKNLP-815: Add examples for ZeroShotClassifiers (#13845) --- ...rk_NLP_BertForZeroShotClassification.ipynb | 630 ++++ ..._NLP_DistilBertForZeroClassification.ipynb | 2479 ++++++++++++++ ...NLP_RoBertaForZeroShotClassification.ipynb | 2839 +++++++++++++++++ 3 files changed, 5948 insertions(+) create mode 100644 examples/python/transformers/HuggingFace_in_Spark_NLP_BertForZeroShotClassification.ipynb create mode 100644 examples/python/transformers/HuggingFace_in_Spark_NLP_DistilBertForZeroClassification.ipynb create mode 100644 examples/python/transformers/HuggingFace_in_Spark_NLP_RoBertaForZeroShotClassification.ipynb diff --git a/examples/python/transformers/HuggingFace_in_Spark_NLP_BertForZeroShotClassification.ipynb b/examples/python/transformers/HuggingFace_in_Spark_NLP_BertForZeroShotClassification.ipynb new file mode 100644 index 00000000000000..796edc7d0ac0e1 --- /dev/null +++ b/examples/python/transformers/HuggingFace_in_Spark_NLP_BertForZeroShotClassification.ipynb @@ -0,0 +1,630 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "8IXf_Q668WRo" + }, + "source": [ + "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/HuggingFace_in_Spark_NLP_BertForZeroShotClassification.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fDfihUkE8WRr" + }, + "source": [ + "## Import BertForZeroShotClassification models from HuggingFace 🤗 into Spark NLP 🚀 \n", + "\n", + "Let's keep in mind a few things before we start 😊 \n", + "\n", + "- This feature is only in `Spark NLP 4.4.0` and after. 
So please make sure you have upgraded to the latest Spark NLP release\n", + "- You can import Bert models trained/fine-tuned for sequence classification via `BertForSequenceClassification` or `TFBertForSequenceClassification`. We can use these models for zero-shot classification.\n", + " - These models are usually under `Sequence Classification` category and have `bert` in their labels\n", + " - For zero-shot classification, we will use models trained on the nli data sets. The model should have been trained on the labels `contradiction`, `entailment` and `neutral`.\n", + "- Reference: [TFBertForSequenceClassification](https://huggingface.co/docs/transformers/main/en/model_doc/bert#transformers.TFBertForSequenceClassification)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vMg3NbLo8WRs" + }, + "source": [ + "## Export and Save HuggingFace model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ykej1XKH8WRu" + }, + "source": [ + "- Let's install `HuggingFace` and `TensorFlow`. You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n", + "- We lock TensorFlow on `2.11.0` version and Transformers on `4.25.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "yn28bSQi8WRu", + "outputId": "b49cc806-96c5-4013-d17b-cade1e93960a" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.8/5.8 MB\u001b[0m \u001b[31m63.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m236.8/236.8 kB\u001b[0m \u001b[31m21.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m76.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m59.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m65.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m82.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m37.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m107.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "tensorflow-datasets 4.9.2 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n", + "tensorflow-metadata 1.13.1 requires protobuf<5,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0m" + ] + } + ], + "source": [ + "!pip install -q transformers==4.25.1 tensorflow==2.11.0" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ehfCmKt98WRw" + }, + "source": [ + "- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. We will use that to save it as TF `SavedModel`.\n", + "- We'll use [bert-base-mnli](https://huggingface.co/aloxatel/bert-base-mnli) model from HuggingFace as an example\n", + "- In addition to `TFBertForSequenceClassification` we also need to save the `BertTokenizer`. This is the same for every model, these are assets needed for tokenization inside Spark NLP." + ] + }, + { + "cell_type": "code", + "source": [ + "from transformers import TFBertForSequenceClassification, BertTokenizer \n", + "import tensorflow as tf\n", + "\n", + "MODEL_NAME = 'aloxatel/bert-base-mnli'\n", + "\n", + "tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)\n", + "tokenizer.save_pretrained('./{}_tokenizer/'.format(MODEL_NAME))\n", + "\n", + "try:\n", + " model = TFBertForSequenceClassification.from_pretrained(MODEL_NAME)\n", + "except:\n", + " model = TFBertForSequenceClassification.from_pretrained(MODEL_NAME, from_pt=True)\n", + " \n", + "# Define TF Signature\n", + "@tf.function(\n", + " input_signature=[\n", + " {\n", + " \"input_ids\": tf.TensorSpec((None, None), tf.int32, name=\"input_ids\"),\n", + " \"attention_mask\": tf.TensorSpec((None, None), tf.int32, name=\"attention_mask\"),\n", + " \"token_type_ids\": tf.TensorSpec((None, None), tf.int32, name=\"token_type_ids\"),\n", + " }\n", + " ]\n", + ")\n", + "def serving_fn(input):\n", + " return 
model(input)\n", + "\n", + "model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True, signatures={\"serving_default\": serving_fn})" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LsiRkfEBQTzS", + "outputId": "f80aa406-d04c-4541-ba08-37cd63ad5065" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "All PyTorch model weights were used when initializing TFBertForSequenceClassification.\n", + "\n", + "All the weights of TFBertForSequenceClassification were initialized from the PyTorch model.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.\n", + "WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn while saving (showing 5 of 420). 
These functions will not be directly callable after loading.\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eDjo0QGq8WRy" + }, + "source": [ + "Let's have a look inside these two directories and see what we are dealing with:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "daGPGUdz8WRz", + "outputId": "11d8c9bc-ac26-42d6-d3e0-fc08ba159102" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "total 427968\n", + "-rw-r--r-- 1 root root 813 Jun 6 15:13 config.json\n", + "drwxr-xr-x 3 root root 4096 Jun 6 15:13 saved_model\n", + "-rw-r--r-- 1 root root 438226204 Jun 6 15:13 tf_model.h5\n" + ] + } + ], + "source": [ + "!ls -l {MODEL_NAME}" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "CwQH0R7h8WR1", + "outputId": "39dd8684-d1a7-4d51-daf8-d8bb994f1d01" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "total 9208\n", + "drwxr-xr-x 2 root root 4096 Jun 6 15:13 assets\n", + "-rw-r--r-- 1 root root 56 Jun 6 15:13 fingerprint.pb\n", + "-rw-r--r-- 1 root root 166830 Jun 6 15:13 keras_metadata.pb\n", + "-rw-r--r-- 1 root root 9245668 Jun 6 15:13 saved_model.pb\n", + "drwxr-xr-x 2 root root 4096 Jun 6 15:13 variables\n" + ] + } + ], + "source": [ + "!ls -l {MODEL_NAME}/saved_model/1" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IPztfyM38WR2", + "outputId": "67c260e5-dff1-418e-85cd-229876e429f0" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "total 236\n", + "-rw-r--r-- 1 root root 125 Jun 6 15:12 special_tokens_map.json\n", + "-rw-r--r-- 1 root root 540 Jun 6 15:12 tokenizer_config.json\n", + "-rw-r--r-- 1 root root 231508 Jun 6 15:12 vocab.txt\n" + ] + } + ], 
+ "source": [ + "!ls -l {MODEL_NAME}_tokenizer" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gjrYDipS8WR2" + }, + "source": [ + "- As you can see, we need the SavedModel from `saved_model/1/` path\n", + "- We will also need `vocab.txt` from the tokenizer\n", + "- All we need is to just copy the `vocab.txt` to `saved_model/1/assets` which Spark NLP will look for\n", + "- In addition to vocabs, we also need `labels` and their `ids` which are saved inside the model's config. We will save this inside `labels.txt`" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "QnQ0jke38WR3" + }, + "outputs": [], + "source": [ + "asset_path = '{}/saved_model/1/assets'.format(MODEL_NAME)\n", + "\n", + "!cp {MODEL_NAME}_tokenizer/vocab.txt {asset_path}" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "WPvOXbeZ8WR4", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "ba3ac9d9-bcbe-4ca1-ff23-f163c667fea8" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "['contradiction', 'entailment', 'neutral']\n" + ] + } + ], + "source": [ + "# get label strings\n", + "labels = [model.config.id2label[l] for l, v in model.config.id2label.items()]\n", + "print(labels)\n", + "\n", + "with open(asset_path+'/labels.txt', 'w') as f:\n", + " f.write('\\n'.join(labels))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UzQ650AZ8WR4" + }, + "source": [ + "Voila! 
We have our `vocab.txt` and `labels.txt` inside assets directory" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QcBOfJ918WR4", + "outputId": "0b3dbe3b-3b43-4f58-f5f8-d5a4151ebcbd" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "total 232\n", + "-rw-r--r-- 1 root root 32 Jun 6 15:14 labels.txt\n", + "-rw-r--r-- 1 root root 231508 Jun 6 15:14 vocab.txt\n" + ] + } + ], + "source": [ + "!ls -l {MODEL_NAME}/saved_model/1/assets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zk28iNof8WR5" + }, + "source": [ + "## Import and Save BertForZeroShotClassification in Spark NLP\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J__aVVu48WR5" + }, + "source": [ + "- Let's install and setup Spark NLP in Google Colab\n", + "- This part is pretty easy via our simple script" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "udnbTHNj8WR6", + "outputId": "5c00752b-c7a0-4bad-b369-5052af7ffcb5" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Installing PySpark 3.2.3 and Spark NLP 4.4.3\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.4.3\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.8/489.8 kB\u001b[0m \u001b[31m42.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m20.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5u9B2ldj8WR6" + }, + "source": [ + "Let's start Spark with Spark NLP included via our simple `start()` function" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "twQ6BHyo8WR6" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "# let's start Spark with Spark NLP\n", + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rOEy0EXR8WR7" + }, + "source": [ + "- Let's use `loadSavedModel` function in `BertForZeroShotClassification` which allows us to load a TensorFlow model in SavedModel format\n", + "- Most params can be set later when you are loading this model in `BertForZeroShotClassification` in runtime like `setMaxSentenceLength`, so don't worry about what you set now\n", + "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n", + "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file system natively." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "lcqReFJO8WR7" + }, + "outputs": [], + "source": [ + "from sparknlp.annotator import *\n", + "from sparknlp.base import *\n", + "\n", + "zero_shot_classifier = BertForZeroShotClassification.loadSavedModel(\n", + " '{}/saved_model/1'.format(MODEL_NAME),\n", + " spark\n", + " )\\\n", + " .setInputCols([\"document\", \"token\"]) \\\n", + " .setOutputCol(\"class\") \\\n", + " .setCandidateLabels([\"urgent\", \"mobile\", \"travel\", \"movie\", \"music\", \"sport\", \"weather\", \"technology\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VmHVmBCo8WR9" + }, + "source": [ + "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "9RBvw6p58WR9" + }, + "outputs": [], + "source": [ + "zero_shot_classifier.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DgUg2p0v8WR9" + }, + "source": [ + "Let's clean up stuff we don't need anymore" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cdBziZhw8WR-" + }, + "outputs": [], + "source": [ + "!rm -rf {MODEL_NAME}_tokenizer {MODEL_NAME}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_iwYIQ6U8WR-" + }, + "source": [ + "Awesome 😎 !\n", + "\n", + "This is your BertForZeroShotClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀 " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8JAkr3438WR-", + "outputId": "5a8535dd-b945-4b8f-f95e-b5fb23b8cb28" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "total 436628\n", + "-rw-r--r-- 1 root root 447094331 Jun 6 15:16 bert_classification_tensorflow\n", + "drwxr-xr-x 5 root root 
4096 Jun 6 15:16 fields\n", + "drwxr-xr-x 2 root root 4096 Jun 6 15:16 metadata\n" + ] + } + ], + "source": [ + "! ls -l {MODEL_NAME}_spark_nlp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D5c2xWtt8WR-" + }, + "source": [ + "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny BertForZeroShotClassification model 😊 " + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "JjxWoPhW8WR_" + }, + "outputs": [], + "source": [ + "zero_shot_classifier_loaded = BertForZeroShotClassification.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n", + " .setInputCols([\"document\",'token'])\\\n", + " .setOutputCol(\"class\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rAITDhUg8WSA" + }, + "source": [ + "This is how you can use your loaded classifier model in Spark NLP 🚀 pipeline:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "b4svOlV88WSA", + "outputId": "839f4e33-3a27-4ebe-ea2b-64ecd27d628a" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------------+\n", + "| result|\n", + "+------------+\n", + "| [urgent]|\n", + "|[technology]|\n", + "| [mobile]|\n", + "| [travel]|\n", + "| [movie]|\n", + "| [sport]|\n", + "| [urgent]|\n", + "+------------+\n", + "\n" + ] + } + ], + "source": [ + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "from pyspark.ml import Pipeline, PipelineModel\n", + "\n", + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "tokenizer = Tokenizer().setInputCols(\"document\").setOutputCol(\"token\")\n", + "\n", + "pipeline = Pipeline(stages=[\n", + " document_assembler,\n", + " tokenizer,\n", + " zero_shot_classifier_loaded\n", + "])\n", + "\n", + "text = [[\"I have a problem with my iphone that needs 
to be resolved asap!!\"],\n", + " [\"Last week I upgraded my iOS version and ever since then my phone has been overheating whenever I use your app.\"],\n", + " [\"I have a phone and I love it!\"],\n", + " [\"I really want to visit Germany and I am planning to go there next year.\"],\n", + " [\"Let's watch some movies tonight! I am in the mood for a horror movie.\"],\n", + " [\"Have you watched the match yesterday? It was a great game!\"],\n", + " [\"We need to harry up and get to the airport. We are going to miss our flight!\"]]\n", + "\n", + "# create a DataFrame in PySpark\n", + "inputDataset = spark.createDataFrame(text, [\"text\"])\n", + "model = pipeline.fit(inputDataset)\n", + "model.transform(inputDataset).select(\"class.result\").show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "26gEdXR28WSB" + }, + "source": [ + "That's it! You can now go wild and use hundreds of `BertForSequenceClassification` models as zero-shot classifiers from HuggingFace 🤗 in Spark NLP 🚀 " + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python [conda env:nlpdev]", + "language": "python", + "name": "conda-env-nlpdev-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/examples/python/transformers/HuggingFace_in_Spark_NLP_DistilBertForZeroClassification.ipynb b/examples/python/transformers/HuggingFace_in_Spark_NLP_DistilBertForZeroClassification.ipynb new file mode 100644 index 00000000000000..139799db44700a --- /dev/null +++ b/examples/python/transformers/HuggingFace_in_Spark_NLP_DistilBertForZeroClassification.ipynb @@ -0,0 +1,2479 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": 
"8IXf_Q668WRo" + }, + "source": [ + "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/HuggingFace%20in%20Spark%20NLP%20-%20DistilBertForZeroShotClassification.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fDfihUkE8WRr" + }, + "source": [ + "## Import DistilBertForZeroShotClassification models from HuggingFace 🤗 into Spark NLP 🚀 \n", + "\n", + "Let's keep in mind a few things before we start 😊 \n", + "\n", + "- This feature is only in `Spark NLP 4.4.1` and after. So please make sure you have upgraded to the latest Spark NLP release\n", + "- You can import DistilBERT models trained/fine-tuned for sequence classification via `DistilBertForSequenceClassification` or `TFDistilBertForSequenceClassification`. We can use these models for zero-shot classification.\n", + " - These models are usually under `Sequence Classification` category and have `distilbert` in their labels\n", + " - For zero-shot classification, We will usually use models trained on the nli data sets for best performance.\n", + "- Reference: [TFDistilBertForSequenceClassification](https://huggingface.co/transformers/model_doc/distilbert.html#tfdistilbertforsequenceclassification)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vMg3NbLo8WRs" + }, + "source": [ + "## Export and Save HuggingFace model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ykej1XKH8WRu" + }, + "source": [ + "- Let's install `HuggingFace` and `TensorFlow`. You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n", + "- We lock TensorFlow on `2.11.0` version and Transformers on `4.25.1`. 
This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "yn28bSQi8WRu", + "outputId": "54c3b582-f829-4052-ce29-791454c17e82" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.8/5.8 MB\u001b[0m \u001b[31m62.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m236.8/236.8 kB\u001b[0m \u001b[31m19.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m104.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m72.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m62.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m109.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m38.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m103.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h\u001b[31mERROR: pip's dependency resolver 
does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "tensorflow-datasets 4.9.2 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n", + "tensorflow-metadata 1.13.1 requires protobuf<5,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0m" + ] + } + ], + "source": [ + "!pip install -q transformers==4.25.1 tensorflow==2.11.0" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ehfCmKt98WRw" + }, + "source": [ + "- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. We will use that to save it as TF `SavedModel`.\n", + "- We'll use [distilbert-base-uncased-mnli](https://huggingface.co/typeform/distilbert-base-uncased-mnli) model from HuggingFace as an example\n", + " - For zero-shot classification, We will usually use models trained on the (m)nli data set for best performance.\n", + "- In addition to `TFDistilBertForSequenceClassification` we also need to save the `DistilBertTokenizer`. This is the same for every model, these are assets needed for tokenization inside Spark NLP." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 475, + "referenced_widgets": [ + "f8a1ac89cf5e4a26ad6d30c03a2b8e4d", + "a2a5e6ebdac742f7b1d4d33123c1c744", + "b10a1e979e3344ee857b1dfdf88ca748", + "851a51a9db664e2ca38b6d195384f47a", + "83fbc4e3720b4791aedc53b77ed3cb19", + "9d2fa61ddebc49d88c0e9e44d83cf36e", + "66238454a296491e8bd5ef008d450e38", + "695c064280044e169f15d9347ab23281", + "726d4e34581145479154f456c696c278", + "6df0bcc1a5f144b1bc7fe6f3cfa6a05f", + "0706c29c1d3c41c2a964f61b2fb72e20", + "9535415ff7c24a38b8d2e50e3774af01", + "660ab89806b94f72986d05f86785a4ad", + "13c72780c3bc4b3987fb5f78bb2a4904", + "f9436cd1fbbe4d75917b1a4171fed17a", + "ace0263340a34ac9b8579d5b4666faa7", + "b23d9d2b72d0488caf2b5dcca31b6314", + "ce75b803b79a43969269fdf6a890e16e", + "c9926babdab441b8a0f2a4c4f59ef92b", + "5399e1fe768345a490175e21a1ece578", + "3abdf0fcd9e54331b70c8a5008f0c1b5", + "58dc8096add9432180fd5c8b6eb101c4", + "c73aaf2bcb674518bb751f508f09292a", + "09818de216094490870001aca113adba", + "856bda93ec8941be917ab99623ca4851", + "ed5990ff8df04548bd259ae01b6e134a", + "8de2583fc33d48fd8e71b70c9919ea24", + "522d095cdad54bd48a22260924ecb9c2", + "f872b7001bcc436985d5cc2e72f7482c", + "cd2d0543ae6242d2a5d9571d5d4d0726", + "262a30c072604c048c828db2cd210176", + "40e34c26038146de9224f61d7bbda5b4", + "66698d2afeab47458a94bb4aa089184f", + "16d73d0610d04e52a0a93a45fe09854c", + "9a1344b59691413480a60f4e3d8cb741", + "d7af4f84b0ee43f7a26bb4ea7d72f048", + "7eb50fb21bf74d0f928741211108ea94", + "b7599ef85f9943e8874a30d5ca071825", + "d0993edddf534814be5e9e9c726e6011", + "01c0067c52b44de59fe07165babc8594", + "abd7567a0bd0495c9a721843786a276d", + "3d71b8d6a7bd4e26a0e8f6577dbc6d93", + "c954347f47894d1daa8fb2802e6ebb92", + "38ee16f0a5d84d92a0be6b3bef817dfe", + "68a558a48ab144a28137110ffc0dcf3b", + "341116f0e5074e3f8ce48b1cb0709b00", + "cc903d6ee439487ab10b881d10167ccf", + "9a094b07268445b0a7de94bb21e92707", 
+ "afaa696d4c2c40df8fcc27ce79f634f1", + "4db458113c9b414d8044cfb8adb7bbba", + "38b3aa539af04122a723bb79bee08485", + "57c1248bde6549f4aa5be8aa9b5a353f", + "830a592bd3594d2cae4fe35c0620b94e", + "4b5a8de4b92d412eb59306984ac327b1", + "5cf0e182cba142e7a7e14bac01e39e04" + ] + }, + "id": "oCOSyDn88WRx", + "outputId": "a2b5b435-eb43-4f62-93ad-1c9275c3a21e" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Downloading (…)solve/main/vocab.txt: 0%| | 0.00/232k [00:00, because it is not built.\n", + "WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n", + "WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n", + "WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n", + "WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n", + "WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n", + "WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, transformer_layer_call_fn, transformer_layer_call_and_return_conditional_losses, LayerNorm_layer_call_fn while saving (showing 5 of 164). 
These functions will not be directly callable after loading.\n" + ] + } + ], + "source": [ + "from transformers import TFDistilBertForSequenceClassification, DistilBertTokenizer \n", + "import tensorflow as tf\n", + "\n", + "MODEL_NAME = 'typeform/distilbert-base-uncased-mnli'\n", + "\n", + "tokenizer = DistilBertTokenizer.from_pretrained(MODEL_NAME)\n", + "tokenizer.save_pretrained('./{}_tokenizer/'.format(MODEL_NAME))\n", + "\n", + "try:\n", + " model = TFDistilBertForSequenceClassification.from_pretrained(MODEL_NAME)\n", + "except:\n", + " model = TFDistilBertForSequenceClassification.from_pretrained(MODEL_NAME, from_pt=True)\n", + " \n", + "# Define TF Signature\n", + "@tf.function(\n", + " input_signature=[\n", + " {\n", + " \"input_ids\": tf.TensorSpec((None, None), tf.int32, name=\"input_ids\"),\n", + " \"attention_mask\": tf.TensorSpec((None, None), tf.int32, name=\"attention_mask\") \n", + " }\n", + " ]\n", + ")\n", + "def serving_fn(input):\n", + " return model(input)\n", + "\n", + "model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True, signatures={\"serving_default\": serving_fn})\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eDjo0QGq8WRy" + }, + "source": [ + "Let's have a look inside these two directories and see what we are dealing with:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "daGPGUdz8WRz", + "outputId": "d84e4167-28a1-47f0-f7e5-d28722ffe63a" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "total 261688\n", + "-rw-r--r-- 1 root root 753 Jun 3 15:53 config.json\n", + "drwxr-xr-x 3 root root 4096 Jun 3 15:53 saved_model\n", + "-rw-r--r-- 1 root root 267954880 Jun 3 15:53 tf_model.h5\n" + ] + } + ], + "source": [ + "!ls -l {MODEL_NAME}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + 
"id": "CwQH0R7h8WR1", + "outputId": "8abc85a8-3f94-4b61-9ed4-f52dcf969092" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "total 5008\n", + "drwxr-xr-x 2 root root 4096 Jun 3 15:53 assets\n", + "-rw-r--r-- 1 root root 56 Jun 3 15:53 fingerprint.pb\n", + "-rw-r--r-- 1 root root 80289 Jun 3 15:53 keras_metadata.pb\n", + "-rw-r--r-- 1 root root 5032374 Jun 3 15:53 saved_model.pb\n", + "drwxr-xr-x 2 root root 4096 Jun 3 15:53 variables\n" + ] + } + ], + "source": [ + "!ls -l {MODEL_NAME}/saved_model/1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IPztfyM38WR2", + "outputId": "11fad132-29d2-4057-da33-da8b87bcb38b" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "total 236\n", + "-rw-r--r-- 1 root root 125 Jun 3 15:52 special_tokens_map.json\n", + "-rw-r--r-- 1 root root 574 Jun 3 15:52 tokenizer_config.json\n", + "-rw-r--r-- 1 root root 231508 Jun 3 15:52 vocab.txt\n" + ] + } + ], + "source": [ + "!ls -l {MODEL_NAME}_tokenizer" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gjrYDipS8WR2" + }, + "source": [ + "- As you can see, we need the SavedModel from `saved_model/1/` path\n", + "- We will also need `vocab.txt` from the tokenizer\n", + "- All we need is to just copy the `vocab.txt` to `saved_model/1/assets` which Spark NLP will look for\n", + "- In addition to vocabs, we also need `labels` and their `ids` which are saved inside the model's config. 
We will save this inside `labels.txt`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QnQ0jke38WR3" + }, + "outputs": [], + "source": [ + "asset_path = '{}/saved_model/1/assets'.format(MODEL_NAME)\n", + "\n", + "!cp {MODEL_NAME}_tokenizer/vocab.txt {asset_path}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WPvOXbeZ8WR4" + }, + "outputs": [], + "source": [ + "# get label2id dictionary \n", + "labels = model.config.label2id\n", + "# sort the dictionary based on the id\n", + "labels = sorted(labels, key=labels.get)\n", + "\n", + "with open(asset_path+'/labels.txt', 'w') as f:\n", + " f.write('\\n'.join(labels))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UzQ650AZ8WR4" + }, + "source": [ + "Voila! We have our `vocab.txt` and `labels.txt` inside assets directory" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QcBOfJ918WR4", + "outputId": "10112997-f328-4747-fb7f-0d37072e29e8" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "total 232\n", + "-rw-r--r-- 1 root root 32 Jun 3 15:53 labels.txt\n", + "-rw-r--r-- 1 root root 231508 Jun 3 15:53 vocab.txt\n" + ] + } + ], + "source": [ + "!ls -l {MODEL_NAME}/saved_model/1/assets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zk28iNof8WR5" + }, + "source": [ + "## Import and Save DistilBertForZeroShotClassification in Spark NLP\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J__aVVu48WR5" + }, + "source": [ + "- Let's install and setup Spark NLP in Google Colab\n", + "- This part is pretty easy via our simple script" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "udnbTHNj8WR6", + "outputId": "e0b6b426-2e0d-4be8-d831-bcea5d64f288" + }, + "outputs": [ + { + 
"output_type": "stream", + "name": "stdout", + "text": [ + "Installing PySpark 3.2.3 and Spark NLP 4.4.3\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.4.3\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.8/489.8 kB\u001b[0m \u001b[31m34.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5u9B2ldj8WR6" + }, + "source": [ + "Let's start Spark with Spark NLP included via our simple `start()` function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "twQ6BHyo8WR6" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "# let's start Spark with Spark NLP\n", + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rOEy0EXR8WR7" + }, + "source": [ + "- Let's use `loadSavedModel` functon in `DistilBertForZeroShotClassification` which allows us to load TensorFlow model in SavedModel format\n", + "- Most params can be set later when you are loading this model in `DistilBertForZeroShotClassification` in runtime like `setMaxSentenceLength`, so don't worry what you are setting them now\n", + "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. 
The second is the SparkSession, i.e. the `spark` variable we previously started via `sparknlp.start()`\n", + "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in the Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file system natively." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lcqReFJO8WR7" + }, + "outputs": [], + "source": [ + "from sparknlp.annotator import *\n", + "from sparknlp.base import *\n", + "\n", + "zero_shot_classifier = DistilBertForZeroShotClassification.loadSavedModel(\n", + " '{}/saved_model/1'.format(MODEL_NAME),\n", + " spark\n", + " )\\\n", + " .setInputCols([\"document\", \"token\"]) \\\n", + " .setOutputCol(\"class\") \\\n", + " .setCandidateLabels([\"urgent\", \"mobile\", \"travel\", \"movie\", \"music\", \"sport\", \"weather\", \"technology\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VmHVmBCo8WR9" + }, + "source": [ + "- Let's save it on disk so it is easier to be moved around and also be used later via the `.load` function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9RBvw6p58WR9" + }, + "outputs": [], + "source": [ + "zero_shot_classifier.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DgUg2p0v8WR9" + }, + "source": [ + "Let's clean up stuff we don't need anymore" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cdBziZhw8WR-" + }, + "outputs": [], + "source": [ + "!rm -rf {MODEL_NAME}_tokenizer {MODEL_NAME}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_iwYIQ6U8WR-" + }, + "source": [ + "Awesome 😎 !\n", + "\n", + "This is your DistilBertForSequenceClassification model from HuggingFace 🤗 
loaded and saved by Spark NLP 🚀 " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8JAkr3438WR-", + "outputId": "2ec0cc08-2122-4301-e7dc-84fb91eabf5e" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "total 266440\n", + "-rw-r--r-- 1 root root 272826157 Jun 3 15:58 distilbert_classification_tensorflow\n", + "drwxr-xr-x 5 root root 4096 Jun 3 15:58 fields\n", + "drwxr-xr-x 2 root root 4096 Jun 3 15:58 metadata\n" + ] + } + ], + "source": [ + "! ls -l {MODEL_NAME}_spark_nlp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D5c2xWtt8WR-" + }, + "source": [ + "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny DistilBertForZeroShotClassification model 😊 " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JjxWoPhW8WR_" + }, + "outputs": [], + "source": [ + "zero_shot_classifier_loaded = DistilBertForZeroShotClassification.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n", + " .setInputCols([\"document\",'token'])\\\n", + " .setOutputCol(\"class\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rAITDhUg8WSA" + }, + "source": [ + "This is how you can use your loaded classifier model in a Spark NLP 🚀 pipeline:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "b4svOlV88WSA", + "outputId": "da5aefa6-efb2-43f4-9cf4-537cac5afe3b" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------------+\n", + "| result|\n", + "+------------+\n", + "| [mobile]|\n", + "|[technology]|\n", + "| [mobile]|\n", + "| [travel]|\n", + "| [weather]|\n", + "| [sport]|\n", + "| [urgent]|\n", + "+------------+\n", + "\n" + ] + } + ], + "source": [ + "from sparknlp.base import *\n", + "from sparknlp.annotator import 
*\n", + "from pyspark.ml import Pipeline, PipelineModel\n", + "\n", + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "tokenizer = Tokenizer().setInputCols(\"document\").setOutputCol(\"token\")\n", + "\n", + "pipeline = Pipeline(stages=[\n", + " document_assembler,\n", + " tokenizer,\n", + " zero_shot_classifier_loaded\n", + "])\n", + "\n", + "text = [[\"I have a problem with my iphone that needs to be resolved asap!!\"],\n", + " [\"Last week I upgraded my iOS version and ever since then my phone has been overheating whenever I use your app.\"],\n", + " [\"I have a phone and I love it!\"],\n", + " [\"I really want to visit Germany and I am planning to go there next year.\"],\n", + " [\"Let's watch some movies tonight! I am in the mood for a horror movie.\"],\n", + " [\"Have you watched the match yesterday? It was a great game!\"],\n", + " [\"We need to harry up and get to the airport. We are going to miss our flight!\"]]\n", + "\n", + "# create a DataFrame in PySpark\n", + "inputDataset = spark.createDataFrame(text, [\"text\"])\n", + "model = pipeline.fit(inputDataset)\n", + "model.transform(inputDataset).select(\"class.result\").show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "26gEdXR28WSB" + }, + "source": [ + "That's it! 
You can now go wild and use hundreds of \n", + "`DistilBertForSequenceClassification` models as zero-shot classifiers from HuggingFace 🤗 in Spark NLP 🚀" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python [conda env:nlpdev]", + "language": "python", + "name": "conda-env-nlpdev-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "f8a1ac89cf5e4a26ad6d30c03a2b8e4d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_a2a5e6ebdac742f7b1d4d33123c1c744", + "IPY_MODEL_b10a1e979e3344ee857b1dfdf88ca748", + "IPY_MODEL_851a51a9db664e2ca38b6d195384f47a" + ], + "layout": "IPY_MODEL_83fbc4e3720b4791aedc53b77ed3cb19" + } + }, + "a2a5e6ebdac742f7b1d4d33123c1c744": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9d2fa61ddebc49d88c0e9e44d83cf36e", + "placeholder": "​", + "style": "IPY_MODEL_66238454a296491e8bd5ef008d450e38", + "value": "Downloading 
(…)solve/main/vocab.txt: 100%" + } + }, + "b10a1e979e3344ee857b1dfdf88ca748": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_695c064280044e169f15d9347ab23281", + "max": 231508, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_726d4e34581145479154f456c696c278", + "value": 231508 + } + }, + "851a51a9db664e2ca38b6d195384f47a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6df0bcc1a5f144b1bc7fe6f3cfa6a05f", + "placeholder": "​", + "style": "IPY_MODEL_0706c29c1d3c41c2a964f61b2fb72e20", + "value": " 232k/232k [00:00<00:00, 1.43MB/s]" + } + }, + "83fbc4e3720b4791aedc53b77ed3cb19": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, 
+ "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9d2fa61ddebc49d88c0e9e44d83cf36e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "66238454a296491e8bd5ef008d450e38": { + "model_module": 
"@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "695c064280044e169f15d9347ab23281": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "726d4e34581145479154f456c696c278": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "6df0bcc1a5f144b1bc7fe6f3cfa6a05f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0706c29c1d3c41c2a964f61b2fb72e20": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9535415ff7c24a38b8d2e50e3774af01": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": 
"1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_660ab89806b94f72986d05f86785a4ad", + "IPY_MODEL_13c72780c3bc4b3987fb5f78bb2a4904", + "IPY_MODEL_f9436cd1fbbe4d75917b1a4171fed17a" + ], + "layout": "IPY_MODEL_ace0263340a34ac9b8579d5b4666faa7" + } + }, + "660ab89806b94f72986d05f86785a4ad": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b23d9d2b72d0488caf2b5dcca31b6314", + "placeholder": "​", + "style": "IPY_MODEL_ce75b803b79a43969269fdf6a890e16e", + "value": "Downloading (…)cial_tokens_map.json: 100%" + } + }, + "13c72780c3bc4b3987fb5f78bb2a4904": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c9926babdab441b8a0f2a4c4f59ef92b", + "max": 112, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_5399e1fe768345a490175e21a1ece578", + "value": 112 + } + }, + "f9436cd1fbbe4d75917b1a4171fed17a": { + 
"model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3abdf0fcd9e54331b70c8a5008f0c1b5", + "placeholder": "​", + "style": "IPY_MODEL_58dc8096add9432180fd5c8b6eb101c4", + "value": " 112/112 [00:00<00:00, 4.22kB/s]" + } + }, + "ace0263340a34ac9b8579d5b4666faa7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b23d9d2b72d0488caf2b5dcca31b6314": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + 
"model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ce75b803b79a43969269fdf6a890e16e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c9926babdab441b8a0f2a4c4f59ef92b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", 
+ "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5399e1fe768345a490175e21a1ece578": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "3abdf0fcd9e54331b70c8a5008f0c1b5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + 
"grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "58dc8096add9432180fd5c8b6eb101c4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c73aaf2bcb674518bb751f508f09292a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_09818de216094490870001aca113adba", + "IPY_MODEL_856bda93ec8941be917ab99623ca4851", + "IPY_MODEL_ed5990ff8df04548bd259ae01b6e134a" + ], + "layout": "IPY_MODEL_8de2583fc33d48fd8e71b70c9919ea24" + } + }, + "09818de216094490870001aca113adba": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_522d095cdad54bd48a22260924ecb9c2", + "placeholder": "​", + "style": "IPY_MODEL_f872b7001bcc436985d5cc2e72f7482c", + "value": "Downloading (…)okenizer_config.json: 100%" + } + }, + "856bda93ec8941be917ab99623ca4851": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_cd2d0543ae6242d2a5d9571d5d4d0726", + "max": 258, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_262a30c072604c048c828db2cd210176", + "value": 258 + } + }, + "ed5990ff8df04548bd259ae01b6e134a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_40e34c26038146de9224f61d7bbda5b4", + "placeholder": "​", + "style": "IPY_MODEL_66698d2afeab47458a94bb4aa089184f", + "value": " 258/258 [00:00<00:00, 12.2kB/s]" + } + }, + "8de2583fc33d48fd8e71b70c9919ea24": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "522d095cdad54bd48a22260924ecb9c2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + 
"margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f872b7001bcc436985d5cc2e72f7482c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "cd2d0543ae6242d2a5d9571d5d4d0726": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + 
"visibility": null, + "width": null + } + }, + "262a30c072604c048c828db2cd210176": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "40e34c26038146de9224f61d7bbda5b4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "66698d2afeab47458a94bb4aa089184f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "16d73d0610d04e52a0a93a45fe09854c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_9a1344b59691413480a60f4e3d8cb741", + "IPY_MODEL_d7af4f84b0ee43f7a26bb4ea7d72f048", + "IPY_MODEL_7eb50fb21bf74d0f928741211108ea94" + ], + "layout": "IPY_MODEL_b7599ef85f9943e8874a30d5ca071825" + } + }, + "9a1344b59691413480a60f4e3d8cb741": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d0993edddf534814be5e9e9c726e6011", + "placeholder": "​", + "style": "IPY_MODEL_01c0067c52b44de59fe07165babc8594", + "value": "Downloading (…)lve/main/config.json: 100%" + } + }, + "d7af4f84b0ee43f7a26bb4ea7d72f048": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_abd7567a0bd0495c9a721843786a276d", + "max": 776, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_3d71b8d6a7bd4e26a0e8f6577dbc6d93", + "value": 776 + } + }, + "7eb50fb21bf74d0f928741211108ea94": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c954347f47894d1daa8fb2802e6ebb92", + "placeholder": "​", + "style": "IPY_MODEL_38ee16f0a5d84d92a0be6b3bef817dfe", + "value": " 776/776 [00:00<00:00, 27.6kB/s]" + } + }, + "b7599ef85f9943e8874a30d5ca071825": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + 
"min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d0993edddf534814be5e9e9c726e6011": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "01c0067c52b44de59fe07165babc8594": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + 
"abd7567a0bd0495c9a721843786a276d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3d71b8d6a7bd4e26a0e8f6577dbc6d93": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "c954347f47894d1daa8fb2802e6ebb92": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "38ee16f0a5d84d92a0be6b3bef817dfe": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "68a558a48ab144a28137110ffc0dcf3b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_341116f0e5074e3f8ce48b1cb0709b00", + "IPY_MODEL_cc903d6ee439487ab10b881d10167ccf", 
+ "IPY_MODEL_9a094b07268445b0a7de94bb21e92707" + ], + "layout": "IPY_MODEL_afaa696d4c2c40df8fcc27ce79f634f1" + } + }, + "341116f0e5074e3f8ce48b1cb0709b00": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4db458113c9b414d8044cfb8adb7bbba", + "placeholder": "​", + "style": "IPY_MODEL_38b3aa539af04122a723bb79bee08485", + "value": "Downloading tf_model.h5: 100%" + } + }, + "cc903d6ee439487ab10b881d10167ccf": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_57c1248bde6549f4aa5be8aa9b5a353f", + "max": 267954880, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_830a592bd3594d2cae4fe35c0620b94e", + "value": 267954880 + } + }, + "9a094b07268445b0a7de94bb21e92707": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + 
"description_tooltip": null, + "layout": "IPY_MODEL_4b5a8de4b92d412eb59306984ac327b1", + "placeholder": "​", + "style": "IPY_MODEL_5cf0e182cba142e7a7e14bac01e39e04", + "value": " 268M/268M [00:06<00:00, 40.4MB/s]" + } + }, + "afaa696d4c2c40df8fcc27ce79f634f1": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4db458113c9b414d8044cfb8adb7bbba": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + 
"flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "38b3aa539af04122a723bb79bee08485": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "57c1248bde6549f4aa5be8aa9b5a353f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": 
null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "830a592bd3594d2cae4fe35c0620b94e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "4b5a8de4b92d412eb59306984ac327b1": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + 
"overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5cf0e182cba142e7a7e14bac01e39e04": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/examples/python/transformers/HuggingFace_in_Spark_NLP_RoBertaForZeroShotClassification.ipynb b/examples/python/transformers/HuggingFace_in_Spark_NLP_RoBertaForZeroShotClassification.ipynb new file mode 100644 index 00000000000000..22ebb65c7945e0 --- /dev/null +++ b/examples/python/transformers/HuggingFace_in_Spark_NLP_RoBertaForZeroShotClassification.ipynb @@ -0,0 +1,2839 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "8IXf_Q668WRo" + }, + "source": [ + "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/HuggingFace%20in%20Spark%20NLP%20-%20RoBertaForZeroShotClassification.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fDfihUkE8WRr" + }, + "source": [ + "## Import RoBertaForZeroShotClassification models from HuggingFace 🤗 into Spark NLP 🚀 \n", + "\n", + "Let's keep in mind a few things before we start 😊 \n", + "\n", + "- This feature is only in `Spark NLP 4.4.2` and after. 
So please make sure you have upgraded to the latest Spark NLP release\n", + "- You can import RoBerta models trained/fine-tuned for sequence classification via `RobertaForSequenceClassification` or `TFRobertaForSequenceClassification`. We can use these models for zero-shot classification.\n", + " - These models are usually under the `Sequence Classification` category and have `roberta` in their labels\n", + " - For zero-shot classification, we will usually use models trained on NLI data sets for best performance.\n", + "- Reference: [TFRobertaForSequenceClassification](https://huggingface.co/docs/transformers/v4.29.1/en/model_doc/roberta#transformers.TFRobertaForSequenceClassification)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vMg3NbLo8WRs" + }, + "source": [ + "## Export and Save HuggingFace model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ykej1XKH8WRu" + }, + "source": [ + "- Let's install `HuggingFace` and `TensorFlow`. You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n", + "- We lock TensorFlow on version `2.11.0` and Transformers on `4.25.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "yn28bSQi8WRu", + "outputId": "ca6816ea-232a-4d44-8526-d14948561b0a" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.8/5.8 MB\u001b[0m \u001b[31m51.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m236.8/236.8 kB\u001b[0m \u001b[31m21.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m95.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m68.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m58.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m88.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m38.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m104.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "tensorflow-datasets 4.9.2 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n", + "tensorflow-metadata 1.13.1 requires protobuf<5,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0m" + ] + } + ], + "source": [ + "!pip install -q transformers==4.25.1 tensorflow==2.11.0" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ehfCmKt98WRw" + }, + "source": [ + "- HuggingFace comes with a native `saved_model` feature inside the `save_pretrained` function for TensorFlow based models. We will use that to save it as TF `SavedModel`.\n", + "- We'll use [cross-encoder/nli-roberta-base](https://huggingface.co/cross-encoder/nli-roberta-base) model from HuggingFace as an example\n", + " - For zero-shot classification, we will usually use models trained on the (M)NLI data set for best performance.\n", + "- In addition to `TFRobertaForSequenceClassification` we also need to save the `RobertaTokenizer`. This is the same for every model; these are assets needed for tokenization inside Spark NLP." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 333, + "referenced_widgets": [ + "13304044c4d74d14af90338c5f3d5493", + "8bf57ab59ef7430298a33e893a6b6743", + "a7bc950c63434a4ea756b0689e69c0fb", + "fe6473dcaecc4bba8d4c5b1d24b56dde", + "1777d601c93842d7a8d6439671ffc4b6", + "a2a74759f3544822911bb9d208e19398", + "3f2f0b73ebb845c3be7bb024abcc168c", + "1f7796c027b247808ff57f0bc959c037", + "20c79f8d5e68453c961f8bc758b9cc05", + "b6302a2a129743f981b659778eb11605", + "eed2c7f332f247d9b794ae800c1e2604", + "7683b71b597340f9be1fd0df61d2e26e", + "7d61763ef4324825ac49027c0c503a41", + "317c2bd80fe34fc5beee20f741abe120", + "776446051c914eadb326d288dd25260e", + "bb9dcee68fc240a1863665efc8ac8798", + "4e83111b26c547329461050650b88ca5", + "ce5514402f6141ef8f958447036ff6d7", + "27db9b81b53141719d6916e8d90e43f1", + "f0432c9b017b46acab7bdb2f5839ea3f", + "95a6d8a45782463d90667e58acd39e8f", + "4b1ba2442b554cb9b127b86762211c5c", + "f46f7c60e06347bf8571060e0e33316d", + "8551ac2036b344b7a7c92ee4298aeb0c", + "e906ec85e3894643a6e3b16761600e9f", + "b1e94dce806f4a1bb6fdd780eec5d80e", + "05148ad940fc43b19ec9b4d7af0b339b", + "f397c9ef82fc410187f6e1efe3fc9775", + "9e557261a3c444a79e97cbedf0dd3d0b", + "85720396966f498db2975248ca100c6b", + "5b56199e5e7c447c98bb97b60c4241f5", + "a45f190ad5df4a6b9d058ea1162cfb03", + "aafe8988f20f47afb773dd31a8233ce7", + "9687bb7a41ef4e0d815f33b986d700e6", + "9268ee6bcfdc475c8af00fd363727531", + "66f2971ea05549768a5527a52f397305", + "6e4a2f12e3d34ee491d887d515842d98", + "8a7ac648178c40eb8caf767560e33eb0", + "b5e67cb55e624666ac086f4adc970959", + "2e8660d4bbaa46079abd448d8b628d18", + "df40d99ce76140019b8b47b2c75e752b", + "4a987727e9de47988c5ac0bf8b9e57dc", + "8d8a1992e90e4521bb6b9094540fd4e0", + "b4b8b11edb5043b89921d68d250603d5", + "d44690bc2a8c4466916e71a4c13cf0e1", + "5a9db27686e044c9bb299bcca53ec9e7", + "d7bef74ee03545968ea1cc559006d046", + "7d7007c1b14a40479479868563d26f9a", + 
"a85c3bdc21e2487b8a0e13bf212e2929", + "957865dabc1044249459439aae39c75d", + "e1ca1239d791483ba4b4300a9742fa36", + "5814640904e44dc8a28a3ff280c3ce9f", + "55a1233a0555432db42f32f37033c8e5", + "96c1b3c4c139440b87742a06ba7f16d8", + "d6f2944954564fed9819ae9cd63cb885", + "7475900bacdd41a185d080c62ad91a12", + "966f7d7fe6084c7d9c2a0c6668a106e5", + "091f5a1c52c24e9fb046e5da680b644f", + "ad95e9b538a24b2ca1f7163852d927bc", + "960d2088d5314f4986fa46a5338f02b7", + "fce6cd5893e64bf59fc4fd579a49a006", + "224f786d7cf1427a85150cde9f8eb09b", + "027552ca8e8c47d3ab477aebd8b53e03", + "c95275785ae441b186a887bbbbf38611", + "2a84bfe6579f47dea24d804b1671334c", + "4c08c4c39642458d96ed17d99943fcc3" + ] + }, + "id": "oCOSyDn88WRx", + "outputId": "3d224291-7e2a-4f8e-d5fd-3c045ec14bc3" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Downloading (…)olve/main/vocab.json: 0%| | 0.00/899k [00:00 Date: Thu, 8 Jun 2023 12:56:27 +0200 Subject: [PATCH 03/11] Fix and update new notebooks for XXXZeroShotForClassification --- docs/en/transformers.md | 8 +- ...NLP - BertForZeroShotClassification.ipynb} | 82 +++++++++++-------- ...P - DistilBertForZeroClassification.ipynb} | 0 ... 
- RoBertaForZeroShotClassification.ipynb} | 0 4 files changed, 56 insertions(+), 34 deletions(-) rename examples/python/transformers/{HuggingFace_in_Spark_NLP_BertForZeroShotClassification.ipynb => HuggingFace in Spark NLP - BertForZeroShotClassification.ipynb} (98%) rename examples/python/transformers/{HuggingFace_in_Spark_NLP_DistilBertForZeroClassification.ipynb => HuggingFace in Spark NLP - DistilBertForZeroClassification.ipynb} (100%) rename examples/python/transformers/{HuggingFace_in_Spark_NLP_RoBertaForZeroShotClassification.ipynb => HuggingFace in Spark NLP - RoBertaForZeroShotClassification.ipynb} (100%) diff --git a/docs/en/transformers.md b/docs/en/transformers.md index 88f5a8eb808812..b1f6e8d49b6602 100644 --- a/docs/en/transformers.md +++ b/docs/en/transformers.md @@ -88,7 +88,9 @@ We have extended support for `HuggingFace` 🤗 and `TF Hub` exported models s | SwinForImageClassification | | ❎ | [TFSwinForImageClassification](https://huggingface.co/docs/transformers/model_doc/swin#transformers.TFSwinForImageClassification) | | HubertForCTC | | ❎ | [TFHubertForCTC](https://huggingface.co/docs/transformers/model_doc/hubert#transformers.TFHubertForCTC) | | ConvNextForImageClassification | | ❎ | [TFConvNextForImageClassification](https://huggingface.co/docs/transformers/model_doc/convnext#transformers.TFConvNextForImageClassification) | - +| BertForZeroShotClassification | | ✅ | [TFBertForSequenceClassification](https://huggingface.co/docs/transformers/model_doc/bert#transformers.TFBertForSequenceClassification) | +| DistilBertForZeroShotClassification | | ✅ | [TFDistilBertForSequenceClassification](https://huggingface.co/docs/transformers/model_doc/distilbert#transformers.TFDistilBertForSequenceClassification) | +| RoBertaForZeroShotClassification | | ✅ | [TFRobertaForSequenceClassification](https://huggingface.co/docs/transformers/model_doc/roberta#transformers.TFRobertaForSequenceClassification) | | T5Transformer | | ❌ | | | MarianTransformer | | ❌ | | |
OpenAI GPT2 | | ❌ | | @@ -134,7 +136,9 @@ We have extended support for `HuggingFace` 🤗 and `TF Hub` exported models s | ViTForImageClassification | [HuggingFace in Spark NLP - ViTForImageClassification](https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/HuggingFace%20in%20Spark%20NLP%20-%20ViTForImageClassification.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/HuggingFace%20in%20Spark%20NLP%20-%20ViTForImageClassification.ipynb) | ConvNextForImageClassification | [HuggingFace in Spark NLP - ConvNextForImageClassification](https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/HuggingFace%20in%20Spark%20NLP%20-%20ConvNextForImageClassification.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/HuggingFace%20in%20Spark%20NLP%20-%20ConvNextForImageClassification.ipynb) | SwinForImageClassification | [HuggingFace in Spark NLP - SwinForImageClassification](https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/HuggingFace%20in%20Spark%20NLP%20-%20SwinForImageClassification.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/HuggingFace%20in%20Spark%20NLP%20-%20SwinForImageClassification.ipynb) - +| BertForZeroShotClassification | [HuggingFace in Spark NLP - BertForZeroShotClassification](https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/HuggingFace%20in%20Spark%20NLP%20-%20BertForZeroShotClassification.ipynb) | [![Open In
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/HuggingFace%20in%20Spark%20NLP%20-%20BertForZeroShotClassification.ipynb) | +| DistilBertForZeroShotClassification | [HuggingFace in Spark NLP - DistilBertForZeroShotClassification](https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/HuggingFace%20in%20Spark%20NLP%20-%20DistilBertForZeroShotClassification.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/HuggingFace%20in%20Spark%20NLP%20-%20DistilBertForZeroShotClassification.ipynb) | +| RoBertaForZeroShotClassification | [HuggingFace in Spark NLP - RoBertaForZeroShotClassification](https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/HuggingFace%20in%20Spark%20NLP%20-%20RoBertaForZeroShotClassification.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/HuggingFace%20in%20Spark%20NLP%20-%20RoBertaForZeroShotClassification.ipynb) | #### TF Hub to Spark NLP diff --git a/examples/python/transformers/HuggingFace_in_Spark_NLP_BertForZeroShotClassification.ipynb b/examples/python/transformers/HuggingFace in Spark NLP - BertForZeroShotClassification.ipynb similarity index 98% rename from examples/python/transformers/HuggingFace_in_Spark_NLP_BertForZeroShotClassification.ipynb rename to examples/python/transformers/HuggingFace in Spark NLP - BertForZeroShotClassification.ipynb index 796edc7d0ac0e1..fba2dadc539b14 100644 --- a/examples/python/transformers/HuggingFace_in_Spark_NLP_BertForZeroShotClassification.ipynb +++ b/examples/python/transformers/HuggingFace in Spark NLP - BertForZeroShotClassification.ipynb @@ -1,6 
+1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "8IXf_Q668WRo" @@ -12,6 +13,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "fDfihUkE8WRr" @@ -29,6 +31,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "vMg3NbLo8WRs" @@ -38,6 +41,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "Ykej1XKH8WRu" @@ -59,8 +63,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.8/5.8 MB\u001b[0m \u001b[31m63.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", @@ -83,6 +87,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "ehfCmKt98WRw" @@ -95,6 +100,27 @@ }, { "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LsiRkfEBQTzS", + "outputId": "f80aa406-d04c-4541-ba08-37cd63ad5065" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "All PyTorch model weights were used when initializing TFBertForSequenceClassification.\n", + "\n", + "All the weights of TFBertForSequenceClassification were initialized from the PyTorch model.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.\n", + "WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn while saving (showing 5 of 420). 
These functions will not be directly callable after loading.\n" + ] + } + ], "source": [ "from transformers import TFBertForSequenceClassification, BertTokenizer \n", "import tensorflow as tf\n", @@ -123,30 +149,10 @@ " return model(input)\n", "\n", "model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True, signatures={\"serving_default\": serving_fn})" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "LsiRkfEBQTzS", - "outputId": "f80aa406-d04c-4541-ba08-37cd63ad5065" - }, - "execution_count": 3, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "All PyTorch model weights were used when initializing TFBertForSequenceClassification.\n", - "\n", - "All the weights of TFBertForSequenceClassification were initialized from the PyTorch model.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.\n", - "WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn while saving (showing 5 of 420). 
These functions will not be directly callable after loading.\n" - ] - } ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "eDjo0QGq8WRy" @@ -167,8 +173,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "total 427968\n", "-rw-r--r-- 1 root root 813 Jun 6 15:13 config.json\n", @@ -193,8 +199,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "total 9208\n", "drwxr-xr-x 2 root root 4096 Jun 6 15:13 assets\n", @@ -221,8 +227,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "total 236\n", "-rw-r--r-- 1 root root 125 Jun 6 15:12 special_tokens_map.json\n", @@ -236,6 +242,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "gjrYDipS8WR2" @@ -264,16 +271,16 @@ "cell_type": "code", "execution_count": 8, "metadata": { - "id": "WPvOXbeZ8WR4", "colab": { "base_uri": "https://localhost:8080/" }, + "id": "WPvOXbeZ8WR4", "outputId": "ba3ac9d9-bcbe-4ca1-ff23-f163c667fea8" }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "['contradiction', 'entailment', 'neutral']\n" ] @@ -289,6 +296,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "UzQ650AZ8WR4" @@ -309,8 +317,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "total 232\n", "-rw-r--r-- 1 root root 32 Jun 6 15:14 labels.txt\n", @@ -323,6 +331,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "zk28iNof8WR5" @@ -332,6 +341,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "J__aVVu48WR5" @@ -353,8 +363,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Installing PySpark 3.2.3 and Spark NLP 4.4.3\n", "setup Colab for PySpark 3.2.3 and Spark NLP 4.4.3\n", @@ -371,6 +381,7 @@ ] }, { + 
"attachments": {}, "cell_type": "markdown", "metadata": { "id": "5u9B2ldj8WR6" @@ -393,6 +404,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "rOEy0EXR8WR7" @@ -425,6 +437,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "VmHVmBCo8WR9" @@ -445,6 +458,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "DgUg2p0v8WR9" @@ -465,6 +479,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "_iwYIQ6U8WR-" @@ -487,8 +502,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "total 436628\n", "-rw-r--r-- 1 root root 447094331 Jun 6 15:16 bert_classification_tensorflow\n", @@ -502,6 +517,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "D5c2xWtt8WR-" @@ -524,6 +540,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "rAITDhUg8WSA" @@ -544,8 +561,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "+------------+\n", "| result|\n", @@ -594,6 +611,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "26gEdXR28WSB" @@ -627,4 +645,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/examples/python/transformers/HuggingFace_in_Spark_NLP_DistilBertForZeroClassification.ipynb b/examples/python/transformers/HuggingFace in Spark NLP - DistilBertForZeroClassification.ipynb similarity index 100% rename from examples/python/transformers/HuggingFace_in_Spark_NLP_DistilBertForZeroClassification.ipynb rename to examples/python/transformers/HuggingFace in Spark NLP - DistilBertForZeroClassification.ipynb diff --git a/examples/python/transformers/HuggingFace_in_Spark_NLP_RoBertaForZeroShotClassification.ipynb b/examples/python/transformers/HuggingFace in Spark NLP - RoBertaForZeroShotClassification.ipynb similarity index 100% rename from 
examples/python/transformers/HuggingFace_in_Spark_NLP_RoBertaForZeroShotClassification.ipynb rename to examples/python/transformers/HuggingFace in Spark NLP - RoBertaForZeroShotClassification.ipynb From 6bde8c6010905e53a61cf69b45a819f8f39da793 Mon Sep 17 00:00:00 2001 From: Maziyar Panahi Date: Thu, 8 Jun 2023 13:08:31 +0200 Subject: [PATCH 04/11] Fix the wrong url for examples [skip test] --- python/docs/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/docs/index.rst b/python/docs/index.rst index e388557b1e77c9..3319fa9b979dc5 100644 --- a/python/docs/index.rst +++ b/python/docs/index.rst @@ -21,7 +21,7 @@ Spark NLP Documentation ####################### -`Main Page `_ | `GitHub `_ | `Issues `_ | `Workshop `_ | `Models Hub `_ +`Main Page `_ | `GitHub `_ | `Issues `_ | `Examples `_ | `Models Hub `_ Welcome to Spark NLP's Python documentation! This page contains information how to use the library with examples. From d7636a247bdcfe6a5f8ae87ec2e15d5804e9f365 Mon Sep 17 00:00:00 2001 From: Maziyar Panahi Date: Thu, 8 Jun 2023 13:25:40 +0200 Subject: [PATCH 05/11] Bump version to 4.4.4 [run doc] --- README.md | 88 +- build.sbt | 2 +- docs/en/concepts.md | 2 +- docs/en/examples.md | 4 +- docs/en/hardware_acceleration.md | 2 +- docs/en/install.md | 54 +- docs/en/spark_nlp.md | 2 +- examples/docker/README.md | 4 +- ... 
NLP - BertForZeroShotClassification.ipynb | 18 +- ...LP - DistilBertForZeroClassification.ipynb | 1204 ++++++------- ...P - RoBertaForZeroShotClassification.ipynb | 1566 +++++++++-------- python/README.md | 88 +- python/docs/conf.py | 2 +- python/setup.py | 2 +- python/sparknlp/__init__.py | 4 +- scripts/colab_setup.sh | 2 +- scripts/kaggle_setup.sh | 2 +- scripts/sagemaker_setup.sh | 2 +- .../scala/com/johnsnowlabs/nlp/SparkNLP.scala | 2 +- .../scala/com/johnsnowlabs/util/Build.scala | 2 +- 20 files changed, 1523 insertions(+), 1529 deletions(-) diff --git a/README.md b/README.md index 895a7ec404d388..1e7009ef6ab9c6 100644 --- a/README.md +++ b/README.md @@ -165,7 +165,7 @@ To use Spark NLP you need the following requirements: **GPU (optional):** -Spark NLP 4.4.3 is built with TensorFlow 2.7.1 and the following NVIDIA® software are only required for GPU support: +Spark NLP 4.4.4 is built with TensorFlow 2.7.1 and the following NVIDIA® software are only required for GPU support: - NVIDIA® GPU drivers version 450.80.02 or higher - CUDA® Toolkit 11.2 @@ -181,7 +181,7 @@ $ java -version $ conda create -n sparknlp python=3.7 -y $ conda activate sparknlp # spark-nlp by default is based on pyspark 3.x -$ pip install spark-nlp==4.4.3 pyspark==3.3.1 +$ pip install spark-nlp==4.4.4 pyspark==3.3.1 ``` In Python console or Jupyter `Python3` kernel: @@ -226,7 +226,7 @@ For more examples, you can visit our dedicated [examples](https://github.com/Joh ## Apache Spark Support -Spark NLP *4.4.3* has been built on top of Apache Spark 3.2 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, and 3.4.x +Spark NLP *4.4.4* has been built on top of Apache Spark 3.2 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, and 3.4.x | Spark NLP | Apache Spark 2.3.x | Apache Spark 2.4.x | Apache Spark 3.0.x | Apache Spark 3.1.x | Apache Spark 3.2.x | Apache Spark 3.3.x | Apache Spark 3.4.x | 
|-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------| @@ -265,7 +265,7 @@ Find out more about `Spark NLP` versions from our [release notes](https://github ## Databricks Support -Spark NLP 4.4.3 has been tested and is compatible with the following runtimes: +Spark NLP 4.4.4 has been tested and is compatible with the following runtimes: **CPU:** @@ -322,7 +322,7 @@ runtimes supporting CUDA 11 are 9.x and above as listed under GPU. ## EMR Support -Spark NLP 4.4.3 has been tested and is compatible with the following EMR releases: +Spark NLP 4.4.4 has been tested and is compatible with the following EMR releases: - emr-6.2.0 - emr-6.3.0 @@ -365,11 +365,11 @@ Spark NLP supports all major releases of Apache Spark 3.0.x, Apache Spark 3.1.x, ```sh # CPU -spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3 +spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4 -pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3 +pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4 -spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3 +spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4 ``` The `spark-nlp` has been published to @@ -378,11 +378,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s ```sh # GPU -spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:4.4.3 +spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:4.4.4 -pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:4.4.3 +pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:4.4.4 -spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:4.4.3 +spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:4.4.4 ``` @@ -392,11 +392,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s ```sh # AArch64 -spark-shell --packages 
com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:4.4.3 +spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:4.4.4 -pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:4.4.3 +pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:4.4.4 -spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:4.4.3 +spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:4.4.4 ``` @@ -406,11 +406,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s ```sh # M1/M2 (Apple Silicon) -spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:4.4.3 +spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:4.4.4 -pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:4.4.3 +pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:4.4.4 -spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:4.4.3 +spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:4.4.4 ``` @@ -424,7 +424,7 @@ set in your SparkSession: spark-shell \ --driver-memory 16g \ --conf spark.kryoserializer.buffer.max=2000M \ - --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3 + --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4 ``` ## Scala @@ -442,7 +442,7 @@ coordinates: com.johnsnowlabs.nlp spark-nlp_2.12 - 4.4.3 + 4.4.4 ``` @@ -453,7 +453,7 @@ coordinates: com.johnsnowlabs.nlp spark-nlp-gpu_2.12 - 4.4.3 + 4.4.4 ``` @@ -464,7 +464,7 @@ coordinates: com.johnsnowlabs.nlp spark-nlp-aarch64_2.12 - 4.4.3 + 4.4.4 ``` @@ -475,7 +475,7 @@ coordinates: com.johnsnowlabs.nlp spark-nlp-silicon_2.12 - 4.4.3 + 4.4.4 ``` @@ -485,28 +485,28 @@ coordinates: ```sbtshell // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp -libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "4.4.3" +libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "4.4.4" ``` **spark-nlp-gpu:** ```sbtshell // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-gpu 
-libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "4.4.3" +libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "4.4.4" ``` **spark-nlp-aarch64:** ```sbtshell // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-aarch64 -libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "4.4.3" +libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "4.4.4" ``` **spark-nlp-silicon:** ```sbtshell // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-silicon -libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "4.4.3" +libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "4.4.4" ``` Maven @@ -528,7 +528,7 @@ If you installed pyspark through pip/conda, you can install `spark-nlp` through Pip: ```bash -pip install spark-nlp==4.4.3 +pip install spark-nlp==4.4.4 ``` Conda: @@ -557,7 +557,7 @@ spark = SparkSession.builder .config("spark.driver.memory", "16G") .config("spark.driver.maxResultSize", "0") .config("spark.kryoserializer.buffer.max", "2000M") - .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3") + .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4") .getOrCreate() ``` @@ -628,7 +628,7 @@ Use either one of the following options - Add the following Maven Coordinates to the interpreter's library list ```bash -com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3 +com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4 ``` - Add a path to pre-built jar from [here](#compiled-jars) in the interpreter's library list making sure the jar is @@ -639,7 +639,7 @@ com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3 Apart from the previous step, install the python module through pip ```bash -pip install spark-nlp==4.4.3 +pip install spark-nlp==4.4.4 ``` Or you can install `spark-nlp` from inside Zeppelin by using Conda: @@ -667,7 +667,7 @@ launch the Jupyter from the same Python environment: $ conda create -n sparknlp python=3.8 -y $ conda 
activate sparknlp # spark-nlp by default is based on pyspark 3.x -$ pip install spark-nlp==4.4.3 pyspark==3.3.1 jupyter +$ pip install spark-nlp==4.4.4 pyspark==3.3.1 jupyter $ jupyter notebook ``` @@ -684,7 +684,7 @@ export PYSPARK_PYTHON=python3 export PYSPARK_DRIVER_PYTHON=jupyter export PYSPARK_DRIVER_PYTHON_OPTS=notebook -pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3 +pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4 ``` Alternatively, you can mix in using `--jars` option for pyspark + `pip install spark-nlp` @@ -711,7 +711,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi # -s is for spark-nlp # -g will enable upgrading libcudnn8 to 8.1.0 on Google Colab for GPU usage # by default they are set to the latest -!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 4.4.3 +!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 4.4.4 ``` [Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb) @@ -734,7 +734,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi # -s is for spark-nlp # -g will enable upgrading libcudnn8 to 8.1.0 on Kaggle for GPU usage # by default they are set to the latest -!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 4.4.3 +!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 4.4.4 ``` [Spark NLP quick start on Kaggle Kernel](https://www.kaggle.com/mozzie/spark-nlp-named-entity-recognition) is a live @@ -753,9 +753,9 @@ demo on Kaggle Kernel that performs named entity recognitions by using Spark NLP 3. In `Libraries` tab inside your cluster you need to follow these steps: - 3.1. Install New -> PyPI -> `spark-nlp==4.4.3` -> Install + 3.1. Install New -> PyPI -> `spark-nlp==4.4.4` -> Install - 3.2. 
Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3` -> Install + 3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4` -> Install 4. Now you can attach your notebook to the cluster and use Spark NLP! @@ -806,7 +806,7 @@ A sample of your software configuration in JSON on S3 (must be public access): "spark.kryoserializer.buffer.max": "2000M", "spark.serializer": "org.apache.spark.serializer.KryoSerializer", "spark.driver.maxResultSize": "0", - "spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3" + "spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4" } }] ``` @@ -815,7 +815,7 @@ A sample of AWS CLI to launch EMR cluster: ```.sh aws emr create-cluster \ ---name "Spark NLP 4.4.3" \ +--name "Spark NLP 4.4.4" \ --release-label emr-6.2.0 \ --applications Name=Hadoop Name=Spark Name=Hive \ --instance-type m4.4xlarge \ @@ -879,7 +879,7 @@ gcloud dataproc clusters create ${CLUSTER_NAME} \ --enable-component-gateway \ --metadata 'PIP_PACKAGES=spark-nlp spark-nlp-display google-cloud-bigquery google-cloud-storage' \ --initialization-actions gs://goog-dataproc-initialization-actions-${REGION}/python/pip-install.sh \ - --properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3 + --properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4 ``` 2. On an existing one, you need to install spark-nlp and spark-nlp-display packages from PyPI. 
@@ -918,7 +918,7 @@ spark = SparkSession.builder .config("spark.kryoserializer.buffer.max", "2000m") .config("spark.jsl.settings.pretrained.cache_folder", "sample_data/pretrained") .config("spark.jsl.settings.storage.cluster_tmp_dir", "sample_data/storage") - .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3") + .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4") .getOrCreate() ``` @@ -932,7 +932,7 @@ spark-shell \ --conf spark.kryoserializer.buffer.max=2000M \ --conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \ --conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \ - --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3 + --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4 ``` **pyspark:** @@ -945,7 +945,7 @@ pyspark \ --conf spark.kryoserializer.buffer.max=2000M \ --conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \ --conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \ - --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3 + --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4 ``` **Databricks:** @@ -1217,7 +1217,7 @@ spark = SparkSession.builder .config("spark.driver.memory", "16G") .config("spark.driver.maxResultSize", "0") .config("spark.kryoserializer.buffer.max", "2000M") - .config("spark.jars", "/tmp/spark-nlp-assembly-4.4.3.jar") + .config("spark.jars", "/tmp/spark-nlp-assembly-4.4.4.jar") .getOrCreate() ``` @@ -1226,7 +1226,7 @@ spark = SparkSession.builder version (3.0.x, 3.1.x, 3.2.x, 3.3.x, and 3.4.x) - If you are local, you can load the Fat JAR from your local FileSystem, however, if you are in a cluster setup you need to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. 
( - i.e., `hdfs:///tmp/spark-nlp-assembly-4.4.3.jar`) + i.e., `hdfs:///tmp/spark-nlp-assembly-4.4.4.jar`) Example of using pretrained Models and Pipelines in offline: diff --git a/build.sbt b/build.sbt index ba3b93220c48df..83330ac60cd9ba 100644 --- a/build.sbt +++ b/build.sbt @@ -6,7 +6,7 @@ name := getPackageName(is_silicon, is_gpu, is_aarch64) organization := "com.johnsnowlabs.nlp" -version := "4.4.3" +version := "4.4.4" (ThisBuild / scalaVersion) := scalaVer diff --git a/docs/en/concepts.md b/docs/en/concepts.md index f596fbef56a41e..e3d120d2cc7937 100644 --- a/docs/en/concepts.md +++ b/docs/en/concepts.md @@ -62,7 +62,7 @@ $ java -version $ conda create -n sparknlp python=3.7 -y $ conda activate sparknlp # spark-nlp by default is based on pyspark 3.x -$ pip install spark-nlp==4.4.3 pyspark==3.3.1 jupyter +$ pip install spark-nlp==4.4.4 pyspark==3.3.1 jupyter $ jupyter notebook ``` diff --git a/docs/en/examples.md b/docs/en/examples.md index 66743f9b4c4447..e42a18a2ba03a1 100644 --- a/docs/en/examples.md +++ b/docs/en/examples.md @@ -16,7 +16,7 @@ $ java -version # should be Java 8 (Oracle or OpenJDK) $ conda create -n sparknlp python=3.7 -y $ conda activate sparknlp -$ pip install spark-nlp==4.4.3 pyspark==3.3.1 +$ pip install spark-nlp==4.4.4 pyspark==3.3.1 ``` ## Google Colab Notebook @@ -36,7 +36,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi # -p is for pyspark # -s is for spark-nlp # by default they are set to the latest -!bash colab.sh -p 3.2.3 -s 4.4.3 +!bash colab.sh -p 3.2.3 -s 4.4.4 ``` [Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb) is a live demo on Google Colab that performs named entity recognitions and sentiment analysis by using Spark NLP pretrained pipelines. 
diff --git a/docs/en/hardware_acceleration.md b/docs/en/hardware_acceleration.md index 220d00bc5823d9..c0a2d66c476f04 100644 --- a/docs/en/hardware_acceleration.md +++ b/docs/en/hardware_acceleration.md @@ -49,7 +49,7 @@ Since the new Transformer models such as BERT for Word and Sentence embeddings a | DeBERTa Large | +477%(5.8x) | | Longformer Base | +52%(1.5x) | -Spark NLP 4.4.3 is built with TensorFlow 2.7.1 and the following NVIDIA® software are only required for GPU support: +Spark NLP 4.4.4 is built with TensorFlow 2.7.1 and the following NVIDIA® software are only required for GPU support: - NVIDIA® GPU drivers version 450.80.02 or higher - CUDA® Toolkit 11.2 diff --git a/docs/en/install.md b/docs/en/install.md index 90395f47dcee79..cdc02ce49bfe58 100644 --- a/docs/en/install.md +++ b/docs/en/install.md @@ -15,22 +15,22 @@ sidebar: ```bash # Install Spark NLP from PyPI -pip install spark-nlp==4.4.3 +pip install spark-nlp==4.4.4 # Install Spark NLP from Anacodna/Conda conda install -c johnsnowlabs spark-nlp # Load Spark NLP with Spark Shell -spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3 +spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4 # Load Spark NLP with PySpark -pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3 +pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4 # Load Spark NLP with Spark Submit -spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3 +spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4 # Load Spark NLP as external JAR after compiling and building Spark NLP by `sbt assembly` -spark-shell --jars spark-nlp-assembly-4.4.3.jar +spark-shell --jars spark-nlp-assembly-4.4.4.jar ``` ## Python @@ -49,7 +49,7 @@ $ java -version # should be Java 8 (Oracle or OpenJDK) $ conda create -n sparknlp python=3.8 -y $ conda activate sparknlp -$ pip install spark-nlp==4.4.3 pyspark==3.3.1 +$ pip install spark-nlp==4.4.4 pyspark==3.3.1 ``` Of course you will need to have jupyter 
installed in your system: @@ -76,7 +76,7 @@ spark = SparkSession.builder \ .config("spark.driver.memory","16G")\ .config("spark.driver.maxResultSize", "0") \ .config("spark.kryoserializer.buffer.max", "2000M")\ - .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3")\ + .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4")\ .getOrCreate() ``` @@ -91,7 +91,7 @@ spark = SparkSession.builder \ com.johnsnowlabs.nlp spark-nlp_2.12 - 4.4.3 + 4.4.4 ``` @@ -102,7 +102,7 @@ spark = SparkSession.builder \ com.johnsnowlabs.nlp spark-nlp-gpu_2.12 - 4.4.3 + 4.4.4 ``` @@ -113,7 +113,7 @@ spark = SparkSession.builder \ com.johnsnowlabs.nlp spark-nlp-silicon_2.12 - 4.4.3 + 4.4.4 ``` @@ -124,7 +124,7 @@ spark = SparkSession.builder \ com.johnsnowlabs.nlp spark-nlp-aarch64_2.12 - 4.4.3 + 4.4.4 ``` @@ -134,28 +134,28 @@ spark = SparkSession.builder \ ```scala // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp -libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "4.4.3" +libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "4.4.4" ``` **spark-nlp-gpu:** ```scala // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-gpu -libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "4.4.3" +libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "4.4.4" ``` **spark-nlp-silicon:** ```scala // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-silicon -libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "4.4.3" +libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "4.4.4" ``` **spark-nlp-aarch64:** ```scala // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-aarch64 -libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "4.4.3" +libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "4.4.4" ``` Maven Central: 
[https://mvnrepository.com/artifact/com.johnsnowlabs.nlp](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp) @@ -233,7 +233,7 @@ maven coordinates like these: com.johnsnowlabs.nlp spark-nlp-silicon_2.12 - 4.4.3 + 4.4.4 ``` @@ -241,7 +241,7 @@ or in case of sbt: ```scala // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp -libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "4.4.3" +libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "4.4.4" ``` If everything went well, you can now start Spark NLP with the `m1` flag set to `true`: @@ -274,7 +274,7 @@ spark = sparknlp.start(apple_silicon=True) ## Installation for Linux Aarch64 Systems -Starting from version 4.4.3, Spark NLP supports Linux systems running on an aarch64 +Starting from version 4.4.4, Spark NLP supports Linux systems running on an aarch64 processor architecture. The necessary dependencies have been built on Ubuntu 16.04, so a recent system with an environment of at least that will be needed. @@ -318,7 +318,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi # -p is for pyspark # -s is for spark-nlp # by default they are set to the latest -!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 4.4.3 +!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 4.4.4 ``` [Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb) is a live demo on Google Colab that performs named entity recognitions and sentiment analysis by using Spark NLP pretrained pipelines. @@ -337,7 +337,7 @@ Run the following code in Kaggle Kernel and start using spark-nlp right away. 
## Databricks Support -Spark NLP 4.4.3 has been tested and is compatible with the following runtimes: +Spark NLP 4.4.4 has been tested and is compatible with the following runtimes: **CPU:** @@ -403,7 +403,7 @@ NOTE: Spark NLP 4.0.x is based on TensorFlow 2.7.x which is compatible with CUDA 3.1. Install New -> PyPI -> `spark-nlp` -> Install - 3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3` -> Install + 3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4` -> Install 4. Now you can attach your notebook to the cluster and use Spark NLP! @@ -419,7 +419,7 @@ Note: You can import these notebooks by using their URLs. ## EMR Support -Spark NLP 4.4.3 has been tested and is compatible with the following EMR releases: +Spark NLP 4.4.4 has been tested and is compatible with the following EMR releases: - emr-6.2.0 - emr-6.3.0 @@ -477,7 +477,7 @@ A sample of your software configuration in JSON on S3 (must be public access): "spark.kryoserializer.buffer.max": "2000M", "spark.serializer": "org.apache.spark.serializer.KryoSerializer", "spark.driver.maxResultSize": "0", - "spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3" + "spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4" } } ] @@ -487,7 +487,7 @@ A sample of AWS CLI to launch EMR cluster: ```sh aws emr create-cluster \ ---name "Spark NLP 4.4.3" \ +--name "Spark NLP 4.4.4" \ --release-label emr-6.2.0 \ --applications Name=Hadoop Name=Spark Name=Hive \ --instance-type m4.4xlarge \ @@ -741,7 +741,7 @@ We recommend using `conda` to manage your Python environment on Windows. Now you can use the downloaded binary by navigating to `%SPARK_HOME%\bin` and running -Either create a conda env for python 3.6, install *pyspark==3.3.1 spark-nlp numpy* and use Jupyter/python console, or in the same conda env you can go to spark bin for *pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3*. 
+Either create a conda env for python 3.6, install *pyspark==3.3.1 spark-nlp numpy* and use Jupyter/python console, or in the same conda env you can go to spark bin for *pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4*. @@ -767,12 +767,12 @@ spark = SparkSession.builder \ .config("spark.driver.memory","16G")\ .config("spark.driver.maxResultSize", "0") \ .config("spark.kryoserializer.buffer.max", "2000M")\ - .config("spark.jars", "/tmp/spark-nlp-assembly-4.4.3.jar")\ + .config("spark.jars", "/tmp/spark-nlp-assembly-4.4.4.jar")\ .getOrCreate() ``` - You can download provided Fat JARs from each [release notes](https://github.com/JohnSnowLabs/spark-nlp/releases), please pay attention to pick the one that suits your environment depending on the device (CPU/GPU) and Apache Spark version (3.x) -- If you are local, you can load the Fat JAR from your local FileSystem, however, if you are in a cluster setup you need to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. (i.e., `hdfs:///tmp/spark-nlp-assembly-4.4.3.jar`) +- If you are local, you can load the Fat JAR from your local FileSystem, however, if you are in a cluster setup you need to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. (i.e., `hdfs:///tmp/spark-nlp-assembly-4.4.4.jar`) Example of using pretrained Models and Pipelines in offline: diff --git a/docs/en/spark_nlp.md b/docs/en/spark_nlp.md index 7c02b16e89aae8..8911f9c3bc22af 100644 --- a/docs/en/spark_nlp.md +++ b/docs/en/spark_nlp.md @@ -25,7 +25,7 @@ Spark NLP is built on top of **Apache Spark 3.x**. 
For using Spark NLP you need: **GPU (optional):** -Spark NLP 4.4.3 is built with TensorFlow 2.7.1 and the following NVIDIA® software are only required for GPU support: +Spark NLP 4.4.4 is built with TensorFlow 2.7.1 and the following NVIDIA® software are only required for GPU support: - NVIDIA® GPU drivers version 450.80.02 or higher - CUDA® Toolkit 11.2 diff --git a/examples/docker/README.md b/examples/docker/README.md index 9252576e26830f..cc40ddfef07794 100644 --- a/examples/docker/README.md +++ b/examples/docker/README.md @@ -73,7 +73,7 @@ docker run -it --name sparknlp-container \ --conf "spark.serializer"="org.apache.spark.serializer.KryoSerializer" \ --conf "spark.kryoserializer.buffer.max"="2000M" \ --conf "spark.driver.maxResultSize"="0" \ - --packages "com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3" + --packages "com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4" ``` To run the shell with GPU support, we use the image from [Jupyter Notebook with GPU @@ -91,5 +91,5 @@ docker run -it --name sparknlp-container \ --conf "spark.serializer"="org.apache.spark.serializer.KryoSerializer" \ --conf "spark.kryoserializer.buffer.max"="2000M" \ --conf "spark.driver.maxResultSize"="0" \ - --packages "com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:4.4.3" + --packages "com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:4.4.4" ``` diff --git a/examples/python/transformers/HuggingFace in Spark NLP - BertForZeroShotClassification.ipynb b/examples/python/transformers/HuggingFace in Spark NLP - BertForZeroShotClassification.ipynb index fba2dadc539b14..d368119dc2abee 100644 --- a/examples/python/transformers/HuggingFace in Spark NLP - BertForZeroShotClassification.ipynb +++ b/examples/python/transformers/HuggingFace in Spark NLP - BertForZeroShotClassification.ipynb @@ -353,7 +353,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -361,21 +361,7 @@ "id": "udnbTHNj8WR6", "outputId": 
"5c00752b-c7a0-4bad-b369-5052af7ffcb5" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Installing PySpark 3.2.3 and Spark NLP 4.4.3\n", - "setup Colab for PySpark 3.2.3 and Spark NLP 4.4.3\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.8/489.8 kB\u001b[0m \u001b[31m42.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m20.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" - ] - } - ], + "outputs": [], "source": [ "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash" ] diff --git a/examples/python/transformers/HuggingFace in Spark NLP - DistilBertForZeroClassification.ipynb b/examples/python/transformers/HuggingFace in Spark NLP - DistilBertForZeroClassification.ipynb index 139799db44700a..1f4cb19e2478d5 100644 --- a/examples/python/transformers/HuggingFace in Spark NLP - DistilBertForZeroClassification.ipynb +++ b/examples/python/transformers/HuggingFace in Spark NLP - DistilBertForZeroClassification.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "8IXf_Q668WRo" @@ -12,6 +13,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "fDfihUkE8WRr" @@ -29,6 +31,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "vMg3NbLo8WRs" @@ -38,6 +41,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "Ykej1XKH8WRu" @@ -59,8 +63,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", 
"text": [ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.8/5.8 MB\u001b[0m \u001b[31m62.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", @@ -83,6 +87,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "ehfCmKt98WRw" @@ -164,86 +169,86 @@ }, "outputs": [ { - "output_type": "display_data", "data": { - "text/plain": [ - "Downloading (…)solve/main/vocab.txt: 0%| | 0.00/232k [00:00 com.johnsnowlabs.nlp spark-nlp_2.12 - 4.4.3 + 4.4.4 ``` @@ -453,7 +453,7 @@ coordinates: com.johnsnowlabs.nlp spark-nlp-gpu_2.12 - 4.4.3 + 4.4.4 ``` @@ -464,7 +464,7 @@ coordinates: com.johnsnowlabs.nlp spark-nlp-aarch64_2.12 - 4.4.3 + 4.4.4 ``` @@ -475,7 +475,7 @@ coordinates: com.johnsnowlabs.nlp spark-nlp-silicon_2.12 - 4.4.3 + 4.4.4 ``` @@ -485,28 +485,28 @@ coordinates: ```sbtshell // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp -libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "4.4.3" +libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "4.4.4" ``` **spark-nlp-gpu:** ```sbtshell // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-gpu -libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "4.4.3" +libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "4.4.4" ``` **spark-nlp-aarch64:** ```sbtshell // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-aarch64 -libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "4.4.3" +libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "4.4.4" ``` **spark-nlp-silicon:** ```sbtshell // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-silicon -libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "4.4.3" +libraryDependencies += "com.johnsnowlabs.nlp" %% 
"spark-nlp-silicon" % "4.4.4" ``` Maven @@ -528,7 +528,7 @@ If you installed pyspark through pip/conda, you can install `spark-nlp` through Pip: ```bash -pip install spark-nlp==4.4.3 +pip install spark-nlp==4.4.4 ``` Conda: @@ -557,7 +557,7 @@ spark = SparkSession.builder .config("spark.driver.memory", "16G") .config("spark.driver.maxResultSize", "0") .config("spark.kryoserializer.buffer.max", "2000M") - .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3") + .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4") .getOrCreate() ``` @@ -628,7 +628,7 @@ Use either one of the following options - Add the following Maven Coordinates to the interpreter's library list ```bash -com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3 +com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4 ``` - Add a path to pre-built jar from [here](#compiled-jars) in the interpreter's library list making sure the jar is @@ -639,7 +639,7 @@ com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3 Apart from the previous step, install the python module through pip ```bash -pip install spark-nlp==4.4.3 +pip install spark-nlp==4.4.4 ``` Or you can install `spark-nlp` from inside Zeppelin by using Conda: @@ -667,7 +667,7 @@ launch the Jupyter from the same Python environment: $ conda create -n sparknlp python=3.8 -y $ conda activate sparknlp # spark-nlp by default is based on pyspark 3.x -$ pip install spark-nlp==4.4.3 pyspark==3.3.1 jupyter +$ pip install spark-nlp==4.4.4 pyspark==3.3.1 jupyter $ jupyter notebook ``` @@ -684,7 +684,7 @@ export PYSPARK_PYTHON=python3 export PYSPARK_DRIVER_PYTHON=jupyter export PYSPARK_DRIVER_PYTHON_OPTS=notebook -pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3 +pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4 ``` Alternatively, you can mix in using `--jars` option for pyspark + `pip install spark-nlp` @@ -711,7 +711,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi # -s is for spark-nlp # -g will 
enable upgrading libcudnn8 to 8.1.0 on Google Colab for GPU usage # by default they are set to the latest -!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 4.4.3 +!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 4.4.4 ``` [Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb) @@ -734,7 +734,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi # -s is for spark-nlp # -g will enable upgrading libcudnn8 to 8.1.0 on Kaggle for GPU usage # by default they are set to the latest -!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 4.4.3 +!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 4.4.4 ``` [Spark NLP quick start on Kaggle Kernel](https://www.kaggle.com/mozzie/spark-nlp-named-entity-recognition) is a live @@ -753,9 +753,9 @@ demo on Kaggle Kernel that performs named entity recognitions by using Spark NLP 3. In `Libraries` tab inside your cluster you need to follow these steps: - 3.1. Install New -> PyPI -> `spark-nlp==4.4.3` -> Install + 3.1. Install New -> PyPI -> `spark-nlp==4.4.4` -> Install - 3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3` -> Install + 3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4` -> Install 4. Now you can attach your notebook to the cluster and use Spark NLP! 
@@ -806,7 +806,7 @@ A sample of your software configuration in JSON on S3 (must be public access): "spark.kryoserializer.buffer.max": "2000M", "spark.serializer": "org.apache.spark.serializer.KryoSerializer", "spark.driver.maxResultSize": "0", - "spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3" + "spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4" } }] ``` @@ -815,7 +815,7 @@ A sample of AWS CLI to launch EMR cluster: ```.sh aws emr create-cluster \ ---name "Spark NLP 4.4.3" \ +--name "Spark NLP 4.4.4" \ --release-label emr-6.2.0 \ --applications Name=Hadoop Name=Spark Name=Hive \ --instance-type m4.4xlarge \ @@ -879,7 +879,7 @@ gcloud dataproc clusters create ${CLUSTER_NAME} \ --enable-component-gateway \ --metadata 'PIP_PACKAGES=spark-nlp spark-nlp-display google-cloud-bigquery google-cloud-storage' \ --initialization-actions gs://goog-dataproc-initialization-actions-${REGION}/python/pip-install.sh \ - --properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3 + --properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4 ``` 2. On an existing one, you need to install spark-nlp and spark-nlp-display packages from PyPI. 
@@ -918,7 +918,7 @@ spark = SparkSession.builder .config("spark.kryoserializer.buffer.max", "2000m") .config("spark.jsl.settings.pretrained.cache_folder", "sample_data/pretrained") .config("spark.jsl.settings.storage.cluster_tmp_dir", "sample_data/storage") - .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3") + .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4") .getOrCreate() ``` @@ -932,7 +932,7 @@ spark-shell \ --conf spark.kryoserializer.buffer.max=2000M \ --conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \ --conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \ - --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3 + --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4 ``` **pyspark:** @@ -945,7 +945,7 @@ pyspark \ --conf spark.kryoserializer.buffer.max=2000M \ --conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \ --conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \ - --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.3 + --packages com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4 ``` **Databricks:** @@ -1217,7 +1217,7 @@ spark = SparkSession.builder .config("spark.driver.memory", "16G") .config("spark.driver.maxResultSize", "0") .config("spark.kryoserializer.buffer.max", "2000M") - .config("spark.jars", "/tmp/spark-nlp-assembly-4.4.3.jar") + .config("spark.jars", "/tmp/spark-nlp-assembly-4.4.4.jar") .getOrCreate() ``` @@ -1226,7 +1226,7 @@ spark = SparkSession.builder version (3.0.x, 3.1.x, 3.2.x, 3.3.x, and 3.4.x) - If you are local, you can load the Fat JAR from your local FileSystem, however, if you are in a cluster setup you need to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. 
( - i.e., `hdfs:///tmp/spark-nlp-assembly-4.4.3.jar`) + i.e., `hdfs:///tmp/spark-nlp-assembly-4.4.4.jar`) Example of using pretrained Models and Pipelines in offline: diff --git a/python/docs/conf.py b/python/docs/conf.py index b96d1cb504268a..0ac5b6ac0781d1 100644 --- a/python/docs/conf.py +++ b/python/docs/conf.py @@ -23,7 +23,7 @@ author = "John Snow Labs" # The full version, including alpha/beta/rc tags -release = "4.4.3" +release = "4.4.4" pyspark_version = "3.2.3" # -- General configuration --------------------------------------------------- diff --git a/python/setup.py b/python/setup.py index 726ddc3a881d3e..8291ce89f2f2c3 100644 --- a/python/setup.py +++ b/python/setup.py @@ -41,7 +41,7 @@ # project code, see # https://packaging.python.org/en/latest/single_source_version.html - version='4.4.3', # Required + version='4.4.4', # Required # This is a one-line description or tagline of what your project does. This # corresponds to the 'Summary' metadata field: diff --git a/python/sparknlp/__init__.py b/python/sparknlp/__init__.py index 60e1715cdcf57a..4c398e87bc221a 100644 --- a/python/sparknlp/__init__.py +++ b/python/sparknlp/__init__.py @@ -128,7 +128,7 @@ def start(gpu=False, The initiated Spark session. """ - current_version = "4.4.3" + current_version = "4.4.4" if params is None: params = {} @@ -298,4 +298,4 @@ def version(): str The current Spark NLP version. 
""" - return '4.4.3' + return '4.4.4' diff --git a/scripts/colab_setup.sh b/scripts/colab_setup.sh index 3162d860285cf3..19a4d335643617 100644 --- a/scripts/colab_setup.sh +++ b/scripts/colab_setup.sh @@ -1,7 +1,7 @@ #!/bin/bash #default values for pyspark, spark-nlp, and SPARK_HOME -SPARKNLP="4.4.3" +SPARKNLP="4.4.4" PYSPARK="3.2.3" while getopts s:p:g option diff --git a/scripts/kaggle_setup.sh b/scripts/kaggle_setup.sh index 994c1efd030a61..1f2138dcadc232 100644 --- a/scripts/kaggle_setup.sh +++ b/scripts/kaggle_setup.sh @@ -1,7 +1,7 @@ #!/bin/bash #default values for pyspark, spark-nlp, and SPARK_HOME -SPARKNLP="4.4.3" +SPARKNLP="4.4.4" PYSPARK="3.2.3" while getopts s:p:g option diff --git a/scripts/sagemaker_setup.sh b/scripts/sagemaker_setup.sh index 1d032eb4dcfbd0..78bdd2e972f5f6 100644 --- a/scripts/sagemaker_setup.sh +++ b/scripts/sagemaker_setup.sh @@ -1,7 +1,7 @@ #!/bin/bash # Default values for pyspark, spark-nlp, and SPARK_HOME -SPARKNLP="4.4.3" +SPARKNLP="4.4.4" PYSPARK="3.2.3" echo "Setup SageMaker for PySpark $PYSPARK and Spark NLP $SPARKNLP" diff --git a/src/main/scala/com/johnsnowlabs/nlp/SparkNLP.scala b/src/main/scala/com/johnsnowlabs/nlp/SparkNLP.scala index b573c53c146236..ae495ee3dcf908 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/SparkNLP.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/SparkNLP.scala @@ -20,7 +20,7 @@ import org.apache.spark.sql.SparkSession object SparkNLP { - val currentVersion = "4.4.3" + val currentVersion = "4.4.4" val MavenSpark3 = s"com.johnsnowlabs.nlp:spark-nlp_2.12:$currentVersion" val MavenGpuSpark3 = s"com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:$currentVersion" val MavenSparkSilicon = s"com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:$currentVersion" diff --git a/src/main/scala/com/johnsnowlabs/util/Build.scala b/src/main/scala/com/johnsnowlabs/util/Build.scala index 1659cea41c5e33..e8f1667bcda04f 100644 --- a/src/main/scala/com/johnsnowlabs/util/Build.scala +++ b/src/main/scala/com/johnsnowlabs/util/Build.scala @@ 
-17,5 +17,5 @@ package com.johnsnowlabs.util object Build { - val version: String = "4.4.3" + val version: String = "4.4.4" } From 85e602af2cc9a7a591151fa161f6f60d9bed37b4 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 8 Jun 2023 11:34:43 +0000 Subject: [PATCH 06/11] Update Scala and Python APIs --- docs/api/com/index.html | 8 ++-- .../johnsnowlabs/client/CredentialParams.html | 8 ++-- .../client/aws/AWSAnonymousCredentials.html | 8 ++-- .../client/aws/AWSBasicCredentials.html | 8 ++-- .../client/aws/AWSCredentialsProvider.html | 8 ++-- .../johnsnowlabs/client/aws/AWSGateway.html | 8 ++-- .../client/aws/AWSProfileCredentials.html | 8 ++-- .../client/aws/AWSTokenCredentials.html | 8 ++-- .../johnsnowlabs/client/aws/Credentials.html | 8 ++-- .../com/johnsnowlabs/client/aws/index.html | 8 ++-- .../johnsnowlabs/client/gcp/GCPGateway.html | 8 ++-- .../com/johnsnowlabs/client/gcp/index.html | 8 ++-- docs/api/com/johnsnowlabs/client/index.html | 8 ++-- .../johnsnowlabs/collections/SearchTrie$.html | 8 ++-- .../johnsnowlabs/collections/SearchTrie.html | 8 ++-- .../collections/StorageSearchTrie$.html | 8 ++-- .../collections/StorageSearchTrie.html | 8 ++-- .../com/johnsnowlabs/collections/index.html | 8 ++-- docs/api/com/johnsnowlabs/index.html | 8 ++-- docs/api/com/johnsnowlabs/ml/ai/DeBerta.html | 8 ++-- .../ml/ai/MergeTokenStrategy$.html | 8 ++-- docs/api/com/johnsnowlabs/ml/ai/index.html | 8 ++-- .../ml/ai/util/Generation/Generate.html | 8 ++-- .../ml/ai/util/Generation/Logit/Logit.html | 8 ++-- .../Logit/LogitProcess/LogitProcessor.html | 8 ++-- .../LogitProcess/MinLengthLogitProcessor.html | 8 ++-- .../NoRepeatNgramsLogitProcessor.html | 8 ++-- .../RepetitionPenaltyLogitProcessor.html | 8 ++-- .../Generation/Logit/LogitProcess/index.html | 8 ++-- .../Generation/Logit/LogitProcessorList.html | 8 ++-- .../Logit/LogitWarper/LogitWarper.html | 8 ++-- .../LogitWarper/TemperatureLogitWarper.html | 8 ++-- .../Logit/LogitWarper/TopKLogitWarper.html | 8 ++-- 
.../Logit/LogitWarper/TopPLogitWarper.html | 8 ++-- .../Generation/Logit/LogitWarper/index.html | 8 ++-- .../ml/ai/util/Generation/Logit/index.html | 8 ++-- .../Generation/Search/BeamHypotheses.html | 8 ++-- .../ai/util/Generation/Search/BeamScorer.html | 8 ++-- .../Generation/Search/BeamSearchScorer.html | 8 ++-- .../ml/ai/util/Generation/Search/index.html | 8 ++-- .../ml/ai/util/Generation/index.html | 8 ++-- .../com/johnsnowlabs/ml/ai/util/index.html | 8 ++-- docs/api/com/johnsnowlabs/ml/crf/Attr.html | 8 ++-- .../com/johnsnowlabs/ml/crf/AttrFeature.html | 8 ++-- .../api/com/johnsnowlabs/ml/crf/AttrStat.html | 8 ++-- .../com/johnsnowlabs/ml/crf/CrfDataset.html | 8 ++-- .../com/johnsnowlabs/ml/crf/CrfParams.html | 8 ++-- .../johnsnowlabs/ml/crf/DatasetEncoder.html | 8 ++-- .../johnsnowlabs/ml/crf/DatasetMetadata.html | 8 ++-- .../johnsnowlabs/ml/crf/DatasetReader$.html | 8 ++-- .../johnsnowlabs/ml/crf/EdgeCalculator$.html | 8 ++-- .../com/johnsnowlabs/ml/crf/FbCalculator.html | 8 ++-- .../api/com/johnsnowlabs/ml/crf/Instance.html | 8 ++-- .../johnsnowlabs/ml/crf/InstanceLabels.html | 8 ++-- .../johnsnowlabs/ml/crf/L2DecayStrategy.html | 8 ++-- .../johnsnowlabs/ml/crf/LinearChainCrf.html | 8 ++-- .../ml/crf/LinearChainCrfModel.html | 8 ++-- .../ml/crf/SerializedDatasetMetadata.html | 8 ++-- .../ml/crf/SerializedLinearChainCrfModel.html | 8 ++-- .../ml/crf/SparseArray$$SeqWrapper.html | 8 ++-- .../com/johnsnowlabs/ml/crf/SparseArray$.html | 8 ++-- .../com/johnsnowlabs/ml/crf/SparseArray.html | 8 ++-- .../ml/crf/TextSentenceAttrs.html | 8 ++-- .../ml/crf/TextSentenceLabels.html | 8 ++-- .../com/johnsnowlabs/ml/crf/Transition.html | 8 ++-- .../com/johnsnowlabs/ml/crf/VectorMath$.html | 8 ++-- .../com/johnsnowlabs/ml/crf/WordAttrs.html | 8 ++-- docs/api/com/johnsnowlabs/ml/crf/index.html | 8 ++-- docs/api/com/johnsnowlabs/ml/index.html | 8 ++-- .../tensorflow/ClassifierDatasetEncoder.html | 8 ++-- .../ClassifierDatasetEncoderParams.html | 8 ++-- 
.../ml/tensorflow/DatasetEncoderParams.html | 8 ++-- .../johnsnowlabs/ml/tensorflow/Logging.html | 8 ++-- .../ml/tensorflow/ModelSignature.html | 8 ++-- .../johnsnowlabs/ml/tensorflow/NerBatch$.html | 8 ++-- .../johnsnowlabs/ml/tensorflow/NerBatch.html | 8 ++-- .../ml/tensorflow/NerDatasetEncoder.html | 8 ++-- .../ml/tensorflow/ReadTensorflowModel.html | 8 ++-- .../ml/tensorflow/SentenceGrouper.html | 8 ++-- .../ml/tensorflow/TensorResources$.html | 8 ++-- .../ml/tensorflow/TensorResources.html | 8 ++-- .../ml/tensorflow/TensorflowClassifier.html | 8 ++-- .../ml/tensorflow/TensorflowWrapper$.html | 8 ++-- .../ml/tensorflow/TensorflowWrapper.html | 8 ++-- .../johnsnowlabs/ml/tensorflow/Variables.html | 8 ++-- .../ml/tensorflow/WriteTensorflowModel.html | 8 ++-- .../com/johnsnowlabs/ml/tensorflow/index.html | 8 ++-- .../sentencepiece/ReadSentencePieceModel.html | 8 ++-- .../sentencepiece/SentencePieceException.html | 8 ++-- .../sentencepiece/SentencePieceProcessor.html | 8 ++-- .../sentencepiece/SentencePieceWrapper$.html | 8 ++-- .../WriteSentencePieceModel.html | 8 ++-- .../ml/tensorflow/sentencepiece/index.html | 8 ++-- ...delSignatureConstants$$AttentionMask$.html | 8 ++-- ...lSignatureConstants$$AttentionMaskV1$.html | 8 ++-- ...SignatureConstants$$AudioValuesInput$.html | 8 ++-- ...s$$CachedDecoderEncoderAttentionMask$.html | 8 ++-- ...stants$$CachedDecoderEncoderInputIds$.html | 8 ++-- ...eConstants$$CachedDecoderInputCache1$.html | 8 ++-- ...eConstants$$CachedDecoderInputCache2$.html | 8 ++-- ...tureConstants$$CachedDecoderInputIds$.html | 8 ++-- ...natureConstants$$CachedEncoderOutput$.html | 8 ++-- ...gnatureConstants$$CachedLogitsOutput$.html | 8 ++-- ...delSignatureConstants$$CachedOutPut2$.html | 8 ++-- ...delSignatureConstants$$CachedOutput1$.html | 8 ++-- .../sign/ModelSignatureConstants$$DType$.html | 8 ++-- ...atureConstants$$DecoderAttentionMask$.html | 8 ++-- ...nstants$$DecoderEncoderAttentionMask$.html | 8 ++-- 
...ureConstants$$DecoderEncoderInputIds$.html | 8 ++-- ...lSignatureConstants$$DecoderInputIds$.html | 8 ++-- ...delSignatureConstants$$DecoderOutput$.html | 8 ++-- .../ModelSignatureConstants$$DimCount$.html | 8 ++-- ...atureConstants$$EncoderAttentionMask$.html | 8 ++-- ...lSignatureConstants$$EncoderInputIds$.html | 8 ++-- ...delSignatureConstants$$EncoderOutput$.html | 8 ++-- ...lSignatureConstants$$EndLogitsOutput$.html | 8 ++-- ...ignatureConstants$$InitCachedOutPut2$.html | 8 ++-- ...ignatureConstants$$InitCachedOutput1$.html | 8 ++-- ...nts$$InitDecoderEncoderAttentionMask$.html | 8 ++-- ...onstants$$InitDecoderEncoderInputIds$.html | 8 ++-- ...natureConstants$$InitDecoderInputIds$.html | 8 ++-- ...SignatureConstants$$InitLogitsOutput$.html | 8 ++-- .../ModelSignatureConstants$$InputIds$.html | 8 ++-- .../ModelSignatureConstants$$InputIdsV1$.html | 8 ++-- ...lSignatureConstants$$LastHiddenState$.html | 8 ++-- ...ignatureConstants$$LastHiddenStateV1$.html | 8 ++-- ...odelSignatureConstants$$LogitsOutput$.html | 8 ++-- .../sign/ModelSignatureConstants$$Name$.html | 8 ++-- ...SignatureConstants$$PixelValuesInput$.html | 8 ++-- ...odelSignatureConstants$$PoolerOutput$.html | 8 ++-- ...elSignatureConstants$$PoolerOutputV1$.html | 8 ++-- ...elSignatureConstants$$SerializedSize$.html | 8 ++-- ...odelSignatureConstants$$ShapeDimList$.html | 8 ++-- ...ignatureConstants$$StartLogitsOutput$.html | 8 ++-- ...lSignatureConstants$$TFInfoDescriptor.html | 8 ++-- ...lSignatureConstants$$TFInfoNameMapper.html | 8 ++-- ...stants$$TapasLogitsAggregationOutput$.html | 8 ++-- ...ignatureConstants$$TapasLogitsOutput$.html | 8 ++-- ...odelSignatureConstants$$TokenTypeIds$.html | 8 ++-- ...elSignatureConstants$$TokenTypeIdsV1$.html | 8 ++-- .../sign/ModelSignatureConstants$.html | 8 ++-- .../sign/ModelSignatureManager$.html | 8 ++-- .../ml/tensorflow/sign/index.html | 8 ++-- .../ml/util/LoadExternalModel$.html | 8 ++-- .../johnsnowlabs/ml/util/ModelEngine$.html | 8 ++-- 
docs/api/com/johnsnowlabs/ml/util/index.html | 8 ++-- .../johnsnowlabs/nlp/ActivationFunction$.html | 8 ++-- .../nlp/Annotation$$AnnotationContainer.html | 8 ++-- ...nnotation$$extractors$$AnnotationData.html | 8 ++-- .../nlp/Annotation$$extractors$.html | 8 ++-- .../api/com/johnsnowlabs/nlp/Annotation$.html | 8 ++-- docs/api/com/johnsnowlabs/nlp/Annotation.html | 8 ++-- .../AnnotationAudio$$AnnotationContainer.html | 8 ++-- .../nlp/AnnotationAudio$$AudioFields.html | 8 ++-- .../johnsnowlabs/nlp/AnnotationAudio$.html | 8 ++-- .../com/johnsnowlabs/nlp/AnnotationAudio.html | 8 ++-- .../AnnotationImage$$AnnotationContainer.html | 8 ++-- .../nlp/AnnotationImage$$ImageFields.html | 8 ++-- .../johnsnowlabs/nlp/AnnotationImage$.html | 8 ++-- .../com/johnsnowlabs/nlp/AnnotationImage.html | 8 ++-- .../johnsnowlabs/nlp/AnnotatorApproach.html | 8 ++-- .../com/johnsnowlabs/nlp/AnnotatorModel.html | 8 ++-- .../com/johnsnowlabs/nlp/AnnotatorType$.html | 8 ++-- .../com/johnsnowlabs/nlp/AudioAssembler$.html | 8 ++-- .../com/johnsnowlabs/nlp/AudioAssembler.html | 8 ++-- docs/api/com/johnsnowlabs/nlp/CanBeLazy.html | 8 ++-- docs/api/com/johnsnowlabs/nlp/Doc2Chunk$.html | 8 ++-- docs/api/com/johnsnowlabs/nlp/Doc2Chunk.html | 8 ++-- .../johnsnowlabs/nlp/DocumentAssembler$.html | 8 ++-- .../johnsnowlabs/nlp/DocumentAssembler.html | 8 ++-- .../johnsnowlabs/nlp/EmbeddingsFinisher$.html | 8 ++-- .../johnsnowlabs/nlp/EmbeddingsFinisher.html | 8 ++-- .../com/johnsnowlabs/nlp/FeaturesReader.html | 8 ++-- .../com/johnsnowlabs/nlp/FeaturesWriter.html | 8 ++-- docs/api/com/johnsnowlabs/nlp/Finisher$.html | 8 ++-- docs/api/com/johnsnowlabs/nlp/Finisher.html | 8 ++-- .../com/johnsnowlabs/nlp/GraphFinisher.html | 8 ++-- .../nlp/HasAudioFeatureProperties.html | 8 ++-- .../johnsnowlabs/nlp/HasBatchedAnnotate.html | 8 ++-- .../nlp/HasBatchedAnnotateAudio.html | 8 ++-- .../nlp/HasBatchedAnnotateImage.html | 8 ++-- .../nlp/HasCandidateLabelsProperties.html | 8 ++-- 
.../nlp/HasCaseSensitiveProperties.html | 8 ++-- .../HasClassifierActivationProperties.html | 14 +++---- .../nlp/HasEnableCachingProperties.html | 8 ++-- docs/api/com/johnsnowlabs/nlp/HasEngine.html | 8 ++-- .../api/com/johnsnowlabs/nlp/HasFeatures.html | 8 ++-- .../nlp/HasImageFeatureProperties.html | 8 ++-- .../nlp/HasInputAnnotationCols.html | 8 ++-- .../nlp/HasMultipleInputAnnotationCols.html | 8 ++-- .../nlp/HasOutputAnnotationCol.html | 8 ++-- .../nlp/HasOutputAnnotatorType.html | 8 ++-- .../com/johnsnowlabs/nlp/HasPretrained.html | 8 ++-- .../HasProtectedParams$ProtectedParam.html | 8 ++-- .../johnsnowlabs/nlp/HasProtectedParams.html | 8 ++-- .../com/johnsnowlabs/nlp/HasRecursiveFit.html | 8 ++-- .../nlp/HasRecursiveTransform.html | 8 ++-- .../johnsnowlabs/nlp/HasSimpleAnnotate.html | 8 ++-- .../api/com/johnsnowlabs/nlp/IAnnotation.html | 8 ++-- .../com/johnsnowlabs/nlp/ImageAssembler$.html | 8 ++-- .../com/johnsnowlabs/nlp/ImageAssembler.html | 8 ++-- .../com/johnsnowlabs/nlp/JavaAnnotation.html | 8 ++-- .../com/johnsnowlabs/nlp/LightPipeline.html | 8 ++-- .../nlp/MultiDocumentAssembler$.html | 8 ++-- .../nlp/MultiDocumentAssembler.html | 8 ++-- .../nlp/ParamsAndFeaturesReadable.html | 8 ++-- .../nlp/ParamsAndFeaturesWritable.html | 8 ++-- .../com/johnsnowlabs/nlp/RawAnnotator.html | 8 ++-- .../johnsnowlabs/nlp/RecursivePipeline.html | 8 ++-- .../nlp/RecursivePipelineModel.html | 8 ++-- docs/api/com/johnsnowlabs/nlp/SparkNLP$.html | 8 ++-- .../com/johnsnowlabs/nlp/TableAssembler$.html | 8 ++-- .../com/johnsnowlabs/nlp/TableAssembler.html | 8 ++-- .../com/johnsnowlabs/nlp/TokenAssembler$.html | 8 ++-- .../com/johnsnowlabs/nlp/TokenAssembler.html | 8 ++-- .../nlp/annotators/Chunk2Doc$.html | 8 ++-- .../nlp/annotators/Chunk2Doc.html | 8 ++-- .../nlp/annotators/ChunkTokenizer$.html | 8 ++-- .../nlp/annotators/ChunkTokenizer.html | 8 ++-- .../nlp/annotators/ChunkTokenizerModel$.html | 8 ++-- .../nlp/annotators/ChunkTokenizerModel.html | 8 ++-- 
.../johnsnowlabs/nlp/annotators/Chunker$.html | 8 ++-- .../johnsnowlabs/nlp/annotators/Chunker.html | 8 ++-- .../nlp/annotators/Date2Chunk$.html | 8 ++-- .../nlp/annotators/Date2Chunk.html | 8 ++-- .../nlp/annotators/DateMatcher$.html | 8 ++-- .../nlp/annotators/DateMatcher.html | 8 ++-- .../nlp/annotators/DateMatcherTranslator.html | 8 ++-- .../DateMatcherTranslatorPolicy.html | 8 ++-- .../nlp/annotators/DateMatcherUtils.html | 8 ++-- .../nlp/annotators/DocumentNormalizer$.html | 8 ++-- .../nlp/annotators/DocumentNormalizer.html | 8 ++-- .../nlp/annotators/EnglishStemmer$.html | 8 ++-- .../nlp/annotators/GraphExtraction.html | 8 ++-- .../nlp/annotators/Lemmatizer$.html | 8 ++-- .../nlp/annotators/Lemmatizer.html | 8 ++-- .../nlp/annotators/LemmatizerModel$.html | 8 ++-- .../nlp/annotators/LemmatizerModel.html | 8 ++-- .../nlp/annotators/LookAroundManager$.html | 8 ++-- .../nlp/annotators/MultiDateMatcher$.html | 8 ++-- .../nlp/annotators/MultiDateMatcher.html | 8 ++-- .../nlp/annotators/MultiDatePolicy$.html | 8 ++-- .../nlp/annotators/NGramGenerator$.html | 8 ++-- .../nlp/annotators/NGramGenerator.html | 8 ++-- .../nlp/annotators/Normalizer$.html | 8 ++-- .../nlp/annotators/Normalizer.html | 8 ++-- .../nlp/annotators/NormalizerModel$.html | 8 ++-- ...alizerModel$TokenizerAndNormalizerMap.html | 8 ++-- .../nlp/annotators/NormalizerModel.html | 8 ++-- .../annotators/PretrainedAnnotations$.html | 8 ++-- .../ReadablePretrainedLemmatizer.html | 8 ++-- ...adablePretrainedStopWordsCleanerModel.html | 8 ++-- .../ReadablePretrainedTextMatcher.html | 8 ++-- .../ReadablePretrainedTokenizer.html | 8 ++-- .../nlp/annotators/RecursiveTokenizer.html | 8 ++-- .../annotators/RecursiveTokenizerModel$.html | 8 ++-- .../annotators/RecursiveTokenizerModel.html | 8 ++-- .../nlp/annotators/RegexMatcher$.html | 8 ++-- .../nlp/annotators/RegexMatcher.html | 8 ++-- .../nlp/annotators/RegexMatcherModel$.html | 8 ++-- .../nlp/annotators/RegexMatcherModel.html | 8 ++-- 
.../nlp/annotators/RegexTokenizer$.html | 8 ++-- .../nlp/annotators/RegexTokenizer.html | 8 ++-- .../nlp/annotators/SingleDatePolicy$.html | 8 ++-- .../johnsnowlabs/nlp/annotators/Stemmer$.html | 8 ++-- .../johnsnowlabs/nlp/annotators/Stemmer.html | 8 ++-- .../nlp/annotators/StopWordsCleaner$.html | 8 ++-- .../nlp/annotators/StopWordsCleaner.html | 8 ++-- .../nlp/annotators/TextMatcher$.html | 8 ++-- .../nlp/annotators/TextMatcher.html | 8 ++-- .../nlp/annotators/TextMatcherModel$.html | 8 ++-- .../nlp/annotators/TextMatcherModel.html | 8 ++-- .../nlp/annotators/Token2Chunk$.html | 8 ++-- .../nlp/annotators/Token2Chunk.html | 8 ++-- .../nlp/annotators/Tokenizer$.html | 8 ++-- .../nlp/annotators/Tokenizer.html | 8 ++-- .../nlp/annotators/TokenizerModel$.html | 8 ++-- .../nlp/annotators/TokenizerModel.html | 8 ++-- .../nlp/annotators/audio/HubertForCTC$.html | 8 ++-- .../nlp/annotators/audio/HubertForCTC.html | 8 ++-- .../audio/ReadHubertForAudioDLModel.html | 8 ++-- .../audio/ReadWav2Vec2ForAudioDLModel.html | 8 ++-- ...ReadablePretrainedHubertForAudioModel.html | 8 ++-- ...adablePretrainedWav2Vec2ForAudioModel.html | 8 ++-- .../nlp/annotators/audio/Wav2Vec2ForCTC$.html | 8 ++-- .../nlp/annotators/audio/Wav2Vec2ForCTC.html | 8 ++-- .../nlp/annotators/audio/index.html | 8 ++-- .../nlp/annotators/btm/BigTextMatcher$.html | 8 ++-- .../nlp/annotators/btm/BigTextMatcher.html | 8 ++-- .../annotators/btm/BigTextMatcherModel$.html | 8 ++-- .../annotators/btm/BigTextMatcherModel.html | 8 ++-- .../btm/ReadablePretrainedBigTextMatcher.html | 8 ++-- .../nlp/annotators/btm/TMEdgesReadWriter.html | 8 ++-- .../nlp/annotators/btm/TMEdgesReader.html | 8 ++-- .../nlp/annotators/btm/TMNodesReader.html | 8 ++-- .../nlp/annotators/btm/TMNodesWriter.html | 8 ++-- .../nlp/annotators/btm/TMVocabReadWriter.html | 8 ++-- .../nlp/annotators/btm/TMVocabReader.html | 8 ++-- .../nlp/annotators/btm/TrieNode.html | 8 ++-- .../nlp/annotators/btm/index.html | 8 ++-- 
.../dl/AlbertForQuestionAnswering$.html | 8 ++-- .../dl/AlbertForQuestionAnswering.html | 8 ++-- .../dl/AlbertForSequenceClassification$.html | 8 ++-- .../dl/AlbertForSequenceClassification.html | 14 +++---- .../dl/AlbertForTokenClassification$.html | 8 ++-- .../dl/AlbertForTokenClassification.html | 8 ++-- .../dl/BertForQuestionAnswering$.html | 8 ++-- .../dl/BertForQuestionAnswering.html | 8 ++-- .../dl/BertForSequenceClassification$.html | 8 ++-- .../dl/BertForSequenceClassification.html | 14 +++---- .../dl/BertForTokenClassification$.html | 8 ++-- .../dl/BertForTokenClassification.html | 8 ++-- .../dl/BertForZeroShotClassification$.html | 8 ++-- .../dl/BertForZeroShotClassification.html | 14 +++---- .../dl/CamemBertForQuestionAnswering$.html | 8 ++-- .../dl/CamemBertForQuestionAnswering.html | 8 ++-- .../CamemBertForSequenceClassification$.html | 8 ++-- .../CamemBertForSequenceClassification.html | 14 +++---- .../dl/CamemBertForTokenClassification$.html | 8 ++-- .../dl/CamemBertForTokenClassification.html | 8 ++-- .../classifier/dl/ClassifierDLApproach$.html | 8 ++-- .../classifier/dl/ClassifierDLApproach.html | 8 ++-- .../classifier/dl/ClassifierDLModel$.html | 8 ++-- .../classifier/dl/ClassifierDLModel.html | 8 ++-- .../classifier/dl/ClassifierEncoder.html | 8 ++-- .../classifier/dl/ClassifierMetrics.html | 8 ++-- .../dl/DeBertaForQuestionAnswering$.html | 8 ++-- .../dl/DeBertaForQuestionAnswering.html | 8 ++-- .../dl/DeBertaForSequenceClassification$.html | 8 ++-- .../dl/DeBertaForSequenceClassification.html | 14 +++---- .../dl/DeBertaForTokenClassification$.html | 8 ++-- .../dl/DeBertaForTokenClassification.html | 8 ++-- .../dl/DistilBertForQuestionAnswering$.html | 8 ++-- .../dl/DistilBertForQuestionAnswering.html | 8 ++-- .../DistilBertForSequenceClassification$.html | 8 ++-- .../DistilBertForSequenceClassification.html | 14 +++---- .../dl/DistilBertForTokenClassification$.html | 8 ++-- .../dl/DistilBertForTokenClassification.html | 8 ++-- 
.../DistilBertForZeroShotClassification$.html | 8 ++-- .../DistilBertForZeroShotClassification.html | 14 +++---- .../dl/LongformerForQuestionAnswering$.html | 8 ++-- .../dl/LongformerForQuestionAnswering.html | 8 ++-- .../LongformerForSequenceClassification$.html | 8 ++-- .../LongformerForSequenceClassification.html | 14 +++---- .../dl/LongformerForTokenClassification$.html | 8 ++-- .../dl/LongformerForTokenClassification.html | 8 ++-- .../dl/MultiClassifierDLApproach.html | 8 ++-- .../dl/MultiClassifierDLModel$.html | 8 ++-- .../classifier/dl/MultiClassifierDLModel.html | 8 ++-- ...ReadAlbertForQuestionAnsweringDLModel.html | 8 ++-- .../dl/ReadAlbertForSequenceDLModel.html | 8 ++-- .../dl/ReadAlbertForTokenDLModel.html | 8 ++-- .../ReadBertForQuestionAnsweringDLModel.html | 8 ++-- .../dl/ReadBertForSequenceDLModel.html | 8 ++-- .../dl/ReadBertForTokenDLModel.html | 8 ++-- .../dl/ReadBertForZeroShotDLModel.html | 8 ++-- .../dl/ReadCamemBertForQADLModel.html | 8 ++-- .../dl/ReadCamemBertForSequenceDLModel.html | 8 ++-- .../dl/ReadCamemBertForTokenDLModel.html | 8 ++-- .../dl/ReadClassifierDLTensorflowModel.html | 8 ++-- ...eadDeBertaForQuestionAnsweringDLModel.html | 8 ++-- .../dl/ReadDeBertaForSequenceDLModel.html | 8 ++-- .../dl/ReadDeBertaForTokenDLModel.html | 8 ++-- ...DistilBertForQuestionAnsweringDLModel.html | 8 ++-- .../dl/ReadDistilBertForSequenceDLModel.html | 8 ++-- .../dl/ReadDistilBertForTokenDLModel.html | 8 ++-- .../dl/ReadDistilBertForZeroShotDLModel.html | 8 ++-- ...LongformerForQuestionAnsweringDLModel.html | 8 ++-- .../dl/ReadLongformerForSequenceDLModel.html | 8 ++-- .../dl/ReadLongformerForTokenDLModel.html | 8 ++-- .../ReadMultiClassifierDLTensorflowModel.html | 8 ++-- ...eadRoBertaForQuestionAnsweringDLModel.html | 8 ++-- .../dl/ReadRoBertaForSequenceDLModel.html | 8 ++-- .../dl/ReadRoBertaForTokenDLModel.html | 8 ++-- .../dl/ReadRoBertaForZeroShotDLModel.html | 8 ++-- .../dl/ReadSentimentDLTensorflowModel.html | 8 ++-- 
.../ReadTapasForQuestionAnsweringDLModel.html | 8 ++-- ...XlmRoBertaForQuestionAnsweringDLModel.html | 8 ++-- .../dl/ReadXlmRoBertaForSequenceDLModel.html | 8 ++-- .../dl/ReadXlmRoBertaForTokenDLModel.html | 8 ++-- .../dl/ReadXlnetForSequenceDLModel.html | 8 ++-- .../dl/ReadXlnetForTokenDLModel.html | 8 ++-- .../ReadablePretrainedAlbertForQAModel.html | 8 ++-- ...dablePretrainedAlbertForSequenceModel.html | 8 ++-- ...ReadablePretrainedAlbertForTokenModel.html | 8 ++-- .../dl/ReadablePretrainedBertForQAModel.html | 8 ++-- ...eadablePretrainedBertForSequenceModel.html | 8 ++-- .../ReadablePretrainedBertForTokenModel.html | 8 ++-- ...eadablePretrainedBertForZeroShotModel.html | 8 ++-- ...ReadablePretrainedCamemBertForQAModel.html | 8 ++-- ...lePretrainedCamemBertForSequenceModel.html | 8 ++-- ...dablePretrainedCamemBertForTokenModel.html | 8 ++-- .../dl/ReadablePretrainedClassifierDL.html | 8 ++-- .../ReadablePretrainedDeBertaForQAModel.html | 8 ++-- ...ablePretrainedDeBertaForSequenceModel.html | 8 ++-- ...eadablePretrainedDeBertaForTokenModel.html | 8 ++-- ...eadablePretrainedDistilBertForQAModel.html | 8 ++-- ...ePretrainedDistilBertForSequenceModel.html | 8 ++-- ...ablePretrainedDistilBertForTokenModel.html | 8 ++-- ...ePretrainedDistilBertForZeroShotModel.html | 8 ++-- ...eadablePretrainedLongformerForQAModel.html | 8 ++-- ...ePretrainedLongformerForSequenceModel.html | 8 ++-- ...ablePretrainedLongformerForTokenModel.html | 8 ++-- .../ReadablePretrainedMultiClassifierDL.html | 8 ++-- .../ReadablePretrainedRoBertaForQAModel.html | 8 ++-- ...ablePretrainedRoBertaForSequenceModel.html | 8 ++-- ...eadablePretrainedRoBertaForTokenModel.html | 8 ++-- ...ablePretrainedRoBertaForZeroShotModel.html | 8 ++-- .../dl/ReadablePretrainedSentimentDL.html | 8 ++-- .../dl/ReadablePretrainedTapasForQAModel.html | 8 ++-- ...eadablePretrainedXlmRoBertaForQAModel.html | 8 ++-- ...ePretrainedXlmRoBertaForSequenceModel.html | 8 ++-- ...ablePretrainedXlmRoBertaForTokenModel.html | 8 
++-- ...adablePretrainedXlnetForSequenceModel.html | 8 ++-- .../ReadablePretrainedXlnetForTokenModel.html | 8 ++-- .../dl/RoBertaForQuestionAnswering$.html | 8 ++-- .../dl/RoBertaForQuestionAnswering.html | 8 ++-- .../dl/RoBertaForSequenceClassification$.html | 8 ++-- .../dl/RoBertaForSequenceClassification.html | 14 +++---- .../dl/RoBertaForTokenClassification$.html | 8 ++-- .../dl/RoBertaForTokenClassification.html | 8 ++-- .../dl/RoBertaForZeroShotClassification$.html | 8 ++-- .../dl/RoBertaForZeroShotClassification.html | 14 +++---- .../classifier/dl/SentimentApproach$.html | 8 ++-- .../classifier/dl/SentimentDLApproach.html | 8 ++-- .../classifier/dl/SentimentDLModel$.html | 8 ++-- .../classifier/dl/SentimentDLModel.html | 8 ++-- .../dl/TapasForQuestionAnswering$.html | 8 ++-- .../dl/TapasForQuestionAnswering.html | 8 ++-- .../dl/XlmRoBertaForQuestionAnswering$.html | 8 ++-- .../dl/XlmRoBertaForQuestionAnswering.html | 8 ++-- .../XlmRoBertaForSequenceClassification$.html | 8 ++-- .../XlmRoBertaForSequenceClassification.html | 14 +++---- .../dl/XlmRoBertaForTokenClassification$.html | 8 ++-- .../dl/XlmRoBertaForTokenClassification.html | 8 ++-- .../dl/XlnetForSequenceClassification$.html | 8 ++-- .../dl/XlnetForSequenceClassification.html | 14 +++---- .../dl/XlnetForTokenClassification$.html | 8 ++-- .../dl/XlnetForTokenClassification.html | 8 ++-- .../nlp/annotators/classifier/dl/index.html | 8 ++-- .../nlp/annotators/classifier/index.html | 8 ++-- .../nlp/annotators/common/Annotated$.html | 8 ++-- .../nlp/annotators/common/Annotated.html | 8 ++-- .../nlp/annotators/common/ChunkSplit$.html | 8 ++-- .../nlp/annotators/common/ConllSentence.html | 8 ++-- .../DatasetHelpers$$DataFrameHelper.html | 8 ++-- .../annotators/common/DatasetHelpers$.html | 8 ++-- .../annotators/common/DependencyParsed$.html | 8 ++-- .../common/DependencyParsedSentence.html | 8 ++-- .../common/EmbeddingsWithSentence$.html | 8 ++-- .../annotators/common/IndexedTaggedWord.html | 8 ++-- 
.../nlp/annotators/common/IndexedToken.html | 8 ++-- .../nlp/annotators/common/InfixToken$.html | 8 ++-- .../nlp/annotators/common/InfixToken.html | 8 ++-- .../LabeledDependency$$DependencyInfo.html | 8 ++-- .../annotators/common/LabeledDependency$.html | 8 ++-- .../nlp/annotators/common/NerTagged$.html | 8 ++-- .../nlp/annotators/common/PosTagged$.html | 8 ++-- .../nlp/annotators/common/PrefixedToken$.html | 8 ++-- .../nlp/annotators/common/PrefixedToken.html | 8 ++-- .../common/PreprocessingParser.html | 8 ++-- .../nlp/annotators/common/Sentence$.html | 8 ++-- .../nlp/annotators/common/Sentence.html | 8 ++-- .../nlp/annotators/common/SentenceSplit$.html | 8 ++-- .../nlp/annotators/common/SuffixedToken$.html | 8 ++-- .../nlp/annotators/common/SuffixedToken.html | 8 ++-- .../nlp/annotators/common/TableData$.html | 8 ++-- .../nlp/annotators/common/TableData.html | 8 ++-- .../nlp/annotators/common/Tagged.html | 8 ++-- .../annotators/common/TaggedSentence$.html | 8 ++-- .../nlp/annotators/common/TaggedSentence.html | 8 ++-- .../nlp/annotators/common/TaggedWord.html | 8 ++-- .../nlp/annotators/common/TokenPiece.html | 8 ++-- .../common/TokenPieceEmbeddings$.html | 8 ++-- .../common/TokenPieceEmbeddings.html | 8 ++-- .../annotators/common/TokenizedSentence.html | 8 ++-- .../common/TokenizedWithSentence$.html | 8 ++-- .../annotators/common/WordWithDependency.html | 8 ++-- .../common/WordpieceEmbeddingsSentence$.html | 8 ++-- .../common/WordpieceEmbeddingsSentence.html | 8 ++-- .../common/WordpieceTokenized$.html | 8 ++-- .../common/WordpieceTokenizedSentence.html | 8 ++-- .../nlp/annotators/common/index.html | 8 ++-- .../ReadSpanBertCorefTensorflowModel.html | 8 ++-- .../ReadablePretrainedSpanBertCorefModel.html | 8 ++-- .../annotators/coref/SpanBertCorefModel$.html | 8 ++-- .../annotators/coref/SpanBertCorefModel.html | 8 ++-- .../nlp/annotators/coref/index.html | 8 ++-- .../cv/ConvNextForImageClassification$.html | 8 ++-- .../cv/ConvNextForImageClassification.html | 8 
++-- .../cv/ReadConvNextForImageDLModel.html | 8 ++-- .../cv/ReadSwinForImageDLModel.html | 8 ++-- .../annotators/cv/ReadViTForImageDLModel.html | 8 ++-- ...adablePretrainedConvNextForImageModel.html | 8 ++-- .../ReadablePretrainedSwinForImageModel.html | 8 ++-- .../ReadablePretrainedViTForImageModel.html | 8 ++-- .../cv/SwinForImageClassification$.html | 8 ++-- .../cv/SwinForImageClassification.html | 8 ++-- .../cv/ViTForImageClassification$.html | 8 ++-- .../cv/ViTForImageClassification.html | 8 ++-- .../johnsnowlabs/nlp/annotators/cv/index.html | 8 ++-- .../er/AhoCorasickAutomaton$Node.html | 8 ++-- .../annotators/er/AhoCorasickAutomaton.html | 8 ++-- .../nlp/annotators/er/EntityPattern.html | 8 ++-- .../annotators/er/EntityRulerApproach.html | 8 ++-- .../annotators/er/EntityRulerFeatures.html | 8 ++-- .../nlp/annotators/er/EntityRulerModel$.html | 8 ++-- .../nlp/annotators/er/EntityRulerModel.html | 8 ++-- .../nlp/annotators/er/EntityRulerUtil$.html | 8 ++-- .../annotators/er/FlattenEntityPattern.html | 8 ++-- .../nlp/annotators/er/PatternsReadWriter.html | 8 ++-- .../nlp/annotators/er/PatternsReader.html | 8 ++-- .../er/ReadablePretrainedEntityRuler.html | 8 ++-- .../er/RegexPatternsReadWriter.html | 8 ++-- .../annotators/er/RegexPatternsReader.html | 8 ++-- .../johnsnowlabs/nlp/annotators/er/index.html | 8 ++-- .../johnsnowlabs/nlp/annotators/index.html | 8 ++-- .../nlp/annotators/keyword/index.html | 8 ++-- .../keyword/yake/YakeKeywordExtraction$.html | 8 ++-- .../keyword/yake/YakeKeywordExtraction.html | 8 ++-- .../annotators/keyword/yake/YakeParams.html | 8 ++-- .../nlp/annotators/keyword/yake/index.html | 8 ++-- .../annotators/keyword/yake/util/Token.html | 8 ++-- .../keyword/yake/util/Utilities$.html | 8 ++-- .../annotators/keyword/yake/util/index.html | 8 ++-- .../annotators/ld/dl/LanguageDetectorDL$.html | 8 ++-- .../annotators/ld/dl/LanguageDetectorDL.html | 8 ++-- ...ReadLanguageDetectorDLTensorflowModel.html | 8 ++-- 
...ablePretrainedLanguageDetectorDLModel.html | 8 ++-- .../nlp/annotators/ld/dl/index.html | 8 ++-- .../johnsnowlabs/nlp/annotators/ld/index.html | 8 ++-- .../nlp/annotators/ner/ModelMetrics$.html | 8 ++-- .../nlp/annotators/ner/NamedEntity.html | 8 ++-- .../nlp/annotators/ner/NerApproach.html | 8 ++-- .../nlp/annotators/ner/NerConverter$.html | 8 ++-- .../nlp/annotators/ner/NerConverter.html | 8 ++-- .../nlp/annotators/ner/NerOverwriter$.html | 8 ++-- .../nlp/annotators/ner/NerOverwriter.html | 8 ++-- .../nlp/annotators/ner/NerTagsEncoding$.html | 8 ++-- .../nlp/annotators/ner/Verbose$.html | 8 ++-- .../ner/crf/DictionaryFeatures$.html | 8 ++-- .../ner/crf/DictionaryFeatures.html | 8 ++-- .../ner/crf/FeatureGenerator$TokenType$.html | 8 ++-- .../annotators/ner/crf/FeatureGenerator.html | 8 ++-- .../annotators/ner/crf/NerCrfApproach$.html | 8 ++-- .../annotators/ner/crf/NerCrfApproach.html | 8 ++-- .../nlp/annotators/ner/crf/NerCrfModel$.html | 8 ++-- .../nlp/annotators/ner/crf/NerCrfModel.html | 8 ++-- .../ner/crf/ReadablePretrainedNerCrf.html | 8 ++-- .../nlp/annotators/ner/crf/index.html | 8 ++-- .../nlp/annotators/ner/dl/LoadsContrib$.html | 8 ++-- .../nlp/annotators/ner/dl/NerDLApproach$.html | 8 ++-- .../nlp/annotators/ner/dl/NerDLApproach.html | 8 ++-- .../nlp/annotators/ner/dl/NerDLModel$.html | 8 ++-- .../nlp/annotators/ner/dl/NerDLModel.html | 8 ++-- .../ner/dl/NerDLModelPythonReader$.html | 8 ++-- .../ner/dl/ReadZeroShotNerDLModel.html | 8 ++-- .../ner/dl/ReadablePretrainedNerDL.html | 8 ++-- .../ner/dl/ReadablePretrainedZeroShotNer.html | 8 ++-- .../nlp/annotators/ner/dl/ReadsNERGraph.html | 8 ++-- .../annotators/ner/dl/WithGraphResolver.html | 8 ++-- .../annotators/ner/dl/ZeroShotNerModel$.html | 8 ++-- .../annotators/ner/dl/ZeroShotNerModel.html | 8 ++-- .../nlp/annotators/ner/dl/index.html | 8 ++-- .../nlp/annotators/ner/index.html | 8 ++-- ...lizableFormat$$SerializableDateFormat.html | 8 ++-- .../AnnotatorParam$SerializableFormat$.html | 8 ++-- 
.../nlp/annotators/param/AnnotatorParam.html | 8 ++-- .../annotators/param/EvaluationDLParams.html | 8 ++-- .../param/ExternalResourceParam.html | 8 ++-- .../param/SerializedAnnotatorComponent.html | 8 ++-- .../param/WritableAnnotatorComponent.html | 8 ++-- .../nlp/annotators/param/index.html | 8 ++-- .../parser/dep/DependencyParserApproach$.html | 8 ++-- .../parser/dep/DependencyParserApproach.html | 8 ++-- .../parser/dep/DependencyParserModel$.html | 8 ++-- .../parser/dep/DependencyParserModel.html | 8 ++-- .../GreedyTransition/DependencyMaker$.html | 8 ++-- .../DependencyMaker$CurrentState.html | 8 ++-- .../DependencyMaker$ParseState.html | 8 ++-- .../dep/GreedyTransition/DependencyMaker.html | 8 ++-- .../GreedyTransitionApproach$.html | 8 ++-- .../parser/dep/GreedyTransition/index.html | 8 ++-- .../GreedyTransition/package$$Feature.html | 8 ++-- .../GreedyTransition/package$$WordData.html | 8 ++-- .../parser/dep/Perceptron$WeightLearner.html | 8 ++-- .../nlp/annotators/parser/dep/Perceptron.html | 8 ++-- .../dep/ReadablePretrainedDependency.html | 8 ++-- .../annotators/parser/dep/TagDictionary$.html | 8 ++-- .../nlp/annotators/parser/dep/Tagger$.html | 8 ++-- .../nlp/annotators/parser/dep/Tagger.html | 8 ++-- .../nlp/annotators/parser/dep/index.html | 8 ++-- .../nlp/annotators/parser/index.html | 8 ++-- .../annotators/parser/typdep/ConllData.html | 8 ++-- .../parser/typdep/DependencyArcList.html | 8 ++-- .../parser/typdep/DependencyInstance.html | 8 ++-- .../parser/typdep/DependencyPipe.html | 8 ++-- .../parser/typdep/LocalFeatureData.html | 8 ++-- .../parser/typdep/LowRankTensor.html | 8 ++-- .../nlp/annotators/parser/typdep/Options.html | 8 ++-- .../annotators/parser/typdep/Parameters.html | 8 ++-- .../parser/typdep/PredictionParameters.html | 8 ++-- .../ReadablePretrainedTypedDependency.html | 8 ++-- .../parser/typdep/TrainDependencies.html | 8 ++-- .../annotators/parser/typdep/TrainFile.html | 8 ++-- .../parser/typdep/TypedDependencyParser.html | 8 ++-- 
.../TypedDependencyParserApproach$.html | 8 ++-- .../typdep/TypedDependencyParserApproach.html | 8 ++-- .../typdep/TypedDependencyParserModel$.html | 8 ++-- .../typdep/TypedDependencyParserModel.html | 8 ++-- .../typdep/feature/FeatureTemplate.html | 8 ++-- .../feature/SyntacticFeatureFactory.html | 8 ++-- .../parser/typdep/feature/index.html | 8 ++-- .../nlp/annotators/parser/typdep/index.html | 8 ++-- .../parser/typdep/io/Conll09Reader.html | 8 ++-- .../parser/typdep/io/ConllUReader.html | 8 ++-- .../parser/typdep/io/ConllWriter.html | 8 ++-- .../parser/typdep/io/DependencyReader.html | 8 ++-- .../annotators/parser/typdep/io/index.html | 8 ++-- .../parser/typdep/util/Alphabet.html | 8 ++-- .../parser/typdep/util/Collector.html | 8 ++-- .../parser/typdep/util/DependencyLabel.html | 8 ++-- .../parser/typdep/util/Dictionary.html | 8 ++-- .../parser/typdep/util/DictionarySet.html | 8 ++-- .../parser/typdep/util/FeatureVector.html | 8 ++-- .../parser/typdep/util/ScoreCollector.html | 8 ++-- .../annotators/parser/typdep/util/Utils.html | 8 ++-- .../annotators/parser/typdep/util/index.html | 8 ++-- .../nlp/annotators/pos/index.html | 8 ++-- .../pos/perceptron/AveragedPerceptron.html | 8 ++-- .../pos/perceptron/PerceptronApproach$.html | 8 ++-- .../pos/perceptron/PerceptronApproach.html | 8 ++-- .../PerceptronApproachDistributed$.html | 8 ++-- .../PerceptronApproachDistributed.html | 8 ++-- .../pos/perceptron/PerceptronModel$.html | 8 ++-- .../pos/perceptron/PerceptronModel.html | 8 ++-- .../perceptron/PerceptronPredictionUtils.html | 8 ++-- .../perceptron/PerceptronTrainingUtils.html | 8 ++-- .../pos/perceptron/PerceptronUtils.html | 8 ++-- .../ReadablePretrainedPerceptron.html | 8 ++-- .../StringMapStringDoubleAccumulator.html | 8 ++-- .../perceptron/TrainingPerceptronLegacy.html | 8 ++-- .../TupleKeyLongDoubleMapAccumulator.html | 8 ++-- .../nlp/annotators/pos/perceptron/index.html | 8 ++-- .../sbd/SentenceDetectorParams.html | 8 ++-- .../nlp/annotators/sbd/index.html 
| 8 ++-- .../sbd/pragmatic/CustomPragmaticMethod.html | 8 ++-- .../sbd/pragmatic/DefaultPragmaticMethod.html | 8 ++-- .../sbd/pragmatic/MixedPragmaticMethod.html | 8 ++-- .../pragmatic/PragmaticContentFormatter$.html | 8 ++-- .../pragmatic/PragmaticContentFormatter.html | 8 ++-- .../sbd/pragmatic/PragmaticDictionaries$.html | 8 ++-- .../sbd/pragmatic/PragmaticMethod.html | 8 ++-- .../pragmatic/PragmaticSentenceExtractor.html | 8 ++-- .../sbd/pragmatic/PragmaticSymbols$.html | 8 ++-- .../annotators/sbd/pragmatic/RuleSymbols.html | 8 ++-- .../sbd/pragmatic/SentenceDetector$.html | 8 ++-- .../sbd/pragmatic/SentenceDetector.html | 8 ++-- .../nlp/annotators/sbd/pragmatic/index.html | 8 ++-- .../nlp/annotators/sda/index.html | 8 ++-- .../sda/pragmatic/PragmaticScorer.html | 8 ++-- .../sda/pragmatic/SentimentDetector$.html | 8 ++-- .../sda/pragmatic/SentimentDetector.html | 8 ++-- .../pragmatic/SentimentDetectorModel$.html | 8 ++-- .../sda/pragmatic/SentimentDetectorModel.html | 8 ++-- .../nlp/annotators/sda/pragmatic/index.html | 8 ++-- .../sda/vivekn/ReadablePretrainedVivekn.html | 8 ++-- .../sda/vivekn/ViveknSentimentApproach.html | 8 ++-- .../sda/vivekn/ViveknSentimentModel$.html | 8 ++-- .../sda/vivekn/ViveknSentimentModel.html | 8 ++-- .../sda/vivekn/ViveknSentimentUtils.html | 8 ++-- .../nlp/annotators/sda/vivekn/index.html | 8 ++-- .../sentence_detector_dl/Metrics.html | 8 ++-- .../ReadablePretrainedSentenceDetectorDL.html | 8 ++-- .../ReadsSentenceDetectorDLGraph.html | 8 ++-- .../SentenceDetectorDLApproach.html | 8 ++-- .../SentenceDetectorDLEncoder$.html | 8 ++-- .../SentenceDetectorDLEncoder.html | 8 ++-- .../SentenceDetectorDLEncoderParam.html | 8 ++-- .../SentenceDetectorDLModel$.html | 8 ++-- .../SentenceDetectorDLModel.html | 8 ++-- .../sentence_detector_dl/index.html | 8 ++-- .../annotators/seq2seq/BartTransformer$.html | 8 ++-- .../annotators/seq2seq/BartTransformer.html | 8 ++-- .../annotators/seq2seq/GPT2Transformer$.html | 8 ++-- 
.../annotators/seq2seq/GPT2Transformer.html | 8 ++-- .../seq2seq/MarianTransformer$.html | 8 ++-- .../annotators/seq2seq/MarianTransformer.html | 8 ++-- .../seq2seq/ReadBartTransformerDLModel.html | 8 ++-- .../seq2seq/ReadGPT2TransformerDLModel.html | 8 ++-- .../seq2seq/ReadMarianMTDLModel.html | 8 ++-- .../seq2seq/ReadT5TransformerDLModel.html | 8 ++-- ...eadablePretrainedBartTransformerModel.html | 8 ++-- ...eadablePretrainedGPT2TransformerModel.html | 8 ++-- .../ReadablePretrainedMarianMTModel.html | 8 ++-- .../ReadablePretrainedT5TransformerModel.html | 8 ++-- .../annotators/seq2seq/T5Transformer$.html | 8 ++-- .../nlp/annotators/seq2seq/T5Transformer.html | 8 ++-- .../nlp/annotators/seq2seq/index.html | 8 ++-- .../spell/context/CandidateStrategy$.html | 8 ++-- ...ntextSpellCheckerApproach$ArrayHelper.html | 8 ++-- .../context/ContextSpellCheckerApproach.html | 8 ++-- .../context/ContextSpellCheckerModel$.html | 8 ++-- .../ContextSpellCheckerModel$StringTools.html | 8 ++-- .../context/ContextSpellCheckerModel.html | 8 ++-- .../spell/context/HasTransducerFeatures.html | 8 ++-- .../spell/context/LangModelSentence.html | 8 ++-- .../ReadablePretrainedContextSpell.html | 8 ++-- .../context/ReadsLanguageModelGraph.html | 8 ++-- .../spell/context/WeightedLevenshtein.html | 8 ++-- .../nlp/annotators/spell/context/index.html | 8 ++-- .../spell/context/parser/AgeToken.html | 8 ++-- .../spell/context/parser/DateToken.html | 8 ++-- .../context/parser/GenericRegexParser.html | 8 ++-- .../context/parser/GenericVocabParser.html | 8 ++-- .../spell/context/parser/LocationClass.html | 8 ++-- .../spell/context/parser/MainVocab.html | 8 ++-- .../spell/context/parser/MedicationClass.html | 8 ++-- .../spell/context/parser/NamesClass.html | 8 ++-- .../spell/context/parser/NumberToken.html | 8 ++-- .../spell/context/parser/RegexParser.html | 8 ++-- .../context/parser/SerializableClass.html | 8 ++-- .../context/parser/SpecialClassParser.html | 8 ++-- 
.../context/parser/TransducerSeqFeature.html | 8 ++-- .../spell/context/parser/UnitToken.html | 8 ++-- .../spell/context/parser/VocabParser.html | 8 ++-- .../spell/context/parser/index.html | 8 ++-- .../nlp/annotators/spell/index.html | 8 ++-- .../spell/norvig/NorvigSweetingApproach$.html | 8 ++-- .../spell/norvig/NorvigSweetingApproach.html | 8 ++-- .../spell/norvig/NorvigSweetingModel$.html | 8 ++-- .../spell/norvig/NorvigSweetingModel.html | 8 ++-- .../spell/norvig/NorvigSweetingParams.html | 8 ++-- .../norvig/ReadablePretrainedNorvig.html | 8 ++-- .../nlp/annotators/spell/norvig/index.html | 8 ++-- .../ReadablePretrainedSymmetric.html | 8 ++-- .../symmetric/SymmetricDeleteApproach$.html | 8 ++-- .../symmetric/SymmetricDeleteApproach.html | 8 ++-- .../symmetric/SymmetricDeleteModel$.html | 8 ++-- .../SymmetricDeleteModel$SuggestedWord.html | 8 ++-- .../spell/symmetric/SymmetricDeleteModel.html | 8 ++-- .../symmetric/SymmetricDeleteParams.html | 8 ++-- .../nlp/annotators/spell/symmetric/index.html | 8 ++-- .../nlp/annotators/spell/util/Utilities$.html | 8 ++-- .../nlp/annotators/spell/util/index.html | 8 ++-- .../nlp/annotators/tapas/TapasCellDate$.html | 8 ++-- .../nlp/annotators/tapas/TapasCellDate.html | 8 ++-- .../nlp/annotators/tapas/TapasCellValue$.html | 8 ++-- .../nlp/annotators/tapas/TapasCellValue.html | 8 ++-- .../nlp/annotators/tapas/TapasEncoder.html | 8 ++-- .../nlp/annotators/tapas/TapasInputData.html | 8 ++-- .../tapas/TapasNumericRelation$.html | 8 ++-- .../tapas/TapasNumericValueSpan$.html | 8 ++-- .../tapas/TapasNumericValueSpan.html | 8 ++-- .../nlp/annotators/tapas/index.html | 8 ++-- .../tokenizer/bpe/BartTokenizer.html | 8 ++-- .../tokenizer/bpe/BpeTokenizer$.html | 8 ++-- .../tokenizer/bpe/Gpt2Tokenizer.html | 8 ++-- .../tokenizer/bpe/RobertaTokenizer.html | 8 ++-- .../tokenizer/bpe/SpecialToken.html | 8 ++-- .../nlp/annotators/tokenizer/bpe/index.html | 8 ++-- .../nlp/annotators/tokenizer/index.html | 8 ++-- 
.../ws/ReadablePretrainedWordSegmenter.html | 8 ++-- .../nlp/annotators/ws/TagsType$.html | 8 ++-- .../annotators/ws/WordSegmenterApproach$.html | 8 ++-- .../annotators/ws/WordSegmenterApproach.html | 8 ++-- .../annotators/ws/WordSegmenterModel$.html | 8 ++-- .../nlp/annotators/ws/WordSegmenterModel.html | 8 ++-- .../johnsnowlabs/nlp/annotators/ws/index.html | 8 ++-- .../nlp/embeddings/AlbertEmbeddings$.html | 8 ++-- .../nlp/embeddings/AlbertEmbeddings.html | 8 ++-- .../nlp/embeddings/BertEmbeddings$.html | 8 ++-- .../nlp/embeddings/BertEmbeddings.html | 8 ++-- .../embeddings/BertSentenceEmbeddings$.html | 8 ++-- .../embeddings/BertSentenceEmbeddings.html | 8 ++-- .../nlp/embeddings/CamemBertEmbeddings$.html | 8 ++-- .../nlp/embeddings/CamemBertEmbeddings.html | 8 ++-- .../nlp/embeddings/ChunkEmbeddings$.html | 8 ++-- .../nlp/embeddings/ChunkEmbeddings.html | 8 ++-- .../nlp/embeddings/DeBertaEmbeddings$.html | 8 ++-- .../nlp/embeddings/DeBertaEmbeddings.html | 8 ++-- .../nlp/embeddings/DistilBertEmbeddings$.html | 8 ++-- .../nlp/embeddings/DistilBertEmbeddings.html | 8 ++-- .../nlp/embeddings/Doc2VecApproach$.html | 8 ++-- .../nlp/embeddings/Doc2VecApproach.html | 8 ++-- .../nlp/embeddings/Doc2VecModel$.html | 8 ++-- .../nlp/embeddings/Doc2VecModel.html | 8 ++-- .../nlp/embeddings/ElmoEmbeddings$.html | 8 ++-- .../nlp/embeddings/ElmoEmbeddings.html | 8 ++-- .../EmbeddingsCoverage$CoverageResult.html | 8 ++-- .../nlp/embeddings/EmbeddingsCoverage.html | 8 ++-- .../embeddings/HasEmbeddingsProperties.html | 8 ++-- .../nlp/embeddings/LongformerEmbeddings$.html | 8 ++-- .../nlp/embeddings/LongformerEmbeddings.html | 8 ++-- .../PoolingStrategy$$AnnotatorType$.html | 8 ++-- .../nlp/embeddings/PoolingStrategy$.html | 8 ++-- .../nlp/embeddings/ReadAlbertDLModel.html | 8 ++-- .../nlp/embeddings/ReadBertDLModel.html | 8 ++-- .../embeddings/ReadBertSentenceDLModel.html | 8 ++-- .../nlp/embeddings/ReadCamemBertDLModel.html | 8 ++-- .../nlp/embeddings/ReadDeBertaDLModel.html | 8 
++-- .../nlp/embeddings/ReadDistilBertDLModel.html | 8 ++-- .../nlp/embeddings/ReadElmoDLModel.html | 8 ++-- .../nlp/embeddings/ReadLongformerDLModel.html | 8 ++-- .../nlp/embeddings/ReadRobertaDLModel.html | 8 ++-- .../ReadRobertaSentenceDLModel.html | 8 ++-- .../nlp/embeddings/ReadUSEDLModel.html | 8 ++-- .../nlp/embeddings/ReadXlmRobertaDLModel.html | 8 ++-- .../ReadXlmRobertaSentenceDLModel.html | 8 ++-- .../nlp/embeddings/ReadXlnetDLModel.html | 8 ++-- .../ReadablePretrainedAlbertModel.html | 8 ++-- .../ReadablePretrainedBertModel.html | 8 ++-- .../ReadablePretrainedBertSentenceModel.html | 8 ++-- .../ReadablePretrainedCamemBertModel.html | 8 ++-- .../ReadablePretrainedDeBertaModel.html | 8 ++-- .../ReadablePretrainedDistilBertModel.html | 8 ++-- .../embeddings/ReadablePretrainedDoc2Vec.html | 8 ++-- .../ReadablePretrainedElmoModel.html | 8 ++-- .../ReadablePretrainedLongformerModel.html | 8 ++-- .../ReadablePretrainedRobertaModel.html | 8 ++-- ...eadablePretrainedRobertaSentenceModel.html | 8 ++-- .../ReadablePretrainedUSEModel.html | 8 ++-- .../ReadablePretrainedWord2Vec.html | 8 ++-- .../ReadablePretrainedWordEmbeddings.html | 8 ++-- .../ReadablePretrainedXlmRobertaModel.html | 8 ++-- ...ablePretrainedXlmRobertaSentenceModel.html | 8 ++-- .../ReadablePretrainedXlnetModel.html | 8 ++-- .../nlp/embeddings/ReadsFromBytes.html | 8 ++-- .../nlp/embeddings/RoBertaEmbeddings$.html | 8 ++-- .../nlp/embeddings/RoBertaEmbeddings.html | 8 ++-- .../RoBertaSentenceEmbeddings$.html | 8 ++-- .../embeddings/RoBertaSentenceEmbeddings.html | 8 ++-- .../nlp/embeddings/SentenceEmbeddings$.html | 8 ++-- .../nlp/embeddings/SentenceEmbeddings.html | 8 ++-- .../embeddings/UniversalSentenceEncoder$.html | 8 ++-- .../embeddings/UniversalSentenceEncoder.html | 8 ++-- .../nlp/embeddings/Word2VecApproach$.html | 8 ++-- .../nlp/embeddings/Word2VecApproach.html | 8 ++-- .../nlp/embeddings/Word2VecModel$.html | 8 ++-- .../nlp/embeddings/Word2VecModel.html | 8 ++-- 
.../nlp/embeddings/WordEmbeddings$.html | 8 ++-- .../nlp/embeddings/WordEmbeddings.html | 8 ++-- .../WordEmbeddingsBinaryIndexer$.html | 8 ++-- .../nlp/embeddings/WordEmbeddingsModel$.html | 8 ++-- .../nlp/embeddings/WordEmbeddingsModel.html | 8 ++-- .../nlp/embeddings/WordEmbeddingsReader.html | 8 ++-- .../WordEmbeddingsTextIndexer$.html | 8 ++-- .../nlp/embeddings/WordEmbeddingsWriter.html | 8 ++-- .../nlp/embeddings/XlmRoBertaEmbeddings$.html | 8 ++-- .../nlp/embeddings/XlmRoBertaEmbeddings.html | 8 ++-- .../XlmRoBertaSentenceEmbeddings$.html | 8 ++-- .../XlmRoBertaSentenceEmbeddings.html | 8 ++-- .../nlp/embeddings/XlnetEmbeddings$.html | 8 ++-- .../nlp/embeddings/XlnetEmbeddings.html | 8 ++-- .../johnsnowlabs/nlp/embeddings/index.html | 8 ++-- .../nlp/functions$$EachAnnotations.html | 8 ++-- .../nlp/functions$$ExplodeAnnotations.html | 8 ++-- .../nlp/functions$$FilterAnnotations.html | 8 ++-- .../nlp/functions$$MapAnnotations.html | 8 ++-- docs/api/com/johnsnowlabs/nlp/functions$.html | 8 ++-- docs/api/com/johnsnowlabs/nlp/index.html | 8 ++-- .../nlp/pretrained/PretrainedPipeline$.html | 8 ++-- .../nlp/pretrained/PretrainedPipeline.html | 8 ++-- .../pretrained/PythonResourceDownloader$.html | 14 +++---- .../nlp/pretrained/RepositoryMetadata.html | 8 ++-- .../nlp/pretrained/ResourceDownloader$.html | 38 +++++++++++++------ .../nlp/pretrained/ResourceDownloader.html | 14 +++---- .../nlp/pretrained/ResourceMetadata$.html | 8 ++-- .../nlp/pretrained/ResourceMetadata.html | 8 ++-- .../nlp/pretrained/ResourceRequest.html | 8 ++-- .../nlp/pretrained/ResourceType$.html | 8 ++-- .../nlp/pretrained/S3ResourceDownloader.html | 14 +++---- .../johnsnowlabs/nlp/pretrained/index.html | 8 ++-- .../com/johnsnowlabs/nlp/recursive/index.html | 8 ++-- .../nlp/recursive/package$$Recursive.html | 8 ++-- .../recursive/package$$RecursiveModel.html | 8 ++-- .../nlp/serialization/ArrayFeature.html | 8 ++-- .../nlp/serialization/Feature.html | 8 ++-- 
.../nlp/serialization/MapFeature.html | 8 ++-- .../SerializedExternalResource.html | 8 ++-- .../nlp/serialization/SetFeature.html | 8 ++-- .../nlp/serialization/StructFeature.html | 8 ++-- .../nlp/serialization/TransducerFeature.html | 8 ++-- .../johnsnowlabs/nlp/serialization/index.html | 8 ++-- .../com/johnsnowlabs/nlp/training/CoNLL.html | 8 ++-- .../nlp/training/CoNLL2003NerReader.html | 8 ++-- .../nlp/training/CoNLLDocument.html | 8 ++-- .../CoNLLHelper$$CoNLLSentenceCols.html | 8 ++-- .../training/CoNLLHelper$$CoNLLTokenCols.html | 8 ++-- .../nlp/training/CoNLLHelper$.html | 8 ++-- .../com/johnsnowlabs/nlp/training/CoNLLU.html | 8 ++-- .../nlp/training/CoNLLUCols$.html | 8 ++-- .../nlp/training/CoNLLUDocument.html | 8 ++-- .../com/johnsnowlabs/nlp/training/POS.html | 8 ++-- .../johnsnowlabs/nlp/training/PubTator.html | 8 ++-- .../nlp/training/SpacyToAnnotation.html | 8 ++-- .../com/johnsnowlabs/nlp/training/index.html | 8 ++-- .../johnsnowlabs/nlp/util/FinisherUtil$.html | 8 ++-- .../johnsnowlabs/nlp/util/GraphBuilder.html | 8 ++-- .../nlp/util/LfuCache$CachedItem.html | 8 ++-- .../nlp/util/LfuCache$DoubleLinked.html | 8 ++-- .../nlp/util/LfuCache$FrequencyList.html | 8 ++-- .../com/johnsnowlabs/nlp/util/LfuCache.html | 8 ++-- .../nlp/util/LruMap$KeyPriority.html | 8 ++-- .../nlp/util/LruMap$KeyPriorityOrdering$.html | 8 ++-- .../api/com/johnsnowlabs/nlp/util/LruMap.html | 8 ++-- .../nlp/util/SparkNlpConfigKeys$.html | 8 ++-- docs/api/com/johnsnowlabs/nlp/util/index.html | 8 ++-- .../nlp/util/io/ExternalResource$.html | 8 ++-- .../nlp/util/io/ExternalResource.html | 8 ++-- .../nlp/util/io/MatchStrategy$.html | 8 ++-- .../nlp/util/io/OutputHelper$.html | 8 ++-- .../com/johnsnowlabs/nlp/util/io/ReadAs$.html | 8 ++-- .../util/io/ResourceHelper$$SourceStream.html | 8 ++-- .../nlp/util/io/ResourceHelper$.html | 8 ++-- .../com/johnsnowlabs/nlp/util/io/index.html | 8 ++-- .../nlp/util/regex/RegexRule.html | 8 ++-- .../util/regex/RuleFactory$$RuleMatch.html | 8 ++-- 
.../nlp/util/regex/RuleFactory$.html | 8 ++-- .../nlp/util/regex/RuleFactory.html | 8 ++-- .../nlp/util/regex/TransformStrategy$.html | 8 ++-- .../johnsnowlabs/nlp/util/regex/index.html | 8 ++-- .../com/johnsnowlabs/storage/BytesKey.html | 8 ++-- .../com/johnsnowlabs/storage/Database$.html | 8 ++-- .../com/johnsnowlabs/storage/Database.html | 8 ++-- .../johnsnowlabs/storage/HasConnection.html | 8 ++-- .../com/johnsnowlabs/storage/HasStorage.html | 8 ++-- .../johnsnowlabs/storage/HasStorageModel.html | 8 ++-- .../storage/HasStorageOptions.html | 8 ++-- .../storage/HasStorageReader.html | 8 ++-- .../johnsnowlabs/storage/HasStorageRef$.html | 8 ++-- .../johnsnowlabs/storage/HasStorageRef.html | 8 ++-- .../storage/RocksDBConnection$.html | 8 ++-- .../storage/RocksDBConnection.html | 8 ++-- .../storage/StorageBatchWriter.html | 8 ++-- .../johnsnowlabs/storage/StorageFormat.html | 8 ++-- .../johnsnowlabs/storage/StorageHelper$.html | 8 ++-- .../johnsnowlabs/storage/StorageLocator$.html | 8 ++-- .../johnsnowlabs/storage/StorageLocator.html | 8 ++-- .../storage/StorageReadWriter.html | 8 ++-- .../johnsnowlabs/storage/StorageReadable.html | 8 ++-- .../johnsnowlabs/storage/StorageReader.html | 8 ++-- .../johnsnowlabs/storage/StorageWriter.html | 8 ++-- docs/api/com/johnsnowlabs/storage/index.html | 8 ++-- .../api/com/johnsnowlabs/util/Benchmark$.html | 8 ++-- docs/api/com/johnsnowlabs/util/Build$.html | 8 ++-- .../johnsnowlabs/util/CoNLLGenerator$.html | 8 ++-- .../com/johnsnowlabs/util/ConfigHelper$.html | 8 ++-- .../com/johnsnowlabs/util/ConfigLoader$.html | 8 ++-- .../com/johnsnowlabs/util/FileHelper$.html | 8 ++-- .../com/johnsnowlabs/util/JsonParser$.html | 8 ++-- .../johnsnowlabs/util/PipelineModels$.html | 8 ++-- .../johnsnowlabs/util/TrainingHelper$.html | 8 ++-- docs/api/com/johnsnowlabs/util/Version$.html | 8 ++-- docs/api/com/johnsnowlabs/util/Version.html | 8 ++-- .../johnsnowlabs/util/ZipArchiveUtil$.html | 8 ++-- docs/api/com/johnsnowlabs/util/index.html | 8 
++-- .../util/spark/LongMapAccumulator.html | 8 ++-- .../util/spark/MapAccumulator.html | 8 ++-- .../johnsnowlabs/util/spark/SparkUtil$.html | 8 ++-- .../com/johnsnowlabs/util/spark/index.html | 8 ++-- docs/api/index.html | 8 ++-- docs/api/index.js | 2 +- docs/api/python/.buildinfo | 2 +- docs/api/python/genindex.html | 2 +- docs/api/python/getting_started/index.html | 20 +++++----- docs/api/python/index.html | 4 +- docs/api/python/modules/index.html | 2 +- docs/api/python/modules/sparknlp.html | 6 +-- .../python/modules/sparknlp/annotation.html | 2 +- .../modules/sparknlp/annotation_audio.html | 2 +- .../modules/sparknlp/annotation_image.html | 2 +- .../annotator/audio/hubert_for_ctc.html | 2 +- .../annotator/audio/wav2vec2_for_ctc.html | 2 +- .../sparknlp/annotator/chunk2_doc.html | 2 +- .../modules/sparknlp/annotator/chunker.html | 2 +- .../albert_for_question_answering.html | 2 +- .../albert_for_sequence_classification.html | 2 +- .../albert_for_token_classification.html | 2 +- .../bert_for_question_answering.html | 2 +- .../bert_for_sequence_classification.html | 2 +- .../bert_for_token_classification.html | 2 +- .../bert_for_zero_shot_classification.html | 2 +- .../camembert_for_question_answering.html | 2 +- ...camembert_for_sequence_classification.html | 2 +- .../camembert_for_token_classification.html | 2 +- .../classifier_dl/classifier_dl.html | 2 +- .../deberta_for_question_answering.html | 2 +- .../deberta_for_sequence_classification.html | 2 +- .../deberta_for_token_classification.html | 2 +- .../distil_bert_for_question_answering.html | 2 +- ...stil_bert_for_sequence_classification.html | 2 +- .../distil_bert_for_token_classification.html | 2 +- ...til_bert_for_zero_shot_classification.html | 2 +- .../longformer_for_question_answering.html | 2 +- ...ongformer_for_sequence_classification.html | 2 +- .../longformer_for_token_classification.html | 2 +- .../classifier_dl/multi_classifier_dl.html | 2 +- ...rta_bert_for_zero_shot_classification.html | 2 +- 
.../roberta_for_question_answering.html | 2 +- .../roberta_for_sequence_classification.html | 2 +- .../roberta_for_token_classification.html | 2 +- .../annotator/classifier_dl/sentiment_dl.html | 2 +- .../tapas_for_question_answering.html | 2 +- .../xlm_roberta_for_question_answering.html | 2 +- ...m_roberta_for_sequence_classification.html | 2 +- .../xlm_roberta_for_token_classification.html | 2 +- .../xlnet_for_sequence_classification.html | 2 +- .../xlnet_for_token_classification.html | 2 +- .../annotator/coref/spanbert_coref.html | 2 +- .../cv/convnext_for_image_classification.html | 2 +- .../cv/swin_for_image_classification.html | 2 +- .../cv/vit_for_image_classification.html | 2 +- .../sparknlp/annotator/date2_chunk.html | 2 +- .../dependency/dependency_parser.html | 2 +- .../dependency/typed_dependency_parser.html | 2 +- .../annotator/document_normalizer.html | 2 +- .../embeddings/albert_embeddings.html | 2 +- .../annotator/embeddings/bert_embeddings.html | 2 +- .../embeddings/bert_sentence_embeddings.html | 2 +- .../embeddings/camembert_embeddings.html | 2 +- .../embeddings/chunk_embeddings.html | 2 +- .../embeddings/deberta_embeddings.html | 2 +- .../embeddings/distil_bert_embeddings.html | 2 +- .../annotator/embeddings/doc2vec.html | 2 +- .../annotator/embeddings/elmo_embeddings.html | 2 +- .../embeddings/longformer_embeddings.html | 2 +- .../embeddings/roberta_embeddings.html | 2 +- .../roberta_sentence_embeddings.html | 2 +- .../embeddings/sentence_embeddings.html | 2 +- .../universal_sentence_encoder.html | 2 +- .../annotator/embeddings/word2vec.html | 2 +- .../annotator/embeddings/word_embeddings.html | 2 +- .../embeddings/xlm_roberta_embeddings.html | 2 +- .../xlm_roberta_sentence_embeddings.html | 2 +- .../embeddings/xlnet_embeddings.html | 2 +- .../sparknlp/annotator/er/entity_ruler.html | 2 +- .../sparknlp/annotator/graph_extraction.html | 2 +- .../yake_keyword_extraction.html | 2 +- .../annotator/ld_dl/language_detector_dl.html | 2 +- 
.../sparknlp/annotator/lemmatizer.html | 2 +- .../annotator/matcher/big_text_matcher.html | 2 +- .../annotator/matcher/date_matcher.html | 2 +- .../annotator/matcher/multi_date_matcher.html | 2 +- .../annotator/matcher/regex_matcher.html | 2 +- .../annotator/matcher/text_matcher.html | 2 +- .../sparknlp/annotator/n_gram_generator.html | 2 +- .../sparknlp/annotator/ner/ner_approach.html | 2 +- .../sparknlp/annotator/ner/ner_converter.html | 2 +- .../sparknlp/annotator/ner/ner_crf.html | 2 +- .../sparknlp/annotator/ner/ner_dl.html | 2 +- .../annotator/ner/ner_overwriter.html | 2 +- .../annotator/ner/zero_shot_ner_model.html | 2 +- .../sparknlp/annotator/normalizer.html | 2 +- .../annotator/param/classifier_encoder.html | 2 +- .../annotator/param/evaluation_dl_params.html | 2 +- .../sparknlp/annotator/pos/perceptron.html | 2 +- .../annotator/sentence/sentence_detector.html | 2 +- .../sentence/sentence_detector_dl.html | 2 +- .../sentiment/sentiment_detector.html | 2 +- .../annotator/sentiment/vivekn_sentiment.html | 2 +- .../annotator/seq2seq/bart_transformer.html | 2 +- .../annotator/seq2seq/gpt2_transformer.html | 2 +- .../annotator/seq2seq/marian_transformer.html | 2 +- .../annotator/seq2seq/t5_transformer.html | 2 +- .../spell_check/context_spell_checker.html | 2 +- .../spell_check/norvig_sweeting.html | 2 +- .../spell_check/symmetric_delete.html | 2 +- .../modules/sparknlp/annotator/stemmer.html | 2 +- .../annotator/stop_words_cleaner.html | 2 +- .../annotator/tf_ner_dl_graph_builder.html | 2 +- .../annotator/token/chunk_tokenizer.html | 2 +- .../annotator/token/recursive_tokenizer.html | 2 +- .../annotator/token/regex_tokenizer.html | 2 +- .../sparknlp/annotator/token/tokenizer.html | 2 +- .../sparknlp/annotator/ws/word_segmenter.html | 2 +- .../sparknlp/base/audio_assembler.html | 2 +- .../modules/sparknlp/base/doc2_chunk.html | 2 +- .../sparknlp/base/document_assembler.html | 2 +- .../sparknlp/base/embeddings_finisher.html | 2 +- 
.../modules/sparknlp/base/finisher.html | 2 +- .../modules/sparknlp/base/graph_finisher.html | 2 +- .../sparknlp/base/has_recursive_fit.html | 2 +- .../base/has_recursive_transform.html | 2 +- .../sparknlp/base/image_assembler.html | 2 +- .../modules/sparknlp/base/light_pipeline.html | 2 +- .../base/multi_document_assembler.html | 2 +- .../sparknlp/base/recursive_pipeline.html | 2 +- .../sparknlp/base/table_assembler.html | 2 +- .../modules/sparknlp/base/token2_chunk.html | 2 +- .../sparknlp/base/token_assembler.html | 2 +- .../sparknlp/common/annotator_approach.html | 2 +- .../sparknlp/common/annotator_model.html | 2 +- .../sparknlp/common/annotator_properties.html | 2 +- .../sparknlp/common/match_strategy.html | 2 +- .../modules/sparknlp/common/properties.html | 2 +- .../modules/sparknlp/common/read_as.html | 2 +- .../common/recursive_annotator_approach.html | 2 +- .../python/modules/sparknlp/common/utils.html | 2 +- .../python/modules/sparknlp/functions.html | 2 +- .../sparknlp/internal/annotator_java_ml.html | 2 +- .../internal/annotator_transformer.html | 2 +- .../internal/extended_java_wrapper.html | 2 +- .../internal/params_getters_setters.html | 2 +- .../modules/sparknlp/internal/recursive.html | 2 +- .../modules/sparknlp/logging/comet.html | 2 +- .../pretrained/pretrained_pipeline.html | 2 +- .../pretrained/resource_downloader.html | 14 ++++--- .../modules/sparknlp/training/conll.html | 2 +- .../modules/sparknlp/training/conllu.html | 2 +- .../python/modules/sparknlp/training/pos.html | 2 +- .../modules/sparknlp/training/pub_tator.html | 2 +- .../training/spacy_to_annotation.html | 2 +- docs/api/python/py-modindex.html | 2 +- .../sparknlp/annotation/index.html | 2 +- .../sparknlp/annotation_audio/index.html | 2 +- .../sparknlp/annotation_image/index.html | 2 +- .../annotator/audio/hubert_for_ctc/index.html | 2 +- .../sparknlp/annotator/audio/index.html | 2 +- .../audio/wav2vec2_for_ctc/index.html | 2 +- .../sparknlp/annotator/chunk2_doc/index.html | 2 +- 
.../sparknlp/annotator/chunker/index.html | 2 +- .../albert_for_question_answering/index.html | 2 +- .../index.html | 2 +- .../index.html | 2 +- .../bert_for_question_answering/index.html | 2 +- .../index.html | 2 +- .../bert_for_token_classification/index.html | 2 +- .../index.html | 2 +- .../index.html | 2 +- .../index.html | 2 +- .../index.html | 2 +- .../classifier_dl/classifier_dl/index.html | 2 +- .../deberta_for_question_answering/index.html | 2 +- .../index.html | 2 +- .../index.html | 2 +- .../index.html | 2 +- .../index.html | 2 +- .../index.html | 2 +- .../index.html | 2 +- .../annotator/classifier_dl/index.html | 2 +- .../index.html | 2 +- .../index.html | 2 +- .../index.html | 2 +- .../multi_classifier_dl/index.html | 2 +- .../index.html | 2 +- .../roberta_for_question_answering/index.html | 2 +- .../index.html | 2 +- .../index.html | 2 +- .../classifier_dl/sentiment_dl/index.html | 2 +- .../tapas_for_question_answering/index.html | 2 +- .../index.html | 2 +- .../index.html | 2 +- .../index.html | 2 +- .../index.html | 2 +- .../xlnet_for_token_classification/index.html | 2 +- .../sparknlp/annotator/coref/index.html | 2 +- .../annotator/coref/spanbert_coref/index.html | 2 +- .../index.html | 2 +- .../sparknlp/annotator/cv/index.html | 2 +- .../swin_for_image_classification/index.html | 2 +- .../vit_for_image_classification/index.html | 2 +- .../sparknlp/annotator/date2_chunk/index.html | 2 +- .../dependency/dependency_parser/index.html | 2 +- .../sparknlp/annotator/dependency/index.html | 2 +- .../typed_dependency_parser/index.html | 2 +- .../annotator/document_normalizer/index.html | 2 +- .../embeddings/albert_embeddings/index.html | 2 +- .../embeddings/bert_embeddings/index.html | 2 +- .../bert_sentence_embeddings/index.html | 2 +- .../camembert_embeddings/index.html | 2 +- .../embeddings/chunk_embeddings/index.html | 2 +- .../embeddings/deberta_embeddings/index.html | 2 +- .../distil_bert_embeddings/index.html | 2 +- 
.../annotator/embeddings/doc2vec/index.html | 2 +- .../embeddings/elmo_embeddings/index.html | 2 +- .../sparknlp/annotator/embeddings/index.html | 2 +- .../longformer_embeddings/index.html | 2 +- .../embeddings/roberta_embeddings/index.html | 2 +- .../roberta_sentence_embeddings/index.html | 2 +- .../embeddings/sentence_embeddings/index.html | 2 +- .../universal_sentence_encoder/index.html | 2 +- .../annotator/embeddings/word2vec/index.html | 2 +- .../embeddings/word_embeddings/index.html | 2 +- .../xlm_roberta_embeddings/index.html | 2 +- .../index.html | 2 +- .../embeddings/xlnet_embeddings/index.html | 2 +- .../annotator/er/entity_ruler/index.html | 2 +- .../sparknlp/annotator/er/index.html | 2 +- .../annotator/graph_extraction/index.html | 2 +- .../autosummary/sparknlp/annotator/index.html | 2 +- .../annotator/keyword_extraction/index.html | 2 +- .../yake_keyword_extraction/index.html | 2 +- .../sparknlp/annotator/ld_dl/index.html | 2 +- .../ld_dl/language_detector_dl/index.html | 2 +- .../sparknlp/annotator/lemmatizer/index.html | 2 +- .../matcher/big_text_matcher/index.html | 2 +- .../annotator/matcher/date_matcher/index.html | 2 +- .../sparknlp/annotator/matcher/index.html | 2 +- .../matcher/multi_date_matcher/index.html | 2 +- .../matcher/regex_matcher/index.html | 2 +- .../annotator/matcher/text_matcher/index.html | 2 +- .../annotator/n_gram_generator/index.html | 2 +- .../sparknlp/annotator/ner/index.html | 2 +- .../annotator/ner/ner_approach/index.html | 2 +- .../annotator/ner/ner_converter/index.html | 2 +- .../sparknlp/annotator/ner/ner_crf/index.html | 2 +- .../sparknlp/annotator/ner/ner_dl/index.html | 2 +- .../annotator/ner/ner_overwriter/index.html | 2 +- .../ner/zero_shot_ner_model/index.html | 2 +- .../sparknlp/annotator/normalizer/index.html | 2 +- .../param/classifier_encoder/index.html | 2 +- .../param/evaluation_dl_params/index.html | 2 +- .../sparknlp/annotator/param/index.html | 2 +- .../sparknlp/annotator/pos/index.html | 2 +- 
.../annotator/pos/perceptron/index.html | 2 +- .../sparknlp/annotator/sentence/index.html | 2 +- .../sentence/sentence_detector/index.html | 2 +- .../sentence/sentence_detector_dl/index.html | 2 +- .../sparknlp/annotator/sentiment/index.html | 2 +- .../sentiment/sentiment_detector/index.html | 2 +- .../sentiment/vivekn_sentiment/index.html | 2 +- .../seq2seq/bart_transformer/index.html | 2 +- .../seq2seq/gpt2_transformer/index.html | 2 +- .../sparknlp/annotator/seq2seq/index.html | 2 +- .../seq2seq/marian_transformer/index.html | 2 +- .../seq2seq/t5_transformer/index.html | 2 +- .../context_spell_checker/index.html | 2 +- .../sparknlp/annotator/spell_check/index.html | 2 +- .../spell_check/norvig_sweeting/index.html | 2 +- .../spell_check/symmetric_delete/index.html | 2 +- .../sparknlp/annotator/stemmer/index.html | 2 +- .../annotator/stop_words_cleaner/index.html | 2 +- .../tf_ner_dl_graph_builder/index.html | 2 +- .../token/chunk_tokenizer/index.html | 2 +- .../sparknlp/annotator/token/index.html | 2 +- .../token/recursive_tokenizer/index.html | 2 +- .../token/regex_tokenizer/index.html | 2 +- .../annotator/token/tokenizer/index.html | 2 +- .../sparknlp/annotator/ws/index.html | 2 +- .../annotator/ws/word_segmenter/index.html | 2 +- .../sparknlp/base/audio_assembler/index.html | 2 +- .../sparknlp/base/doc2_chunk/index.html | 2 +- .../base/document_assembler/index.html | 2 +- .../base/embeddings_finisher/index.html | 2 +- .../sparknlp/base/finisher/index.html | 2 +- .../sparknlp/base/graph_finisher/index.html | 2 +- .../base/has_recursive_fit/index.html | 2 +- .../base/has_recursive_transform/index.html | 2 +- .../sparknlp/base/image_assembler/index.html | 2 +- .../autosummary/sparknlp/base/index.html | 2 +- .../sparknlp/base/light_pipeline/index.html | 2 +- .../base/multi_document_assembler/index.html | 2 +- .../base/recursive_pipeline/index.html | 2 +- .../sparknlp/base/table_assembler/index.html | 2 +- .../sparknlp/base/token2_chunk/index.html | 2 +- 
.../sparknlp/base/token_assembler/index.html | 2 +- .../common/annotator_approach/index.html | 2 +- .../common/annotator_model/index.html | 2 +- .../common/annotator_properties/index.html | 2 +- .../sparknlp/common/annotator_type/index.html | 2 +- .../common/coverage_result/index.html | 2 +- .../autosummary/sparknlp/common/index.html | 2 +- .../sparknlp/common/match_strategy/index.html | 2 +- .../sparknlp/common/properties/index.html | 2 +- .../sparknlp/common/read_as/index.html | 2 +- .../recursive_annotator_approach/index.html | 2 +- .../sparknlp/common/storage/index.html | 2 +- .../sparknlp/common/utils/index.html | 2 +- .../autosummary/sparknlp/functions/index.html | 2 +- .../reference/autosummary/sparknlp/index.html | 2 +- .../internal/annotator_java_ml/index.html | 2 +- .../internal/annotator_transformer/index.html | 2 +- .../internal/extended_java_wrapper/index.html | 2 +- .../autosummary/sparknlp/internal/index.html | 2 +- .../params_getters_setters/index.html | 2 +- .../sparknlp/internal/recursive/index.html | 2 +- .../sparknlp/logging/comet/index.html | 2 +- .../autosummary/sparknlp/logging/index.html | 2 +- .../sparknlp/pretrained/index.html | 2 +- .../pretrained/pretrained_pipeline/index.html | 2 +- .../pretrained/resource_downloader/index.html | 24 +++++++----- .../sparknlp/pretrained/utils/index.html | 2 +- .../sparknlp/training/conll/index.html | 2 +- .../sparknlp/training/conllu/index.html | 2 +- .../autosummary/sparknlp/training/index.html | 2 +- .../sparknlp/training/pos/index.html | 2 +- .../sparknlp/training/pub_tator/index.html | 2 +- .../training/spacy_to_annotation/index.html | 2 +- .../sparknlp/training/tfgraphs/index.html | 2 +- .../sparknlp/upload_to_hub/index.html | 2 +- .../autosummary/sparknlp/util/index.html | 2 +- docs/api/python/reference/index.html | 2 +- docs/api/python/search.html | 2 +- docs/api/python/searchindex.js | 2 +- .../python/static/documentation_options.js | 2 +- docs/api/python/third_party/Comet.html | 2 +- 
docs/api/python/third_party/MLflow.html | 2 +- docs/api/python/third_party/index.html | 2 +- docs/api/python/user_guide/annotation.html | 2 +- docs/api/python/user_guide/annotators.html | 2 +- .../python/user_guide/custom_pipelines.html | 2 +- docs/api/python/user_guide/helpers.html | 2 +- docs/api/python/user_guide/index.html | 4 +- .../python/user_guide/light_pipelines.html | 2 +- .../user_guide/pretrained_pipelines.html | 2 +- docs/api/python/user_guide/training.html | 2 +- 1307 files changed, 4298 insertions(+), 4274 deletions(-) diff --git a/docs/api/com/index.html b/docs/api/com/index.html index 96717982a30200..4bd79f81475b1f 100644 --- a/docs/api/com/index.html +++ b/docs/api/com/index.html @@ -3,9 +3,9 @@ - Spark NLP 4.4.3 ScalaDoc - com - - + Spark NLP 4.4.4 ScalaDoc - com + + @@ -28,7 +28,7 @@