diff --git a/docs/_posts/Cabir40/2024-10-21-bge_medembed_base_v0_1_en.md b/docs/_posts/Cabir40/2024-10-21-bge_medembed_base_v0_1_en.md new file mode 100644 index 00000000000000..f21ba93b82dd9c --- /dev/null +++ b/docs/_posts/Cabir40/2024-10-21-bge_medembed_base_v0_1_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English bge_medembed_base_v0_1 BGEEmbeddings from abhinand +author: John Snow Labs +name: bge_medembed_base_v0_1 +date: 2024-10-21 +tags: [embedding, en, open_source, bge, medical, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BGEEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. +`bge_medembed_base_v0_1` is an English model originally trained by abhinand. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_medembed_base_v0_1_en_5.5.0_3.0_1729515433167.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_medembed_base_v0_1_en_5.5.0_3.0_1729515433167.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +embeddings = BGEEmbeddings.pretrained("bge_medembed_base_v0_1","en")\ + .setInputCols(["document"])\ + .setOutputCol("embeddings") + +pipeline = Pipeline( + stages = [ + document_assembler, + embeddings +]) + +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = BGEEmbeddings.pretrained("bge_medembed_base_v0_1","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val data = Seq("I love spark-nlp").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + +``` +
+ +## Results + +```bash + ++----------------------------------------------------------------------------------------------------+ +| bge_embedding| ++----------------------------------------------------------------------------------------------------+ +|[{sentence_embeddings, 0, 15, I love spark-nlp, {sentence -> 0}, [-0.018065551, -0.032784615, 0.0...| ++----------------------------------------------------------------------------------------------------+ + +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_medembed_base_v0_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[bge]| +|Language:|en| +|Size:|389.7 MB| \ No newline at end of file diff --git a/docs/_posts/Cabir40/2024-10-21-bge_medembed_large_v0_1_en.md b/docs/_posts/Cabir40/2024-10-21-bge_medembed_large_v0_1_en.md new file mode 100644 index 00000000000000..ef752830de8a8f --- /dev/null +++ b/docs/_posts/Cabir40/2024-10-21-bge_medembed_large_v0_1_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English bge_medembed_large_v0_1 BGEEmbeddings from abhinand +author: John Snow Labs +name: bge_medembed_large_v0_1 +date: 2024-10-21 +tags: [embedding, en, open_source, bge, medical, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BGEEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. 
+`bge_medembed_large_v0_1` is an English model originally trained by abhinand. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_medembed_large_v0_1_en_5.5.0_3.0_1729515260623.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_medembed_large_v0_1_en_5.5.0_3.0_1729515260623.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +embeddings = BGEEmbeddings.pretrained("bge_medembed_large_v0_1","en")\ + .setInputCols(["document"])\ + .setOutputCol("embeddings") + +pipeline = Pipeline( + stages = [ + document_assembler, + embeddings +]) + +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = BGEEmbeddings.pretrained("bge_medembed_large_v0_1","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val data = Seq("I love spark-nlp").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + +``` +
+ +## Results + +```bash + ++----------------------------------------------------------------------------------------------------+ +| bge_embedding| ++----------------------------------------------------------------------------------------------------+ +|[{sentence_embeddings, 0, 15, I love spark-nlp, {sentence -> 0}, [-0.018065551, -0.032784615, 0.0...| ++----------------------------------------------------------------------------------------------------+ + +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_medembed_large_v0_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[bge]| +|Language:|en| +|Size:|1.2 GB| \ No newline at end of file diff --git a/docs/_posts/Cabir40/2024-10-21-bge_medembed_small_v0_1_en.md b/docs/_posts/Cabir40/2024-10-21-bge_medembed_small_v0_1_en.md new file mode 100644 index 00000000000000..b46280a80ba64c --- /dev/null +++ b/docs/_posts/Cabir40/2024-10-21-bge_medembed_small_v0_1_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English bge_medembed_small_v0_1 BGEEmbeddings from abhinand +author: John Snow Labs +name: bge_medembed_small_v0_1 +date: 2024-10-21 +tags: [embedding, en, open_source, bge, medical, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BGEEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. 
+`bge_medembed_small_v0_1` is an English model originally trained by abhinand. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_medembed_small_v0_1_en_5.5.0_3.0_1729513920928.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_medembed_small_v0_1_en_5.5.0_3.0_1729513920928.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +embeddings = BGEEmbeddings.pretrained("bge_medembed_small_v0_1","en")\ + .setInputCols(["document"])\ + .setOutputCol("embeddings") + +pipeline = Pipeline( + stages = [ + document_assembler, + embeddings +]) + +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = BGEEmbeddings.pretrained("bge_medembed_small_v0_1","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val data = Seq("I love spark-nlp").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + +``` +
+ +## Results + +```bash + ++----------------------------------------------------------------------------------------------------+ +| bge_embedding| ++----------------------------------------------------------------------------------------------------+ +|[{sentence_embeddings, 0, 15, I love spark-nlp, {sentence -> 0}, [-0.07673764, -0.04207312, 0.026...| ++----------------------------------------------------------------------------------------------------+ + +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_medembed_small_v0_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[bge]| +|Language:|en| +|Size:|116.4 MB| \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-10-gemma_2_2b_it_iq3_m_en.md b/docs/_posts/ahmedlone127/2024-10-10-gemma_2_2b_it_iq3_m_en.md new file mode 100644 index 00000000000000..930f9a232d7d68 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-10-gemma_2_2b_it_iq3_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English gemma_2_2b_it_iq3_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: gemma_2_2b_it_iq3_m +date: 2024-10-10 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, tensorflow] +task: Text Generation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: tensorflow +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gemma_2_2b_it_iq3_m` is an English model prepared by lmstudio-community. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_iq3_m_en_5.5.0_3.0_1728575178358.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_iq3_m_en_5.5.0_3.0_1728575178358.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_iq3_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_iq3_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gemma_2_2b_it_iq3_m| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.4 GB| + +## References + +https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-10-gemma_2_2b_it_iq4_xs_en.md b/docs/_posts/ahmedlone127/2024-10-10-gemma_2_2b_it_iq4_xs_en.md new file mode 100644 index 00000000000000..f8ac691adc0dca --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-10-gemma_2_2b_it_iq4_xs_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English gemma_2_2b_it_iq4_xs AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: gemma_2_2b_it_iq4_xs +date: 2024-10-10 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, tensorflow] +task: Text Generation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: tensorflow +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gemma_2_2b_it_iq4_xs` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_iq4_xs_en_5.5.0_3.0_1728575247990.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_iq4_xs_en_5.5.0_3.0_1728575247990.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_iq4_xs","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_iq4_xs", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gemma_2_2b_it_iq4_xs| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-10-gemma_2_2b_it_q3_k_l_en.md b/docs/_posts/ahmedlone127/2024-10-10-gemma_2_2b_it_q3_k_l_en.md new file mode 100644 index 00000000000000..fa8462a8e0f44f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-10-gemma_2_2b_it_q3_k_l_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English gemma_2_2b_it_q3_k_l AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: gemma_2_2b_it_q3_k_l +date: 2024-10-10 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, tensorflow] +task: Text Generation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: tensorflow +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gemma_2_2b_it_q3_k_l` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_q3_k_l_en_5.5.0_3.0_1728575314785.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_q3_k_l_en_5.5.0_3.0_1728575314785.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_q3_k_l","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_q3_k_l", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gemma_2_2b_it_q3_k_l| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-10-gemma_2_2b_it_q4_k_m_en.md b/docs/_posts/ahmedlone127/2024-10-10-gemma_2_2b_it_q4_k_m_en.md new file mode 100644 index 00000000000000..6b255912c3f24a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-10-gemma_2_2b_it_q4_k_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English gemma_2_2b_it_q4_k_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: gemma_2_2b_it_q4_k_m +date: 2024-10-10 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, tensorflow] +task: Text Generation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: tensorflow +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gemma_2_2b_it_q4_k_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_q4_k_m_en_5.5.0_3.0_1728575388230.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_q4_k_m_en_5.5.0_3.0_1728575388230.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_q4_k_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_q4_k_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gemma_2_2b_it_q4_k_m| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-10-gemma_2_2b_it_q5_k_m_en.md b/docs/_posts/ahmedlone127/2024-10-10-gemma_2_2b_it_q5_k_m_en.md new file mode 100644 index 00000000000000..7b271fcc87c8d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-10-gemma_2_2b_it_q5_k_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English gemma_2_2b_it_q5_k_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: gemma_2_2b_it_q5_k_m +date: 2024-10-10 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, tensorflow] +task: Text Generation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: tensorflow +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gemma_2_2b_it_q5_k_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_q5_k_m_en_5.5.0_3.0_1728575468002.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_q5_k_m_en_5.5.0_3.0_1728575468002.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_q5_k_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_q5_k_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gemma_2_2b_it_q5_k_m| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.9 GB| + +## References + +https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-10-gemma_2_2b_it_q6_k_en.md b/docs/_posts/ahmedlone127/2024-10-10-gemma_2_2b_it_q6_k_en.md new file mode 100644 index 00000000000000..98cb76be59faaa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-10-gemma_2_2b_it_q6_k_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English gemma_2_2b_it_q6_k AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: gemma_2_2b_it_q6_k +date: 2024-10-10 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, tensorflow] +task: Text Generation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: tensorflow +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gemma_2_2b_it_q6_k` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_q6_k_en_5.5.0_3.0_1728575557458.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_q6_k_en_5.5.0_3.0_1728575557458.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_q6_k","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_q6_k", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gemma_2_2b_it_q6_k| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|2.1 GB| + +## References + +https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-10-gemma_2_2b_it_q8_0_en.md b/docs/_posts/ahmedlone127/2024-10-10-gemma_2_2b_it_q8_0_en.md new file mode 100644 index 00000000000000..262a5108425f10 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-10-gemma_2_2b_it_q8_0_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English gemma_2_2b_it_q8_0 AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: gemma_2_2b_it_q8_0 +date: 2024-10-10 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, tensorflow] +task: Text Generation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: tensorflow +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gemma_2_2b_it_q8_0` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_q8_0_en_5.5.0_3.0_1728575672163.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_q8_0_en_5.5.0_3.0_1728575672163.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_q8_0","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_q8_0", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gemma_2_2b_it_q8_0| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|2.7 GB| + +## References + +https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-10-llama_3.2_3b_instruct_q3_k_l_xx.md b/docs/_posts/ahmedlone127/2024-10-10-llama_3.2_3b_instruct_q3_k_l_xx.md new file mode 100644 index 00000000000000..bc26f92d85a6c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-10-llama_3.2_3b_instruct_q3_k_l_xx.md @@ -0,0 +1,101 @@ +--- +layout: model +title: Multilingual llama_3.2_3b_instruct_q3_k_l AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: llama_3.2_3b_instruct_q3_k_l +date: 2024-10-10 +tags: [xx, open_source, onnx, conversational, text_generation, text_to_text, en, de, fr, it, pt, hi, es, th, tensorflow] +task: Text Generation +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: tensorflow +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `llama_3.2_3b_instruct_q3_k_l` is a Multilingual model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/llama_3.2_3b_instruct_q3_k_l_xx_5.5.0_3.0_1728575951058.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/llama_3.2_3b_instruct_q3_k_l_xx_5.5.0_3.0_1728575951058.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("llama_3.2_3b_instruct_q3_k_l","xx") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("llama_3.2_3b_instruct_q3_k_l", "xx") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|llama_3.2_3b_instruct_q3_k_l| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|xx| +|Size:|1.8 GB| + +## References + +https://huggingface.co/lmstudio-community/Llama-3.2-3B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-10-llama_3.2_3b_instruct_q4_k_m_xx.md b/docs/_posts/ahmedlone127/2024-10-10-llama_3.2_3b_instruct_q4_k_m_xx.md new file mode 100644 index 00000000000000..9b069bb7a6235a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-10-llama_3.2_3b_instruct_q4_k_m_xx.md @@ -0,0 +1,101 @@ +--- +layout: model +title: Multilingual llama_3.2_3b_instruct_q4_k_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: llama_3.2_3b_instruct_q4_k_m +date: 2024-10-10 +tags: [xx, open_source, onnx, conversational, text_generation, text_to_text, en, de, fr, it, pt, hi, es, th, tensorflow] +task: Text Generation +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: tensorflow +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`llama_3.2_3b_instruct_q4_k_m` is a Multilingual model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/llama_3.2_3b_instruct_q4_k_m_xx_5.5.0_3.0_1728576043870.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/llama_3.2_3b_instruct_q4_k_m_xx_5.5.0_3.0_1728576043870.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("llama_3.2_3b_instruct_q4_k_m","xx") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("llama_3.2_3b_instruct_q4_k_m", "xx") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|llama_3.2_3b_instruct_q4_k_m| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|xx| +|Size:|2.0 GB| + +## References + +https://huggingface.co/lmstudio-community/Llama-3.2-3B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-11-sent_roberta_base_en.md b/docs/_posts/ahmedlone127/2024-10-11-sent_roberta_base_en.md new file mode 100644 index 00000000000000..69a00468280fd3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-11-sent_roberta_base_en.md @@ -0,0 +1,77 @@ +--- +layout: model +title: RoBERTa Base Sentence Embeddings(sent_roberta_base) +author: John Snow Labs +name: sent_roberta_base +date: 2024-10-11 +tags: [sentence_embeddings, en, english, roberta, open_source, onnx, openvino] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: openvino +annotator: RoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained model on English language using a masked language modeling (MLM) objective. It was introduced in this paper and first released in this repository. This model is case-sensitive: it makes a difference between english and English. + +RoBERTa is a transformers model pretrained on a large corpus of English data in a self-supervised fashion. This means it was pretrained on the raw texts only, with no humans labeling them in any way (which is why it can use lots of publicly available data) with an automatic process to generate inputs and labels from those texts. + +More precisely, it was pretrained with the Masked language modeling (MLM) objective. Taking a sentence, the model randomly masks 15% of the words in the input then runs the entire masked sentence through the model and has to predict the masked words. 
This is different from traditional recurrent neural networks (RNNs) that usually see the words one after the other, or from autoregressive models like GPT which internally mask the future tokens. It allows the model to learn a bidirectional representation of the sentence. + +This way, the model learns an inner representation of the English language that can then be used to extract features useful for downstream tasks: if you have a dataset of labeled sentences, for instance, you can train a standard classifier using the features produced by the BERT model as inputs. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_roberta_base_en_5.5.0_3.0_1728677006918.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_roberta_base_en_5.5.0_3.0_1728677006918.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +embeddings = RoBertaSentenceEmbeddings.pretrained("sent_roberta_base", "en") \ + .setInputCols("sentence") \ + .setOutputCol("embeddings") +``` +```scala +val embeddings = RoBertaSentenceEmbeddings.pretrained("sent_roberta_base", "en") + .setInputCols("sentence") + .setOutputCol("embeddings") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_roberta_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[sentence_embeddings]| +|Language:|en| +|Size:|297.7 MB| +|Max sentence length:|32| + +## References + +References + +https://huggingface.co/FacebookAI/roberta-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-11-snowflake_artic_m_en.md b/docs/_posts/ahmedlone127/2024-10-11-snowflake_artic_m_en.md new file mode 100644 index 00000000000000..ef87897d8978ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-11-snowflake_artic_m_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: SnowFlake Medium Model +author: John Snow Labs +name: snowflake_artic_m +date: 2024-10-11 +tags: [embeddings, snowflake, en, open_source, onnx, openvino] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: openvino +annotator: SnowFlakeEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained SnowFlakeEmbeddings model, adapted from Hugging Face and imported into Spark NLP to provide scalability and production-readiness. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/snowflake_artic_m_en_5.5.0_3.0_1728683126777.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/snowflake_artic_m_en_5.5.0_3.0_1728683126777.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +snowflake = SnowFlakeEmbeddings.pretrained("snowflake_artic_m","en") \ + .setInputCols("document") \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, snowflake]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val snowflake = SnowFlakeEmbeddings.pretrained("snowflake_artic_m", "en") + .setInputCols("document") + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, snowflake)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|snowflake_artic_m| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[snowflake]| +|Language:|en| +|Size:|405.7 MB| + +## References + +https://huggingface.co/Snowflake/snowflake-arctic-embed-m \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-13-uae_large_v1_en.md b/docs/_posts/ahmedlone127/2024-10-13-uae_large_v1_en.md new file mode 100644 index 00000000000000..8ff75bcb369cbc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-13-uae_large_v1_en.md @@ -0,0 +1,125 @@ +--- +layout: model +title: UAE-Large-V1 for Sentence Embeddings +author: John Snow Labs +name: uae_large_v1 +date: 2024-10-13 +tags: [uae, en, sentence, embeddings, open_source, onnx, openvino] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: openvino +annotator: UAEEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +UAE is a novel angle-optimized text embedding model, designed to improve semantic textual +similarity tasks, which are crucial for Large Language Model (LLM) applications. By +introducing angle optimization in a complex space, AnglE effectively mitigates saturation of +the cosine similarity function. + +This model is based on UAE-Large-V1 and was originally exported from https://huggingface.co/WhereIsAI/UAE-Large-V1. Several embedding pooling strategies can be set. Please refer to the class for more information. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/uae_large_v1_en_5.5.0_3.0_1728822609847.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/uae_large_v1_en_5.5.0_3.0_1728822609847.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +import sparknlp +from sparknlp.base import * +from sparknlp.annotator import * +from pyspark.ml import Pipeline +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") +embeddings = UAEEmbeddings.pretrained() \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") +embeddingsFinisher = EmbeddingsFinisher() \ + .setInputCols("embeddings") \ + .setOutputCols("finished_embeddings") \ + .setOutputAsVector(True) +pipeline = Pipeline().setStages([ + documentAssembler, + embeddings, + embeddingsFinisher +]) +data = spark.createDataFrame([["hello world"], ["hello moon"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.selectExpr("explode(finished_embeddings) as result").show(5, 80) +``` +```scala +import spark.implicits._ +import com.johnsnowlabs.nlp.base.DocumentAssembler +import com.johnsnowlabs.nlp.annotators.Tokenizer +import com.johnsnowlabs.nlp.embeddings.UAEEmbeddings +import com.johnsnowlabs.nlp.EmbeddingsFinisher +import org.apache.spark.ml.Pipeline +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") +val embeddings = UAEEmbeddings.pretrained() + .setInputCols("document") + .setOutputCol("UAE_embeddings") +val embeddingsFinisher = new EmbeddingsFinisher() + .setInputCols("UAE_embeddings") + .setOutputCols("finished_embeddings") + .setOutputAsVector(true) +val pipeline = new Pipeline().setStages(Array( + documentAssembler, + embeddings, + embeddingsFinisher +)) +val data = Seq("hello world", "hello moon").toDF("text") +val result = pipeline.fit(data).transform(data) +result.selectExpr("explode(finished_embeddings) as result").show(5, 80) +``` +
+ +## Results + +```bash + ++--------------------------------------------------------------------------------+ +| result| ++--------------------------------------------------------------------------------+ +|[0.50387806, 0.5861606, 0.35129607, -0.76046336, -0.32446072, -0.117674336, 0...| +|[0.6660665, 0.961762, 0.24854276, -0.1018044, -0.6569202, 0.027635604, 0.1915...| ++--------------------------------------------------------------------------------+ +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|uae_large_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/WhereIsAI/UAE-Large-V1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-14-sent_xlm_roberta_base_xx.md b/docs/_posts/ahmedlone127/2024-10-14-sent_xlm_roberta_base_xx.md new file mode 100644 index 00000000000000..427d32ee52e1d9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-14-sent_xlm_roberta_base_xx.md @@ -0,0 +1,80 @@ +--- +layout: model +title: XLM-RoBERTa Base Sentence Embeddings (sent_xlm_roberta_base) +author: John Snow Labs +name: sent_xlm_roberta_base +date: 2024-10-14 +tags: [multilingual, xx, sentence_embeddings, xlm_roberta, open_source, tensorflow] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: tensorflow +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +[XLM-RoBERTa](https://ai.facebook.com/blog/-xlm-r-state-of-the-art-cross-lingual-understanding-through-self-supervision/) is a scaled cross-lingual sentence encoder. It is trained on 2.5T of data across 100 languages data filtered from Common Crawl. XLM-R achieves state-of-the-arts results on multiple cross-lingual benchmarks. 
+ +The XLM-RoBERTa model was proposed in [Unsupervised Cross-lingual Representation Learning at Scale](https://arxiv.org/abs/1911.02116) by Alexis Conneau, Kartikay Khandelwal, Naman Goyal, Vishrav Chaudhary, Guillaume Wenzek, Francisco Guzmán, Edouard Grave, Myle Ott, Luke Zettlemoyer, and Veselin Stoyanov. + +It is based on Facebook's RoBERTa model released in 2019. It is a large multi-lingual language model, trained on 2.5TB of filtered CommonCrawl data. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_xx_5.5.0_3.0_1728933428578.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_xx_5.5.0_3.0_1728933428578.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base", "xx") \ + .setInputCols("sentence") \ + .setOutputCol("embeddings") +``` +```scala +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base", "xx") + .setInputCols("sentence") + .setOutputCol("embeddings") +``` + +{:.nlu-block} +```python +import nlu +nlu.load("xx.embed_sentence.xlm_roberta.base").predict("""Put your text here.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[sentence_embeddings]| +|Language:|xx| +|Size:|655.0 MB| +|Case sensitive:|true| +|Max sentence length:|32| + +## References + +https://huggingface.co/xlm-roberta-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-16-asr_hubert_large_ls960_en.md b/docs/_posts/ahmedlone127/2024-10-16-asr_hubert_large_ls960_en.md new file mode 100644 index 00000000000000..4100f0edb7e19f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-16-asr_hubert_large_ls960_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: ASR HubertForCTC - asr_hubert_large_ls960 +author: John Snow Labs +name: asr_hubert_large_ls960 +date: 2024-10-16 +tags: [hubert, en, open_source, onnx, openvino] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: openvino +annotator: HubertForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + + +Hubert Model with a language modeling head on top for Connectionist Temporal Classification (CTC). Hubert was proposed in HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units by Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed. + +The large model was fine-tuned on 960h of Librispeech on 16kHz sampled speech audio. When using the model, make sure that your speech input is also sampled at 16kHz. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/asr_hubert_large_ls960_en_5.5.0_3.0_1729090392896.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/asr_hubert_large_ls960_en_5.5.0_3.0_1729090392896.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +audio_assembler = AudioAssembler()\ + .setInputCol("audio_content")\ + .setOutputCol("audio_assembler") + +speech_to_text = HubertForCTC.pretrained("asr_hubert_large_ls960", "en") .setInputCols("audio_assembler")\ + .setOutputCol("text") + +pipeline = Pipeline(stages=[ + audio_assembler, + speech_to_text, +]) + +pipelineModel = pipeline.fit(audioDf) + +pipelineDF = pipelineModel.transform(audioDf) +``` +```scala +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = HubertForCTC + .pretrained("asr_hubert_large_ls960", "en") + .setInputCols("audio_assembler") + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) + +val pipelineModel = pipeline.fit(audioDf) + +val pipelineDF = pipelineModel.transform(audioDf) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|asr_hubert_large_ls960| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/facebook/hubert-large-ls960-ft \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-17-asr_wav2vec2_base_960h_en.md b/docs/_posts/ahmedlone127/2024-10-17-asr_wav2vec2_base_960h_en.md new file mode 100644 index 00000000000000..d728ea1164525c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-17-asr_wav2vec2_base_960h_en.md @@ -0,0 +1,87 @@ +--- +layout: model +title: English asr_wav2vec2_base_960h TFWav2Vec2ForCTC from facebook +author: John Snow Labs +name: asr_wav2vec2_base_960h +date: 2024-10-17 +tags: [wav2vec2, en, open_source, onnx, openvino] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: openvino +annotator: Wav2Vec2ForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + + + + + Pretrained Wav2vec2 model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `asr_wav2vec2_base_960h` is an English model originally trained by facebook. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/asr_wav2vec2_base_960h_en_5.5.0_3.0_1729165403118.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/asr_wav2vec2_base_960h_en_5.5.0_3.0_1729165403118.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +audio_assembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speech_to_text = Wav2Vec2ForCTC \ + .pretrained("asr_wav2vec2_base_960h", "en")\ + .setInputCols("audio_assembler") \ + .setOutputCol("text") +``` +```scala +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = Wav2Vec2ForCTC + .pretrained("asr_wav2vec2_base_960h", "en") + .setInputCols("audio_assembler") + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) + +val pipelineModel = pipeline.fit(audioDf) + +val pipelineDF = pipelineModel.transform(audioDf) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|asr_wav2vec2_base_960h| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|233.0 MB| + +## References + +https://huggingface.co/facebook/wav2vec2-base-960h \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-18-zero_shot_classifier_clip_vit_base_patch32_en.md b/docs/_posts/ahmedlone127/2024-10-18-zero_shot_classifier_clip_vit_base_patch32_en.md new file mode 100644 index 00000000000000..4003797b319cf4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-18-zero_shot_classifier_clip_vit_base_patch32_en.md @@ -0,0 +1,154 @@ +--- +layout: model +title: Image Zero Shot Classification with CLIP +author: John Snow Labs +name: zero_shot_classifier_clip_vit_base_patch32 +date: 2024-10-18 +tags: [classification, image, en, zero_shot, open_source, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +CLIP (Contrastive Language-Image Pre-Training) is a neural network that was trained on image +and text pairs. It has the ability to predict images without training on any hard-coded +labels. This makes it very flexible, as labels can be provided during inference. This is +similar to the zero-shot capabilities of the GPT-2 and 3 models. 
+ +This model was imported from huggingface transformers: +https://huggingface.co/openai/clip-vit-base-patch32 + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/zero_shot_classifier_clip_vit_base_patch32_en_5.5.0_3.0_1729258523690.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/zero_shot_classifier_clip_vit_base_patch32_en_5.5.0_3.0_1729258523690.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +import sparknlp +from sparknlp.base import * +from sparknlp.annotator import * +from pyspark.ml import Pipeline + +imageDF = spark.read \ + .format("image") \ + .option("dropInvalid", value = True) \ + .load("src/test/resources/image/") + +imageAssembler: ImageAssembler = ImageAssembler() \ + .setInputCol("image") \ + .setOutputCol("image_assembler") + +candidateLabels = [ + "a photo of a bird", + "a photo of a cat", + "a photo of a dog", + "a photo of a hen", + "a photo of a hippo", + "a photo of a room", + "a photo of a tractor", + "a photo of an ostrich", + "a photo of an ox"] + +imageClassifier = CLIPForZeroShotClassification \ + .pretrained() \ + .setInputCols(["image_assembler"]) \ + .setOutputCol("label") \ + .setCandidateLabels(candidateLabels) + +pipeline = Pipeline().setStages([imageAssembler, imageClassifier]) +pipelineDF = pipeline.fit(imageDF).transform(imageDF) +pipelineDF \ + .selectExpr("reverse(split(image.origin, '/'))[0] as image_name", "label.result") \ + .show(truncate=False) +``` +```scala +import com.johnsnowlabs.nlp.ImageAssembler +import com.johnsnowlabs.nlp.annotator._ +import org.apache.spark.ml.Pipeline +val imageDF = ResourceHelper.spark.read + .format("image") + .option("dropInvalid", value = true) + .load("src/test/resources/image/") +val imageAssembler: ImageAssembler = new ImageAssembler() + .setInputCol("image") + .setOutputCol("image_assembler") +val candidateLabels = Array( + "a photo of a bird", + "a photo of a cat", + "a photo of a dog", + "a photo of a hen", + "a photo of a hippo", + "a photo of a room", + "a photo of a tractor", + "a photo of an ostrich", + "a photo of an ox") +val imageClassifier = CLIPForZeroShotClassification + .pretrained() + .setInputCols("image_assembler") + .setOutputCol("label") + .setCandidateLabels(candidateLabels) +val pipeline = + new Pipeline().setStages(Array(imageAssembler, imageClassifier)).fit(imageDF).transform(imageDF) 
+pipeline + .selectExpr("reverse(split(image.origin, '/'))[0] as image_name", "label.result") + .show(truncate = false) +``` +
+ +## Results + +```bash + ++-----------------+-----------------------+ +|image_name |result | ++-----------------+-----------------------+ +|palace.JPEG |[a photo of a room] | +|egyptian_cat.jpeg|[a photo of a cat] | +|hippopotamus.JPEG|[a photo of a hippo] | +|hen.JPEG |[a photo of a hen] | +|ostrich.JPEG |[a photo of an ostrich]| +|junco.JPEG |[a photo of a bird] | +|bluetick.jpg |[a photo of a dog] | +|chihuahua.jpg |[a photo of a dog] | +|tractor.JPEG |[a photo of a tractor] | +|ox.JPEG |[a photo of an ox] | ++-----------------+-----------------------+ +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|zero_shot_classifier_clip_vit_base_patch32| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[classification]| +|Language:|en| +|Size:|397.1 MB| + +## References + +https://huggingface.co/openai/clip-vit-base-patch32 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-19-image_classifier_convnext_tiny_224_local_en.md b/docs/_posts/ahmedlone127/2024-10-19-image_classifier_convnext_tiny_224_local_en.md new file mode 100644 index 00000000000000..78ebe1e13fbb50 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-19-image_classifier_convnext_tiny_224_local_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English image_classifier_convnext_tiny_224_local ConvNextForImageClassification +author: John Snow Labs +name: image_classifier_convnext_tiny_224_local +date: 2024-10-19 +tags: [imagenet, image_classification, en, open_source, onnx, openvino] +task: Image Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: openvino +annotator: ConvNextForImageClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained ConvNext model for Image Classification, adapted from Hugging Face and curated to provide scalability and 
production-readiness using Spark NLP. + +The ConvNeXT model was proposed in A ConvNet for the 2020s by Zhuang Liu, Hanzi Mao, Chao-Yuan Wu, Christoph Feichtenhofer, Trevor Darrell, Saining Xie. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/image_classifier_convnext_tiny_224_local_en_5.5.0_3.0_1729378592800.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/image_classifier_convnext_tiny_224_local_en_5.5.0_3.0_1729378592800.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +image_assembler = ImageAssembler() .setInputCol("image") \ + .setOutputCol("image_assembler") + +imageClassifier = ConvNextForImageClassification \ + .pretrained("image_classifier_convnext_tiny_224_local", "en") .setInputCols("image_assembler") \ + .setOutputCol("class") + +pipeline = Pipeline(stages=[ + image_assembler, + imageClassifier, +]) + +pipelineModel = pipeline.fit(imageDF) + +pipelineDF = pipelineModel.transform(imageDF) +``` +```scala +val imageAssembler = new ImageAssembler() +.setInputCol("image") +.setOutputCol("image_assembler") + +val imageClassifier = ConvNextForImageClassification +.pretrained("image_classifier_convnext_tiny_224_local", "en") +.setInputCols("image_assembler") +.setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier)) + +val pipelineModel = pipeline.fit(imageDF) + +val pipelineDF = pipelineModel.transform(imageDF) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|image_classifier_convnext_tiny_224_local| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[class]| +|Language:|en| +|Size:|107.4 MB| + +## References + +https://huggingface.co/facebook/convnext-tiny-224 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-19-image_classifier_swin_base_patch4_window7_224_en.md b/docs/_posts/ahmedlone127/2024-10-19-image_classifier_swin_base_patch4_window7_224_en.md new file mode 100644 index 00000000000000..597d00396afd2c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-19-image_classifier_swin_base_patch4_window7_224_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English image_classifier_swin_base_patch4_window7_224 SwinForImageClassification +author: John Snow Labs +name: image_classifier_swin_base_patch4_window7_224 +date: 2024-10-19 +tags: [swin, image_classification, en, open_source, onnx, openvino] +task: Image Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: openvino +annotator: SwinForImageClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Swin model for Image Classification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. + +Swin Transformer was introduced in the paper Swin Transformer: Hierarchical Vision Transformer using Shifted Windows by Liu et al. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/image_classifier_swin_base_patch4_window7_224_en_5.5.0_3.0_1729373983113.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/image_classifier_swin_base_patch4_window7_224_en_5.5.0_3.0_1729373983113.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +image_assembler = ImageAssembler() .setInputCol("image") \ + .setOutputCol("image_assembler") + +imageClassifier = SwinForImageClassification \ + .pretrained("image_classifier_swin_base_patch4_window7_224", "en") .setInputCols("image_assembler") \ + .setOutputCol("class") + +pipeline = Pipeline(stages=[ + image_assembler, + imageClassifier, +]) + +pipelineModel = pipeline.fit(imageDF) + +pipelineDF = pipelineModel.transform(imageDF) +``` +```scala +val imageAssembler = new ImageAssembler() +.setInputCol("image") +.setOutputCol("image_assembler") + +val imageClassifier = SwinForImageClassification +.pretrained("image_classifier_swin_base_patch4_window7_224", "en") +.setInputCols("image_assembler") +.setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier)) + +val pipelineModel = pipeline.fit(imageDF) + +val pipelineDF = pipelineModel.transform(imageDF) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|image_classifier_swin_base_patch4_window7_224| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[class]| +|Language:|en| +|Size:|211.9 MB| + +## References + +https://huggingface.co/microsoft/swin-base-patch4-window7-224 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-19-image_classifier_vit_base_patch16_224_en.md b/docs/_posts/ahmedlone127/2024-10-19-image_classifier_vit_base_patch16_224_en.md new file mode 100644 index 00000000000000..56d4c039b5885a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-19-image_classifier_vit_base_patch16_224_en.md @@ -0,0 +1,91 @@ +--- +layout: model +title: English image_classifier_vit_base_patch16_224 ViTForImageClassification from google +author: John Snow Labs +name: image_classifier_vit_base_patch16_224 +date: 2024-10-19 +tags: [vit, image_classification, en, open_source, onnx, openvino] +task: Image Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: openvino +annotator: ViTForImageClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained VIT model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. image_classifier_vit_base_patch16_224 is an English model originally trained by google. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/image_classifier_vit_base_patch16_224_en_5.5.0_3.0_1729341384692.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/image_classifier_vit_base_patch16_224_en_5.5.0_3.0_1729341384692.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +image_assembler = ImageAssembler() .setInputCol("image") \ + .setOutputCol("image_assembler") + +imageClassifier = ViTForImageClassification \ + .pretrained("image_classifier_vit_base_patch16_224", "en") .setInputCols("image_assembler") \ + .setOutputCol("class") + +pipeline = Pipeline(stages=[ + image_assembler, + imageClassifier, +]) + +pipelineModel = pipeline.fit(imageDF) + +pipelineDF = pipelineModel.transform(imageDF) +``` +```scala +val imageAssembler = new ImageAssembler() +.setInputCol("image") +.setOutputCol("image_assembler") + +val imageClassifier = ViTForImageClassification +.pretrained("image_classifier_vit_base_patch16_224", "en") +.setInputCols("image_assembler") +.setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier)) + +val pipelineModel = pipeline.fit(imageDF) + +val pipelineDF = pipelineModel.transform(imageDF) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|image_classifier_vit_base_patch16_224| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[class]| +|Language:|en| +|Size:|324.0 MB| + +## References + +https://huggingface.co/google/vit-base-patch16-224 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-20-image_captioning_vit_gpt2_en.md b/docs/_posts/ahmedlone127/2024-10-20-image_captioning_vit_gpt2_en.md new file mode 100644 index 00000000000000..1020f508d818fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-20-image_captioning_vit_gpt2_en.md @@ -0,0 +1,109 @@ +--- +layout: model +title: Image Caption with VisionEncoderDecoder ViT GPT2 +author: John Snow Labs +name: image_captioning_vit_gpt2 +date: 2024-10-20 +tags: [en, image_classification, vit, gpt2, captioning, open_source, onnx] +task: Image Captioning +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: VisionEncoderDecoderForImageCaptioning +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +This is an image captioning model using ViT to encode images and GPT2 to generate captions. Original model from https://huggingface.co/nlpconnect/vit-gpt2-image-captioning + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/image_captioning_vit_gpt2_en_5.5.0_3.0_1729463000155.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/image_captioning_vit_gpt2_en_5.5.0_3.0_1729463000155.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +import sparknlp +from sparknlp.base import * +from sparknlp.annotator import * +from pyspark.ml import Pipeline +imageDF = spark.read \ + .format("image") \ + .option("dropInvalid", value = True) \ + .load("src/test/resources/image/") +imageAssembler = ImageAssembler() \ + .setInputCol("image") \ + .setOutputCol("image_assembler") +imageCaptioning = VisionEncoderDecoderForImageCaptioning \ + .pretrained() \ + .setBeamSize(2) \ + .setDoSample(False) \ + .setInputCols(["image_assembler"]) \ + .setOutputCol("caption") +pipeline = Pipeline().setStages([imageAssembler, imageCaptioning]) +pipelineDF = pipeline.fit(imageDF).transform(imageDF) +pipelineDF \ + .selectExpr("reverse(split(image.origin, '/'))[0] as image_name", "caption.result") .show(truncate = False) +``` +```scala +import com.johnsnowlabs.nlp.annotator._ +import com.johnsnowlabs.nlp.ImageAssembler +import org.apache.spark.ml.Pipeline + +val imageDF: DataFrame = spark.read + .format("image") + .option("dropInvalid", value = true) + .load("src/test/resources/image/") + +val imageAssembler = new ImageAssembler() + .setInputCol("image") + .setOutputCol("image_assembler") + +val imageCaptioning = VisionEncoderDecoderForImageCaptioning + .pretrained() + .setBeamSize(2) + .setDoSample(false) + .setInputCols("image_assembler") + .setOutputCol("caption") + +val pipeline = new Pipeline().setStages(Array(imageAssembler, imageCaptioning)) +val pipelineDF = pipeline.fit(imageDF).transform(imageDF) + +pipelineDF + .selectExpr("reverse(split(image.origin, '/'))[0] as image_name", "caption.result") + .show(truncate = false) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|image_captioning_vit_gpt2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[caption]| +|Language:|en| +|Size:|894.8 MB| + +## References + +https://huggingface.co/nlpconnect/vit-gpt2-image-captioning \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-28-image_captioning_vit_gpt2_en.md b/docs/_posts/ahmedlone127/2024-10-28-image_captioning_vit_gpt2_en.md new file mode 100644 index 00000000000000..b7907d4c386d66 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-28-image_captioning_vit_gpt2_en.md @@ -0,0 +1,109 @@ +--- +layout: model +title: Image Caption with VisionEncoderDecoder ViT GPT2 +author: John Snow Labs +name: image_captioning_vit_gpt2 +date: 2024-10-28 +tags: [en, image_classification, vit, gpt2, captioning, open_source, openvino] +task: Image Captioning +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: openvino +annotator: VisionEncoderDecoderForImageCaptioning +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +This is an image captioning model using ViT to encode images and GPT2 to generate captions. Original model from https://huggingface.co/nlpconnect/vit-gpt2-image-captioning + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/image_captioning_vit_gpt2_en_5.5.0_3.0_1730123370533.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/image_captioning_vit_gpt2_en_5.5.0_3.0_1730123370533.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +import sparknlp +from sparknlp.base import * +from sparknlp.annotator import * +from pyspark.ml import Pipeline +imageDF = spark.read \ + .format("image") \ + .option("dropInvalid", value = True) \ + .load("src/test/resources/image/") +imageAssembler = ImageAssembler() \ + .setInputCol("image") \ + .setOutputCol("image_assembler") +imageCaptioning = VisionEncoderDecoderForImageCaptioning \ + .pretrained() \ + .setBeamSize(2) \ + .setDoSample(False) \ + .setInputCols(["image_assembler"]) \ + .setOutputCol("caption") +pipeline = Pipeline().setStages([imageAssembler, imageCaptioning]) +pipelineDF = pipeline.fit(imageDF).transform(imageDF) +pipelineDF \ + .selectExpr("reverse(split(image.origin, '/'))[0] as image_name", "caption.result") .show(truncate = False) +``` +```scala +import com.johnsnowlabs.nlp.annotator._ +import com.johnsnowlabs.nlp.ImageAssembler +import org.apache.spark.ml.Pipeline + +val imageDF: DataFrame = spark.read + .format("image") + .option("dropInvalid", value = true) + .load("src/test/resources/image/") + +val imageAssembler = new ImageAssembler() + .setInputCol("image") + .setOutputCol("image_assembler") + +val imageCaptioning = VisionEncoderDecoderForImageCaptioning + .pretrained() + .setBeamSize(2) + .setDoSample(false) + .setInputCols("image_assembler") + .setOutputCol("caption") + +val pipeline = new Pipeline().setStages(Array(imageAssembler, imageCaptioning)) +val pipelineDF = pipeline.fit(imageDF).transform(imageDF) + +pipelineDF + .selectExpr("reverse(split(image.origin, '/'))[0] as image_name", "caption.result") + .show(truncate = false) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|image_captioning_vit_gpt2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[caption]| +|Language:|en| +|Size:|894.6 MB| + +## References + +https://huggingface.co/nlpconnect/vit-gpt2-image-captioning \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-gemma_2_2b_it_iq3_m_en.md b/docs/_posts/ahmedlone127/2024-10-29-gemma_2_2b_it_iq3_m_en.md new file mode 100644 index 00000000000000..800cd5c2da0dd3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-gemma_2_2b_it_iq3_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English gemma_2_2b_it_iq3_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: gemma_2_2b_it_iq3_m +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gemma_2_2b_it_iq3_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_iq3_m_en_5.5.1_3.0_1730198545090.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_iq3_m_en_5.5.1_3.0_1730198545090.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_iq3_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_iq3_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gemma_2_2b_it_iq3_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.4 GB| + +## References + +https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-gemma_2_2b_it_iq4_xs_en.md b/docs/_posts/ahmedlone127/2024-10-29-gemma_2_2b_it_iq4_xs_en.md new file mode 100644 index 00000000000000..ee8effed849a5f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-gemma_2_2b_it_iq4_xs_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English gemma_2_2b_it_iq4_xs AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: gemma_2_2b_it_iq4_xs +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gemma_2_2b_it_iq4_xs` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_iq4_xs_en_5.5.1_3.0_1730198610792.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_iq4_xs_en_5.5.1_3.0_1730198610792.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_iq4_xs","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_iq4_xs", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gemma_2_2b_it_iq4_xs| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-gemma_2_2b_it_q3_k_l_en.md b/docs/_posts/ahmedlone127/2024-10-29-gemma_2_2b_it_q3_k_l_en.md new file mode 100644 index 00000000000000..3f0d1bf65f9ba9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-gemma_2_2b_it_q3_k_l_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English gemma_2_2b_it_q3_k_l AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: gemma_2_2b_it_q3_k_l +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gemma_2_2b_it_q3_k_l` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_q3_k_l_en_5.5.1_3.0_1730198674631.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_q3_k_l_en_5.5.1_3.0_1730198674631.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_q3_k_l","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_q3_k_l", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gemma_2_2b_it_q3_k_l| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-gemma_2_2b_it_q4_k_m_en.md b/docs/_posts/ahmedlone127/2024-10-29-gemma_2_2b_it_q4_k_m_en.md new file mode 100644 index 00000000000000..9465a36e6b54bb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-gemma_2_2b_it_q4_k_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English gemma_2_2b_it_q4_k_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: gemma_2_2b_it_q4_k_m +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gemma_2_2b_it_q4_k_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_q4_k_m_en_5.5.1_3.0_1730198748456.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_q4_k_m_en_5.5.1_3.0_1730198748456.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_q4_k_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_q4_k_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gemma_2_2b_it_q4_k_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-gemma_2_2b_it_q5_k_m_en.md b/docs/_posts/ahmedlone127/2024-10-29-gemma_2_2b_it_q5_k_m_en.md new file mode 100644 index 00000000000000..a9f4a599205a29 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-gemma_2_2b_it_q5_k_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English gemma_2_2b_it_q5_k_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: gemma_2_2b_it_q5_k_m +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gemma_2_2b_it_q5_k_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_q5_k_m_en_5.5.1_3.0_1730229529211.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_q5_k_m_en_5.5.1_3.0_1730229529211.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_q5_k_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_q5_k_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gemma_2_2b_it_q5_k_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.9 GB| + +## References + +https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-gemma_2_2b_it_q6_k_en.md b/docs/_posts/ahmedlone127/2024-10-29-gemma_2_2b_it_q6_k_en.md new file mode 100644 index 00000000000000..cb30905c2f83d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-gemma_2_2b_it_q6_k_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English gemma_2_2b_it_q6_k AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: gemma_2_2b_it_q6_k +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gemma_2_2b_it_q6_k` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_q6_k_en_5.5.1_3.0_1730229619613.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_q6_k_en_5.5.1_3.0_1730229619613.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_q6_k","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_q6_k", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gemma_2_2b_it_q6_k| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|2.1 GB| + +## References + +https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-gemma_2_2b_it_q8_0_en.md b/docs/_posts/ahmedlone127/2024-10-29-gemma_2_2b_it_q8_0_en.md new file mode 100644 index 00000000000000..09255cbff175a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-gemma_2_2b_it_q8_0_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English gemma_2_2b_it_q8_0 AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: gemma_2_2b_it_q8_0 +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gemma_2_2b_it_q8_0` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_q8_0_en_5.5.1_3.0_1730229741349.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gemma_2_2b_it_q8_0_en_5.5.1_3.0_1730229741349.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_q8_0","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("gemma_2_2b_it_q8_0", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gemma_2_2b_it_q8_0| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|2.7 GB| + +## References + +https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_1b_instruct_q3_k_l_xx.md b/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_1b_instruct_q3_k_l_xx.md new file mode 100644 index 00000000000000..2ba3187cf54129 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_1b_instruct_q3_k_l_xx.md @@ -0,0 +1,101 @@ +--- +layout: model +title: Multilingual llama_3.2_1b_instruct_q3_k_l AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: llama_3.2_1b_instruct_q3_k_l +date: 2024-10-29 +tags: [xx, open_source, onnx, conversational, text_generation, text_to_text, en, de, fr, it, pt, hi, es, th, llamacpp] +task: Text Generation +language: xx +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`llama_3.2_1b_instruct_q3_k_l` is a Multilingual model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/llama_3.2_1b_instruct_q3_k_l_xx_5.5.1_3.0_1730231028736.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/llama_3.2_1b_instruct_q3_k_l_xx_5.5.1_3.0_1730231028736.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("llama_3.2_1b_instruct_q3_k_l","xx") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("llama_3.2_1b_instruct_q3_k_l", "xx") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|llama_3.2_1b_instruct_q3_k_l| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|xx| +|Size:|728.0 MB| + +## References + +https://huggingface.co/lmstudio-community/Llama-3.2-1B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_1b_instruct_q4_k_m_xx.md b/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_1b_instruct_q4_k_m_xx.md new file mode 100644 index 00000000000000..e99e759a6bd650 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_1b_instruct_q4_k_m_xx.md @@ -0,0 +1,101 @@ +--- +layout: model +title: Multilingual llama_3.2_1b_instruct_q4_k_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: llama_3.2_1b_instruct_q4_k_m +date: 2024-10-29 +tags: [xx, open_source, onnx, conversational, text_generation, text_to_text, en, de, fr, it, pt, hi, es, th, llamacpp] +task: Text Generation +language: xx +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`llama_3.2_1b_instruct_q4_k_m` is a Multilingual model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/llama_3.2_1b_instruct_q4_k_m_xx_5.5.1_3.0_1730231067664.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/llama_3.2_1b_instruct_q4_k_m_xx_5.5.1_3.0_1730231067664.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("llama_3.2_1b_instruct_q4_k_m","xx") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("llama_3.2_1b_instruct_q4_k_m", "xx") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|llama_3.2_1b_instruct_q4_k_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|xx| +|Size:|793.2 MB| + +## References + +https://huggingface.co/lmstudio-community/Llama-3.2-1B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_1b_instruct_q6_k_xx.md b/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_1b_instruct_q6_k_xx.md new file mode 100644 index 00000000000000..3b5030e358224d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_1b_instruct_q6_k_xx.md @@ -0,0 +1,101 @@ +--- +layout: model +title: Multilingual llama_3.2_1b_instruct_q6_k AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: llama_3.2_1b_instruct_q6_k +date: 2024-10-29 +tags: [xx, open_source, onnx, conversational, text_generation, text_to_text, en, de, fr, it, pt, hi, es, th, llamacpp] +task: Text Generation +language: xx +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`llama_3.2_1b_instruct_q6_k` is a Multilingual model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/llama_3.2_1b_instruct_q6_k_xx_5.5.1_3.0_1730231113608.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/llama_3.2_1b_instruct_q6_k_xx_5.5.1_3.0_1730231113608.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("llama_3.2_1b_instruct_q6_k","xx") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("llama_3.2_1b_instruct_q6_k", "xx") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|llama_3.2_1b_instruct_q6_k| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|xx| +|Size:|1.0 GB| + +## References + +https://huggingface.co/lmstudio-community/Llama-3.2-1B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_1b_instruct_q8_0_xx.md b/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_1b_instruct_q8_0_xx.md new file mode 100644 index 00000000000000..279a499da47c87 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_1b_instruct_q8_0_xx.md @@ -0,0 +1,101 @@ +--- +layout: model +title: Multilingual llama_3.2_1b_instruct_q8_0 AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: llama_3.2_1b_instruct_q8_0 +date: 2024-10-29 +tags: [xx, open_source, onnx, conversational, text_generation, text_to_text, en, de, fr, it, pt, hi, es, th, llamacpp] +task: Text Generation +language: xx +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`llama_3.2_1b_instruct_q8_0` is a Multilingual model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/llama_3.2_1b_instruct_q8_0_xx_5.5.1_3.0_1730231173687.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/llama_3.2_1b_instruct_q8_0_xx_5.5.1_3.0_1730231173687.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("llama_3.2_1b_instruct_q8_0","xx") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("llama_3.2_1b_instruct_q8_0", "xx") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|llama_3.2_1b_instruct_q8_0| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|xx| +|Size:|1.3 GB| + +## References + +https://huggingface.co/lmstudio-community/Llama-3.2-1B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_3b_instruct_q3_k_l_xx.md b/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_3b_instruct_q3_k_l_xx.md new file mode 100644 index 00000000000000..54e5aa6b61c938 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_3b_instruct_q3_k_l_xx.md @@ -0,0 +1,101 @@ +--- +layout: model +title: Multilingual llama_3.2_3b_instruct_q3_k_l AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: llama_3.2_3b_instruct_q3_k_l +date: 2024-10-29 +tags: [xx, open_source, onnx, conversational, text_generation, text_to_text, en, de, fr, it, pt, hi, es, th, llamacpp] +task: Text Generation +language: xx +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `llama_3.2_3b_instruct_q3_k_l` is a Multilingual model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/llama_3.2_3b_instruct_q3_k_l_xx_5.5.1_3.0_1730199983558.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/llama_3.2_3b_instruct_q3_k_l_xx_5.5.1_3.0_1730199983558.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("llama_3.2_3b_instruct_q3_k_l","xx") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("llama_3.2_3b_instruct_q3_k_l", "xx") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|llama_3.2_3b_instruct_q3_k_l| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|xx| +|Size:|1.8 GB| + +## References + +https://huggingface.co/lmstudio-community/Llama-3.2-3B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_3b_instruct_q4_k_m_xx.md b/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_3b_instruct_q4_k_m_xx.md new file mode 100644 index 00000000000000..748d054a04425f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_3b_instruct_q4_k_m_xx.md @@ -0,0 +1,101 @@ +--- +layout: model +title: Multilingual llama_3.2_3b_instruct_q4_k_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: llama_3.2_3b_instruct_q4_k_m +date: 2024-10-29 +tags: [xx, open_source, onnx, conversational, text_generation, text_to_text, en, de, fr, it, pt, hi, es, th, llamacpp] +task: Text Generation +language: xx +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `llama_3.2_3b_instruct_q4_k_m` is a Multilingual model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/llama_3.2_3b_instruct_q4_k_m_xx_5.5.1_3.0_1730200073838.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/llama_3.2_3b_instruct_q4_k_m_xx_5.5.1_3.0_1730200073838.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("llama_3.2_3b_instruct_q4_k_m","xx") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("llama_3.2_3b_instruct_q4_k_m", "xx") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|llama_3.2_3b_instruct_q4_k_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|xx| +|Size:|2.0 GB| + +## References + +https://huggingface.co/lmstudio-community/Llama-3.2-3B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_3b_instruct_q6_k_xx.md b/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_3b_instruct_q6_k_xx.md new file mode 100644 index 00000000000000..24b4cfc31890cd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_3b_instruct_q6_k_xx.md @@ -0,0 +1,101 @@ +--- +layout: model +title: Multilingual llama_3.2_3b_instruct_q6_k AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: llama_3.2_3b_instruct_q6_k +date: 2024-10-29 +tags: [xx, open_source, onnx, conversational, text_generation, text_to_text, en, de, fr, it, pt, hi, es, th, llamacpp] +task: Text Generation +language: xx +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `llama_3.2_3b_instruct_q6_k` is a Multilingual model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/llama_3.2_3b_instruct_q6_k_xx_5.5.1_3.0_1730200181182.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/llama_3.2_3b_instruct_q6_k_xx_5.5.1_3.0_1730200181182.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("llama_3.2_3b_instruct_q6_k","xx") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("llama_3.2_3b_instruct_q6_k", "xx") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|llama_3.2_3b_instruct_q6_k| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|xx| +|Size:|2.6 GB| + +## References + +https://huggingface.co/lmstudio-community/Llama-3.2-3B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_3b_instruct_q8_0_xx.md b/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_3b_instruct_q8_0_xx.md new file mode 100644 index 00000000000000..ed0d66be5211b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-llama_3.2_3b_instruct_q8_0_xx.md @@ -0,0 +1,101 @@ +--- +layout: model +title: Multilingual llama_3.2_3b_instruct_q8_0 AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: llama_3.2_3b_instruct_q8_0 +date: 2024-10-29 +tags: [xx, open_source, onnx, conversational, text_generation, text_to_text, en, de, fr, it, pt, hi, es, th, llamacpp] +task: Text Generation +language: xx +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `llama_3.2_3b_instruct_q8_0` is a Multilingual model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/llama_3.2_3b_instruct_q8_0_xx_5.5.1_3.0_1730200322185.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/llama_3.2_3b_instruct_q8_0_xx_5.5.1_3.0_1730200322185.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("llama_3.2_3b_instruct_q8_0","xx") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("llama_3.2_3b_instruct_q8_0", "xx") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|llama_3.2_3b_instruct_q8_0| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|xx| +|Size:|3.3 GB| + +## References + +https://huggingface.co/lmstudio-community/Llama-3.2-3B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-mathstral_7b_v0.1_iq4_xs_en.md b/docs/_posts/ahmedlone127/2024-10-29-mathstral_7b_v0.1_iq4_xs_en.md new file mode 100644 index 00000000000000..dec6688b049012 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-mathstral_7b_v0.1_iq4_xs_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English mathstral_7b_v0.1_iq4_xs AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: mathstral_7b_v0.1_iq4_xs +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mathstral_7b_v0.1_iq4_xs` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mathstral_7b_v0.1_iq4_xs_en_5.5.1_3.0_1730237800768.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mathstral_7b_v0.1_iq4_xs_en_5.5.1_3.0_1730237800768.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("mathstral_7b_v0.1_iq4_xs","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("mathstral_7b_v0.1_iq4_xs", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mathstral_7b_v0.1_iq4_xs| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|3.9 GB| + +## References + +https://huggingface.co/lmstudio-community/mathstral-7B-v0.1-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-mathstral_7b_v0.1_q3_k_l_en.md b/docs/_posts/ahmedlone127/2024-10-29-mathstral_7b_v0.1_q3_k_l_en.md new file mode 100644 index 00000000000000..890222b79ac778 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-mathstral_7b_v0.1_q3_k_l_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English mathstral_7b_v0.1_q3_k_l AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: mathstral_7b_v0.1_q3_k_l +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mathstral_7b_v0.1_q3_k_l` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mathstral_7b_v0.1_q3_k_l_en_5.5.1_3.0_1730237954284.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mathstral_7b_v0.1_q3_k_l_en_5.5.1_3.0_1730237954284.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("mathstral_7b_v0.1_q3_k_l","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("mathstral_7b_v0.1_q3_k_l", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mathstral_7b_v0.1_q3_k_l| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|3.8 GB| + +## References + +https://huggingface.co/lmstudio-community/mathstral-7B-v0.1-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-meta_llama_3_8b_instruct_iq3_m_en.md b/docs/_posts/ahmedlone127/2024-10-29-meta_llama_3_8b_instruct_iq3_m_en.md new file mode 100644 index 00000000000000..5390bf562028d5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-meta_llama_3_8b_instruct_iq3_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English meta_llama_3_8b_instruct_iq3_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: meta_llama_3_8b_instruct_iq3_m +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `meta_llama_3_8b_instruct_iq3_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/meta_llama_3_8b_instruct_iq3_m_en_5.5.1_3.0_1730232269372.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/meta_llama_3_8b_instruct_iq3_m_en_5.5.1_3.0_1730232269372.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("meta_llama_3_8b_instruct_iq3_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("meta_llama_3_8b_instruct_iq3_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|meta_llama_3_8b_instruct_iq3_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|3.8 GB| + +## References + +https://huggingface.co/lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-mistral_7b_instruct_v0.3_iq3_m_en.md b/docs/_posts/ahmedlone127/2024-10-29-mistral_7b_instruct_v0.3_iq3_m_en.md new file mode 100644 index 00000000000000..4554390b62da18 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-mistral_7b_instruct_v0.3_iq3_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English mistral_7b_instruct_v0.3_iq3_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: mistral_7b_instruct_v0.3_iq3_m +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mistral_7b_instruct_v0.3_iq3_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mistral_7b_instruct_v0.3_iq3_m_en_5.5.1_3.0_1730231778040.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mistral_7b_instruct_v0.3_iq3_m_en_5.5.1_3.0_1730231778040.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("mistral_7b_instruct_v0.3_iq3_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("mistral_7b_instruct_v0.3_iq3_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mistral_7b_instruct_v0.3_iq3_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|3.3 GB| + +## References + +https://huggingface.co/lmstudio-community/Mistral-7B-Instruct-v0.3-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-mistral_7b_instruct_v0.3_q3_k_l_en.md b/docs/_posts/ahmedlone127/2024-10-29-mistral_7b_instruct_v0.3_q3_k_l_en.md new file mode 100644 index 00000000000000..a9759f5aaca385 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-mistral_7b_instruct_v0.3_q3_k_l_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English mistral_7b_instruct_v0.3_q3_k_l AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: mistral_7b_instruct_v0.3_q3_k_l +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mistral_7b_instruct_v0.3_q3_k_l` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mistral_7b_instruct_v0.3_q3_k_l_en_5.5.1_3.0_1730231916921.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mistral_7b_instruct_v0.3_q3_k_l_en_5.5.1_3.0_1730231916921.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("mistral_7b_instruct_v0.3_q3_k_l","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("mistral_7b_instruct_v0.3_q3_k_l", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mistral_7b_instruct_v0.3_q3_k_l| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|3.8 GB| + +## References + +https://huggingface.co/lmstudio-community/Mistral-7B-Instruct-v0.3-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-phi_3.1_mini_4k_instruct_iq3_m_en.md b/docs/_posts/ahmedlone127/2024-10-29-phi_3.1_mini_4k_instruct_iq3_m_en.md new file mode 100644 index 00000000000000..ca28f2acfd1ae4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-phi_3.1_mini_4k_instruct_iq3_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English phi_3.1_mini_4k_instruct_iq3_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: phi_3.1_mini_4k_instruct_iq3_m +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `phi_3.1_mini_4k_instruct_iq3_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/phi_3.1_mini_4k_instruct_iq3_m_en_5.5.1_3.0_1730235527993.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/phi_3.1_mini_4k_instruct_iq3_m_en_5.5.1_3.0_1730235527993.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("phi_3.1_mini_4k_instruct_iq3_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("phi_3.1_mini_4k_instruct_iq3_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|phi_3.1_mini_4k_instruct_iq3_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.8 GB| + +## References + +https://huggingface.co/lmstudio-community/Phi-3.1-mini-4k-instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-qwen2_500m_instruct_f32_en.md b/docs/_posts/ahmedlone127/2024-10-29-qwen2_500m_instruct_f32_en.md new file mode 100644 index 00000000000000..36301b9a52ac78 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-qwen2_500m_instruct_f32_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2_500m_instruct_f32 AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2_500m_instruct_f32 +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2_500m_instruct_f32` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2_500m_instruct_f32_en_5.5.1_3.0_1730243787866.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2_500m_instruct_f32_en_5.5.1_3.0_1730243787866.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2_500m_instruct_f32","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2_500m_instruct_f32", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2_500m_instruct_f32| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|930.2 MB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2-500M-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-qwen2_500m_instruct_iq4_xs_en.md b/docs/_posts/ahmedlone127/2024-10-29-qwen2_500m_instruct_iq4_xs_en.md new file mode 100644 index 00000000000000..c8187e025e94a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-qwen2_500m_instruct_iq4_xs_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2_500m_instruct_iq4_xs AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2_500m_instruct_iq4_xs +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2_500m_instruct_iq4_xs` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2_500m_instruct_iq4_xs_en_5.5.1_3.0_1730243384741.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2_500m_instruct_iq4_xs_en_5.5.1_3.0_1730243384741.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2_500m_instruct_iq4_xs","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2_500m_instruct_iq4_xs", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2_500m_instruct_iq4_xs| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|336.2 MB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2-500M-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-qwen2_500m_instruct_q4_k_m_en.md b/docs/_posts/ahmedlone127/2024-10-29-qwen2_500m_instruct_q4_k_m_en.md new file mode 100644 index 00000000000000..7d4471cbeacda0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-qwen2_500m_instruct_q4_k_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2_500m_instruct_q4_k_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2_500m_instruct_q4_k_m +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2_500m_instruct_q4_k_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2_500m_instruct_q4_k_m_en_5.5.1_3.0_1730243404853.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2_500m_instruct_q4_k_m_en_5.5.1_3.0_1730243404853.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2_500m_instruct_q4_k_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2_500m_instruct_q4_k_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2_500m_instruct_q4_k_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|387.3 MB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2-500M-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-qwen2_500m_instruct_q5_k_m_en.md b/docs/_posts/ahmedlone127/2024-10-29-qwen2_500m_instruct_q5_k_m_en.md new file mode 100644 index 00000000000000..26a7c03e3611ae --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-qwen2_500m_instruct_q5_k_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2_500m_instruct_q5_k_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2_500m_instruct_q5_k_m +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2_500m_instruct_q5_k_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2_500m_instruct_q5_k_m_en_5.5.1_3.0_1730243426822.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2_500m_instruct_q5_k_m_en_5.5.1_3.0_1730243426822.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2_500m_instruct_q5_k_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2_500m_instruct_q5_k_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2_500m_instruct_q5_k_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2-500M-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-qwen2_500m_instruct_q6_k_en.md b/docs/_posts/ahmedlone127/2024-10-29-qwen2_500m_instruct_q6_k_en.md new file mode 100644 index 00000000000000..2c90b235c1c066 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-qwen2_500m_instruct_q6_k_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2_500m_instruct_q6_k AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2_500m_instruct_q6_k +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2_500m_instruct_q6_k` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2_500m_instruct_q6_k_en_5.5.1_3.0_1730243451242.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2_500m_instruct_q6_k_en_5.5.1_3.0_1730243451242.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2_500m_instruct_q6_k","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2_500m_instruct_q6_k", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2_500m_instruct_q6_k| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|485.6 MB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2-500M-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-qwen2_500m_instruct_q8_0_en.md b/docs/_posts/ahmedlone127/2024-10-29-qwen2_500m_instruct_q8_0_en.md new file mode 100644 index 00000000000000..0ff73dbf5a6768 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-qwen2_500m_instruct_q8_0_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2_500m_instruct_q8_0 AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2_500m_instruct_q8_0 +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2_500m_instruct_q8_0` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2_500m_instruct_q8_0_en_5.5.1_3.0_1730243477404.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2_500m_instruct_q8_0_en_5.5.1_3.0_1730243477404.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2_500m_instruct_q8_0","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2_500m_instruct_q8_0", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2_500m_instruct_q8_0| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|506.7 MB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2-500M-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-qwen2_math_1.5b_instruct_iq4_xs_en.md b/docs/_posts/ahmedlone127/2024-10-29-qwen2_math_1.5b_instruct_iq4_xs_en.md new file mode 100644 index 00000000000000..aa11aacfae37c4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-qwen2_math_1.5b_instruct_iq4_xs_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2_math_1.5b_instruct_iq4_xs AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2_math_1.5b_instruct_iq4_xs +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2_math_1.5b_instruct_iq4_xs` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2_math_1.5b_instruct_iq4_xs_en_5.5.1_3.0_1730238713389.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2_math_1.5b_instruct_iq4_xs_en_5.5.1_3.0_1730238713389.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2_math_1.5b_instruct_iq4_xs","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2_math_1.5b_instruct_iq4_xs", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2_math_1.5b_instruct_iq4_xs| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|878.6 MB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2-Math-1.5B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-qwen2_math_1.5b_instruct_q4_k_m_en.md b/docs/_posts/ahmedlone127/2024-10-29-qwen2_math_1.5b_instruct_q4_k_m_en.md new file mode 100644 index 00000000000000..2215e50049b7d8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-qwen2_math_1.5b_instruct_q4_k_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2_math_1.5b_instruct_q4_k_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2_math_1.5b_instruct_q4_k_m +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2_math_1.5b_instruct_q4_k_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2_math_1.5b_instruct_q4_k_m_en_5.5.1_3.0_1730238760156.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2_math_1.5b_instruct_q4_k_m_en_5.5.1_3.0_1730238760156.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2_math_1.5b_instruct_q4_k_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2_math_1.5b_instruct_q4_k_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2_math_1.5b_instruct_q4_k_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|965.9 MB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2-Math-1.5B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-qwen2_math_1.5b_instruct_q5_k_m_en.md b/docs/_posts/ahmedlone127/2024-10-29-qwen2_math_1.5b_instruct_q5_k_m_en.md new file mode 100644 index 00000000000000..1fd3f774ec2b4e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-qwen2_math_1.5b_instruct_q5_k_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2_math_1.5b_instruct_q5_k_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2_math_1.5b_instruct_q5_k_m +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2_math_1.5b_instruct_q5_k_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2_math_1.5b_instruct_q5_k_m_en_5.5.1_3.0_1730238815393.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2_math_1.5b_instruct_q5_k_m_en_5.5.1_3.0_1730238815393.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2_math_1.5b_instruct_q5_k_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2_math_1.5b_instruct_q5_k_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2_math_1.5b_instruct_q5_k_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.1 GB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2-Math-1.5B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-qwen2_math_1.5b_instruct_q6_k_en.md b/docs/_posts/ahmedlone127/2024-10-29-qwen2_math_1.5b_instruct_q6_k_en.md new file mode 100644 index 00000000000000..761748f9ea3286 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-qwen2_math_1.5b_instruct_q6_k_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2_math_1.5b_instruct_q6_k AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2_math_1.5b_instruct_q6_k +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2_math_1.5b_instruct_q6_k` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2_math_1.5b_instruct_q6_k_en_5.5.1_3.0_1730238877334.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2_math_1.5b_instruct_q6_k_en_5.5.1_3.0_1730238877334.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2_math_1.5b_instruct_q6_k","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2_math_1.5b_instruct_q6_k", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2_math_1.5b_instruct_q6_k| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2-Math-1.5B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-qwen2_math_1.5b_instruct_q8_0_en.md b/docs/_posts/ahmedlone127/2024-10-29-qwen2_math_1.5b_instruct_q8_0_en.md new file mode 100644 index 00000000000000..198d7dc7f2d40d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-qwen2_math_1.5b_instruct_q8_0_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2_math_1.5b_instruct_q8_0 AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2_math_1.5b_instruct_q8_0 +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2_math_1.5b_instruct_q8_0` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2_math_1.5b_instruct_q8_0_en_5.5.1_3.0_1730238954990.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2_math_1.5b_instruct_q8_0_en_5.5.1_3.0_1730238954990.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2_math_1.5b_instruct_q8_0","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2_math_1.5b_instruct_q8_0", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2_math_1.5b_instruct_q8_0| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2-Math-1.5B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-yi_coder_1.5b_chat_q4_0_4_4_en.md b/docs/_posts/ahmedlone127/2024-10-29-yi_coder_1.5b_chat_q4_0_4_4_en.md new file mode 100644 index 00000000000000..ef7fd42faf4334 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-yi_coder_1.5b_chat_q4_0_4_4_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English yi_coder_1.5b_chat_q4_0_4_4 AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: yi_coder_1.5b_chat_q4_0_4_4 +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `yi_coder_1.5b_chat_q4_0_4_4` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/yi_coder_1.5b_chat_q4_0_4_4_en_5.5.1_3.0_1730239385570.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/yi_coder_1.5b_chat_q4_0_4_4_en_5.5.1_3.0_1730239385570.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("yi_coder_1.5b_chat_q4_0_4_4","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("yi_coder_1.5b_chat_q4_0_4_4", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|yi_coder_1.5b_chat_q4_0_4_4| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|824.5 MB| + +## References + +https://huggingface.co/lmstudio-community/Yi-Coder-1.5B-Chat-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-yi_coder_1.5b_chat_q4_k_m_en.md b/docs/_posts/ahmedlone127/2024-10-29-yi_coder_1.5b_chat_q4_k_m_en.md new file mode 100644 index 00000000000000..8ab447212f5c0a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-yi_coder_1.5b_chat_q4_k_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English yi_coder_1.5b_chat_q4_k_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: yi_coder_1.5b_chat_q4_k_m +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `yi_coder_1.5b_chat_q4_k_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/yi_coder_1.5b_chat_q4_k_m_en_5.5.1_3.0_1730239445307.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/yi_coder_1.5b_chat_q4_k_m_en_5.5.1_3.0_1730239445307.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("yi_coder_1.5b_chat_q4_k_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("yi_coder_1.5b_chat_q4_k_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|yi_coder_1.5b_chat_q4_k_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|944.8 MB| + +## References + +https://huggingface.co/lmstudio-community/Yi-Coder-1.5B-Chat-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-yi_coder_1.5b_chat_q6_k_en.md b/docs/_posts/ahmedlone127/2024-10-29-yi_coder_1.5b_chat_q6_k_en.md new file mode 100644 index 00000000000000..419ec9cdc45f2b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-yi_coder_1.5b_chat_q6_k_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English yi_coder_1.5b_chat_q6_k AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: yi_coder_1.5b_chat_q6_k +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `yi_coder_1.5b_chat_q6_k` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/yi_coder_1.5b_chat_q6_k_en_5.5.1_3.0_1730239509643.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/yi_coder_1.5b_chat_q6_k_en_5.5.1_3.0_1730239509643.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("yi_coder_1.5b_chat_q6_k","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("yi_coder_1.5b_chat_q6_k", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|yi_coder_1.5b_chat_q6_k| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/lmstudio-community/Yi-Coder-1.5B-Chat-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-29-yi_coder_1.5b_chat_q8_0_en.md b/docs/_posts/ahmedlone127/2024-10-29-yi_coder_1.5b_chat_q8_0_en.md new file mode 100644 index 00000000000000..2669a67c077275 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-29-yi_coder_1.5b_chat_q8_0_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English yi_coder_1.5b_chat_q8_0 AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: yi_coder_1.5b_chat_q8_0 +date: 2024-10-29 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `yi_coder_1.5b_chat_q8_0` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/yi_coder_1.5b_chat_q8_0_en_5.5.1_3.0_1730239602151.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/yi_coder_1.5b_chat_q8_0_en_5.5.1_3.0_1730239602151.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("yi_coder_1.5b_chat_q8_0","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("yi_coder_1.5b_chat_q8_0", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|yi_coder_1.5b_chat_q8_0| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/lmstudio-community/Yi-Coder-1.5B-Chat-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-alchemistcoder_ds_6.7b_iq4_xs_en.md b/docs/_posts/ahmedlone127/2024-10-30-alchemistcoder_ds_6.7b_iq4_xs_en.md new file mode 100644 index 00000000000000..359bbc77ae8266 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-alchemistcoder_ds_6.7b_iq4_xs_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English alchemistcoder_ds_6.7b_iq4_xs AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: alchemistcoder_ds_6.7b_iq4_xs +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `alchemistcoder_ds_6.7b_iq4_xs` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/alchemistcoder_ds_6.7b_iq4_xs_en_5.5.1_3.0_1730265657929.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/alchemistcoder_ds_6.7b_iq4_xs_en_5.5.1_3.0_1730265657929.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("alchemistcoder_ds_6.7b_iq4_xs","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("alchemistcoder_ds_6.7b_iq4_xs", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|alchemistcoder_ds_6.7b_iq4_xs| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|3.6 GB| + +## References + +https://huggingface.co/lmstudio-community/AlchemistCoder-DS-6.7B-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-alchemistcoder_l_7b_iq4_xs_en.md b/docs/_posts/ahmedlone127/2024-10-30-alchemistcoder_l_7b_iq4_xs_en.md new file mode 100644 index 00000000000000..09e05f27f203e3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-alchemistcoder_l_7b_iq4_xs_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English alchemistcoder_l_7b_iq4_xs AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: alchemistcoder_l_7b_iq4_xs +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `alchemistcoder_l_7b_iq4_xs` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/alchemistcoder_l_7b_iq4_xs_en_5.5.1_3.0_1730263239155.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/alchemistcoder_l_7b_iq4_xs_en_5.5.1_3.0_1730263239155.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("alchemistcoder_l_7b_iq4_xs","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("alchemistcoder_l_7b_iq4_xs", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|alchemistcoder_l_7b_iq4_xs| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|3.6 GB| + +## References + +https://huggingface.co/lmstudio-community/AlchemistCoder-L-7B-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-codellama_7b_kstack_clean_iq3_m_en.md b/docs/_posts/ahmedlone127/2024-10-30-codellama_7b_kstack_clean_iq3_m_en.md new file mode 100644 index 00000000000000..c92ec14d37dced --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-codellama_7b_kstack_clean_iq3_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English codellama_7b_kstack_clean_iq3_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: codellama_7b_kstack_clean_iq3_m +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `codellama_7b_kstack_clean_iq3_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/codellama_7b_kstack_clean_iq3_m_en_5.5.1_3.0_1730260332734.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/codellama_7b_kstack_clean_iq3_m_en_5.5.1_3.0_1730260332734.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("codellama_7b_kstack_clean_iq3_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("codellama_7b_kstack_clean_iq3_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|codellama_7b_kstack_clean_iq3_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|3.1 GB| + +## References + +https://huggingface.co/lmstudio-community/CodeLlama-7B-KStack-clean-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-codellama_7b_kstack_iq3_m_en.md b/docs/_posts/ahmedlone127/2024-10-30-codellama_7b_kstack_iq3_m_en.md new file mode 100644 index 00000000000000..f002328ee30761 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-codellama_7b_kstack_iq3_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English codellama_7b_kstack_iq3_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: codellama_7b_kstack_iq3_m +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `codellama_7b_kstack_iq3_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/codellama_7b_kstack_iq3_m_en_5.5.1_3.0_1730249651434.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/codellama_7b_kstack_iq3_m_en_5.5.1_3.0_1730249651434.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("codellama_7b_kstack_iq3_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("codellama_7b_kstack_iq3_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|codellama_7b_kstack_iq3_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|3.1 GB| + +## References + +https://huggingface.co/lmstudio-community/CodeLlama-7B-KStack-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-deepseek_coder_1.3b_kexer_iq3_m_en.md b/docs/_posts/ahmedlone127/2024-10-30-deepseek_coder_1.3b_kexer_iq3_m_en.md new file mode 100644 index 00000000000000..a91521acc335da --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-deepseek_coder_1.3b_kexer_iq3_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English deepseek_coder_1.3b_kexer_iq3_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: deepseek_coder_1.3b_kexer_iq3_m +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `deepseek_coder_1.3b_kexer_iq3_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deepseek_coder_1.3b_kexer_iq3_m_en_5.5.1_3.0_1730266269461.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deepseek_coder_1.3b_kexer_iq3_m_en_5.5.1_3.0_1730266269461.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("deepseek_coder_1.3b_kexer_iq3_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("deepseek_coder_1.3b_kexer_iq3_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deepseek_coder_1.3b_kexer_iq3_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|667.7 MB| + +## References + +https://huggingface.co/lmstudio-community/deepseek-coder-1.3B-kexer-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-deepseek_coder_1.3b_kexer_q4_k_m_en.md b/docs/_posts/ahmedlone127/2024-10-30-deepseek_coder_1.3b_kexer_q4_k_m_en.md new file mode 100644 index 00000000000000..f491a2243fa0fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-deepseek_coder_1.3b_kexer_q4_k_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English deepseek_coder_1.3b_kexer_q4_k_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: deepseek_coder_1.3b_kexer_q4_k_m +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `deepseek_coder_1.3b_kexer_q4_k_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deepseek_coder_1.3b_kexer_q4_k_m_en_5.5.1_3.0_1730266308804.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deepseek_coder_1.3b_kexer_q4_k_m_en_5.5.1_3.0_1730266308804.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("deepseek_coder_1.3b_kexer_q4_k_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("deepseek_coder_1.3b_kexer_q4_k_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deepseek_coder_1.3b_kexer_q4_k_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|856.9 MB| + +## References + +https://huggingface.co/lmstudio-community/deepseek-coder-1.3B-kexer-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-deepseek_coder_1.3b_kexer_q6_k_en.md b/docs/_posts/ahmedlone127/2024-10-30-deepseek_coder_1.3b_kexer_q6_k_en.md new file mode 100644 index 00000000000000..91e7d7a526baed --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-deepseek_coder_1.3b_kexer_q6_k_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English deepseek_coder_1.3b_kexer_q6_k AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: deepseek_coder_1.3b_kexer_q6_k +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `deepseek_coder_1.3b_kexer_q6_k` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deepseek_coder_1.3b_kexer_q6_k_en_5.5.1_3.0_1730266358715.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deepseek_coder_1.3b_kexer_q6_k_en_5.5.1_3.0_1730266358715.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("deepseek_coder_1.3b_kexer_q6_k","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("deepseek_coder_1.3b_kexer_q6_k", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deepseek_coder_1.3b_kexer_q6_k| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/lmstudio-community/deepseek-coder-1.3B-kexer-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-deepseek_coder_1.3b_kexer_q8_0_en.md b/docs/_posts/ahmedlone127/2024-10-30-deepseek_coder_1.3b_kexer_q8_0_en.md new file mode 100644 index 00000000000000..a76734385bfad2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-deepseek_coder_1.3b_kexer_q8_0_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English deepseek_coder_1.3b_kexer_q8_0 AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: deepseek_coder_1.3b_kexer_q8_0 +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `deepseek_coder_1.3b_kexer_q8_0` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deepseek_coder_1.3b_kexer_q8_0_en_5.5.1_3.0_1730266423672.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deepseek_coder_1.3b_kexer_q8_0_en_5.5.1_3.0_1730266423672.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("deepseek_coder_1.3b_kexer_q8_0","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("deepseek_coder_1.3b_kexer_q8_0", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deepseek_coder_1.3b_kexer_q8_0| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.4 GB| + +## References + +https://huggingface.co/lmstudio-community/deepseek-coder-1.3B-kexer-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-deepseek_coder_6.7b_kexer_iq3_m_en.md b/docs/_posts/ahmedlone127/2024-10-30-deepseek_coder_6.7b_kexer_iq3_m_en.md new file mode 100644 index 00000000000000..47f57a9498d13d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-deepseek_coder_6.7b_kexer_iq3_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English deepseek_coder_6.7b_kexer_iq3_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: deepseek_coder_6.7b_kexer_iq3_m +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `deepseek_coder_6.7b_kexer_iq3_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deepseek_coder_6.7b_kexer_iq3_m_en_5.5.1_3.0_1730260963287.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deepseek_coder_6.7b_kexer_iq3_m_en_5.5.1_3.0_1730260963287.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("deepseek_coder_6.7b_kexer_iq3_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("deepseek_coder_6.7b_kexer_iq3_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deepseek_coder_6.7b_kexer_iq3_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|3.1 GB| + +## References + +https://huggingface.co/lmstudio-community/deepseek-coder-6.7B-kexer-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-internlm2_5_1_8b_chat_iq4_xs_en.md b/docs/_posts/ahmedlone127/2024-10-30-internlm2_5_1_8b_chat_iq4_xs_en.md new file mode 100644 index 00000000000000..db2548cc3df05d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-internlm2_5_1_8b_chat_iq4_xs_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English internlm2_5_1_8b_chat_iq4_xs AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: internlm2_5_1_8b_chat_iq4_xs +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `internlm2_5_1_8b_chat_iq4_xs` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/internlm2_5_1_8b_chat_iq4_xs_en_5.5.1_3.0_1730267346885.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/internlm2_5_1_8b_chat_iq4_xs_en_5.5.1_3.0_1730267346885.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("internlm2_5_1_8b_chat_iq4_xs","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("internlm2_5_1_8b_chat_iq4_xs", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|internlm2_5_1_8b_chat_iq4_xs| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/lmstudio-community/internlm2_5-1_8b-chat-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-internlm2_5_1_8b_chat_q3_k_l_en.md b/docs/_posts/ahmedlone127/2024-10-30-internlm2_5_1_8b_chat_q3_k_l_en.md new file mode 100644 index 00000000000000..346965617257f5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-internlm2_5_1_8b_chat_q3_k_l_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English internlm2_5_1_8b_chat_q3_k_l AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: internlm2_5_1_8b_chat_q3_k_l +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `internlm2_5_1_8b_chat_q3_k_l` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/internlm2_5_1_8b_chat_q3_k_l_en_5.5.1_3.0_1730267392831.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/internlm2_5_1_8b_chat_q3_k_l_en_5.5.1_3.0_1730267392831.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("internlm2_5_1_8b_chat_q3_k_l","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("internlm2_5_1_8b_chat_q3_k_l", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|internlm2_5_1_8b_chat_q3_k_l| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/lmstudio-community/internlm2_5-1_8b-chat-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-internlm2_5_1_8b_chat_q4_k_m_en.md b/docs/_posts/ahmedlone127/2024-10-30-internlm2_5_1_8b_chat_q4_k_m_en.md new file mode 100644 index 00000000000000..d85e1b8d8b0e03 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-internlm2_5_1_8b_chat_q4_k_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English internlm2_5_1_8b_chat_q4_k_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: internlm2_5_1_8b_chat_q4_k_m +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `internlm2_5_1_8b_chat_q4_k_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/internlm2_5_1_8b_chat_q4_k_m_en_5.5.1_3.0_1730267445239.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/internlm2_5_1_8b_chat_q4_k_m_en_5.5.1_3.0_1730267445239.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("internlm2_5_1_8b_chat_q4_k_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("internlm2_5_1_8b_chat_q4_k_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|internlm2_5_1_8b_chat_q4_k_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/lmstudio-community/internlm2_5-1_8b-chat-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-internlm2_5_1_8b_chat_q5_k_m_en.md b/docs/_posts/ahmedlone127/2024-10-30-internlm2_5_1_8b_chat_q5_k_m_en.md new file mode 100644 index 00000000000000..8db3fc3c8ee0fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-internlm2_5_1_8b_chat_q5_k_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English internlm2_5_1_8b_chat_q5_k_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: internlm2_5_1_8b_chat_q5_k_m +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `internlm2_5_1_8b_chat_q5_k_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/internlm2_5_1_8b_chat_q5_k_m_en_5.5.1_3.0_1730267500355.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/internlm2_5_1_8b_chat_q5_k_m_en_5.5.1_3.0_1730267500355.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("internlm2_5_1_8b_chat_q5_k_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("internlm2_5_1_8b_chat_q5_k_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|internlm2_5_1_8b_chat_q5_k_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.4 GB| + +## References + +https://huggingface.co/lmstudio-community/internlm2_5-1_8b-chat-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-internlm2_5_1_8b_chat_q6_k_en.md b/docs/_posts/ahmedlone127/2024-10-30-internlm2_5_1_8b_chat_q6_k_en.md new file mode 100644 index 00000000000000..db10871f20aab8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-internlm2_5_1_8b_chat_q6_k_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English internlm2_5_1_8b_chat_q6_k AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: internlm2_5_1_8b_chat_q6_k +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `internlm2_5_1_8b_chat_q6_k` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/internlm2_5_1_8b_chat_q6_k_en_5.5.1_3.0_1730267567662.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/internlm2_5_1_8b_chat_q6_k_en_5.5.1_3.0_1730267567662.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("internlm2_5_1_8b_chat_q6_k","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("internlm2_5_1_8b_chat_q6_k", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|internlm2_5_1_8b_chat_q6_k| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/lmstudio-community/internlm2_5-1_8b-chat-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-internlm2_5_1_8b_chat_q8_0_en.md b/docs/_posts/ahmedlone127/2024-10-30-internlm2_5_1_8b_chat_q8_0_en.md new file mode 100644 index 00000000000000..75f74d844423a0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-internlm2_5_1_8b_chat_q8_0_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English internlm2_5_1_8b_chat_q8_0 AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: internlm2_5_1_8b_chat_q8_0 +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `internlm2_5_1_8b_chat_q8_0` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/internlm2_5_1_8b_chat_q8_0_en_5.5.1_3.0_1730267652584.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/internlm2_5_1_8b_chat_q8_0_en_5.5.1_3.0_1730267652584.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("internlm2_5_1_8b_chat_q8_0","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("internlm2_5_1_8b_chat_q8_0", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|internlm2_5_1_8b_chat_q8_0| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.9 GB| + +## References + +https://huggingface.co/lmstudio-community/internlm2_5-1_8b-chat-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-meta_llama_3_8b_instruct_iq3_m_en.md b/docs/_posts/ahmedlone127/2024-10-30-meta_llama_3_8b_instruct_iq3_m_en.md new file mode 100644 index 00000000000000..ec0ba55dbd9e6c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-meta_llama_3_8b_instruct_iq3_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English meta_llama_3_8b_instruct_iq3_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: meta_llama_3_8b_instruct_iq3_m +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `meta_llama_3_8b_instruct_iq3_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/meta_llama_3_8b_instruct_iq3_m_en_5.5.1_3.0_1730250028909.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/meta_llama_3_8b_instruct_iq3_m_en_5.5.1_3.0_1730250028909.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("meta_llama_3_8b_instruct_iq3_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("meta_llama_3_8b_instruct_iq3_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|meta_llama_3_8b_instruct_iq3_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|3.8 GB| + +## References + +https://huggingface.co/lmstudio-community/Meta-Llama-3-8B-Instruct-BPE-fix-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_0.5b_instruct_q3_k_l_en.md b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_0.5b_instruct_q3_k_l_en.md new file mode 100644 index 00000000000000..493bc93dd70b33 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_0.5b_instruct_q3_k_l_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2.5_0.5b_instruct_q3_k_l AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2.5_0.5b_instruct_q3_k_l +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2.5_0.5b_instruct_q3_k_l` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2.5_0.5b_instruct_q3_k_l_en_5.5.1_3.0_1730250340968.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2.5_0.5b_instruct_q3_k_l_en_5.5.1_3.0_1730250340968.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_0.5b_instruct_q3_k_l","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_0.5b_instruct_q3_k_l", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2.5_0.5b_instruct_q3_k_l| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|356.9 MB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2.5-0.5B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_0.5b_instruct_q4_k_m_en.md b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_0.5b_instruct_q4_k_m_en.md new file mode 100644 index 00000000000000..37f07769b2f885 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_0.5b_instruct_q4_k_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2.5_0.5b_instruct_q4_k_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2.5_0.5b_instruct_q4_k_m +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2.5_0.5b_instruct_q4_k_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2.5_0.5b_instruct_q4_k_m_en_5.5.1_3.0_1730250361508.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2.5_0.5b_instruct_q4_k_m_en_5.5.1_3.0_1730250361508.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_0.5b_instruct_q4_k_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_0.5b_instruct_q4_k_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2.5_0.5b_instruct_q4_k_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|386.9 MB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2.5-0.5B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_0.5b_instruct_q6_k_en.md b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_0.5b_instruct_q6_k_en.md new file mode 100644 index 00000000000000..bfd358fd78628c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_0.5b_instruct_q6_k_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2.5_0.5b_instruct_q6_k AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2.5_0.5b_instruct_q6_k +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2.5_0.5b_instruct_q6_k` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2.5_0.5b_instruct_q6_k_en_5.5.1_3.0_1730250386292.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2.5_0.5b_instruct_q6_k_en_5.5.1_3.0_1730250386292.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_0.5b_instruct_q6_k","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_0.5b_instruct_q6_k", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2.5_0.5b_instruct_q6_k| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|485.3 MB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2.5-0.5B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_0.5b_instruct_q8_0_en.md b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_0.5b_instruct_q8_0_en.md new file mode 100644 index 00000000000000..9518aeecb7dc76 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_0.5b_instruct_q8_0_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2.5_0.5b_instruct_q8_0 AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2.5_0.5b_instruct_q8_0 +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2.5_0.5b_instruct_q8_0` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2.5_0.5b_instruct_q8_0_en_5.5.1_3.0_1730250413397.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2.5_0.5b_instruct_q8_0_en_5.5.1_3.0_1730250413397.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_0.5b_instruct_q8_0","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_0.5b_instruct_q8_0", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2.5_0.5b_instruct_q8_0| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|506.3 MB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2.5-0.5B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_1.5b_instruct_q3_k_l_en.md b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_1.5b_instruct_q3_k_l_en.md new file mode 100644 index 00000000000000..18fff1b077430a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_1.5b_instruct_q3_k_l_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2.5_1.5b_instruct_q3_k_l AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2.5_1.5b_instruct_q3_k_l +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2.5_1.5b_instruct_q3_k_l` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2.5_1.5b_instruct_q3_k_l_en_5.5.1_3.0_1730251052001.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2.5_1.5b_instruct_q3_k_l_en_5.5.1_3.0_1730251052001.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_1.5b_instruct_q3_k_l","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_1.5b_instruct_q3_k_l", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2.5_1.5b_instruct_q3_k_l| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|874.6 MB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2.5-1.5B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_1.5b_instruct_q4_k_m_en.md b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_1.5b_instruct_q4_k_m_en.md new file mode 100644 index 00000000000000..aebd366b1df9da --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_1.5b_instruct_q4_k_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2.5_1.5b_instruct_q4_k_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2.5_1.5b_instruct_q4_k_m +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2.5_1.5b_instruct_q4_k_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2.5_1.5b_instruct_q4_k_m_en_5.5.1_3.0_1730251099082.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2.5_1.5b_instruct_q4_k_m_en_5.5.1_3.0_1730251099082.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_1.5b_instruct_q4_k_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_1.5b_instruct_q4_k_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2.5_1.5b_instruct_q4_k_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|966.5 MB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2.5-1.5B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_1.5b_instruct_q6_k_en.md b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_1.5b_instruct_q6_k_en.md new file mode 100644 index 00000000000000..25811827254a12 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_1.5b_instruct_q6_k_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2.5_1.5b_instruct_q6_k AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2.5_1.5b_instruct_q6_k +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2.5_1.5b_instruct_q6_k` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2.5_1.5b_instruct_q6_k_en_5.5.1_3.0_1730251153966.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2.5_1.5b_instruct_q6_k_en_5.5.1_3.0_1730251153966.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_1.5b_instruct_q6_k","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_1.5b_instruct_q6_k", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2.5_1.5b_instruct_q6_k| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2.5-1.5B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_1.5b_instruct_q8_0_en.md b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_1.5b_instruct_q8_0_en.md new file mode 100644 index 00000000000000..e2fef4ba8e2bc0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_1.5b_instruct_q8_0_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2.5_1.5b_instruct_q8_0 AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2.5_1.5b_instruct_q8_0 +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2.5_1.5b_instruct_q8_0` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2.5_1.5b_instruct_q8_0_en_5.5.1_3.0_1730251224464.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2.5_1.5b_instruct_q8_0_en_5.5.1_3.0_1730251224464.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_1.5b_instruct_q8_0","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_1.5b_instruct_q8_0", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2.5_1.5b_instruct_q8_0| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2.5-1.5B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_3b_instruct_q3_k_l_en.md b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_3b_instruct_q3_k_l_en.md new file mode 100644 index 00000000000000..56ab7ad7f483e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_3b_instruct_q3_k_l_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2.5_3b_instruct_q3_k_l AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2.5_3b_instruct_q3_k_l +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2.5_3b_instruct_q3_k_l` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2.5_3b_instruct_q3_k_l_en_5.5.1_3.0_1730247550382.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2.5_3b_instruct_q3_k_l_en_5.5.1_3.0_1730247550382.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_3b_instruct_q3_k_l","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_3b_instruct_q3_k_l", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2.5_3b_instruct_q3_k_l| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2.5-3B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_3b_instruct_q4_k_m_en.md b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_3b_instruct_q4_k_m_en.md new file mode 100644 index 00000000000000..00bbdded3a2cd2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_3b_instruct_q4_k_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2.5_3b_instruct_q4_k_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2.5_3b_instruct_q4_k_m +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2.5_3b_instruct_q4_k_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2.5_3b_instruct_q4_k_m_en_5.5.1_3.0_1730247635762.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2.5_3b_instruct_q4_k_m_en_5.5.1_3.0_1730247635762.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_3b_instruct_q4_k_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_3b_instruct_q4_k_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2.5_3b_instruct_q4_k_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.9 GB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2.5-3B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_3b_instruct_q6_k_en.md b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_3b_instruct_q6_k_en.md new file mode 100644 index 00000000000000..153394b01b8fc0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_3b_instruct_q6_k_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2.5_3b_instruct_q6_k AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2.5_3b_instruct_q6_k +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2.5_3b_instruct_q6_k` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2.5_3b_instruct_q6_k_en_5.5.1_3.0_1730247736241.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2.5_3b_instruct_q6_k_en_5.5.1_3.0_1730247736241.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_3b_instruct_q6_k","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_3b_instruct_q6_k", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2.5_3b_instruct_q6_k| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|2.5 GB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2.5-3B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_3b_instruct_q8_0_en.md b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_3b_instruct_q8_0_en.md new file mode 100644 index 00000000000000..c3d121bfc28e47 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_3b_instruct_q8_0_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2.5_3b_instruct_q8_0 AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2.5_3b_instruct_q8_0 +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2.5_3b_instruct_q8_0` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2.5_3b_instruct_q8_0_en_5.5.1_3.0_1730247872137.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2.5_3b_instruct_q8_0_en_5.5.1_3.0_1730247872137.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_3b_instruct_q8_0","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_3b_instruct_q8_0", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2.5_3b_instruct_q8_0| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|3.1 GB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2.5-3B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_coder_1.5b_instruct_q3_k_l_en.md b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_coder_1.5b_instruct_q3_k_l_en.md new file mode 100644 index 00000000000000..c8703765f7ecf9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_coder_1.5b_instruct_q3_k_l_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2.5_coder_1.5b_instruct_q3_k_l AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2.5_coder_1.5b_instruct_q3_k_l +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2.5_coder_1.5b_instruct_q3_k_l` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2.5_coder_1.5b_instruct_q3_k_l_en_5.5.1_3.0_1730258490977.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2.5_coder_1.5b_instruct_q3_k_l_en_5.5.1_3.0_1730258490977.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_coder_1.5b_instruct_q3_k_l","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_coder_1.5b_instruct_q3_k_l", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2.5_coder_1.5b_instruct_q3_k_l| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|874.6 MB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2.5-Coder-1.5B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_coder_1.5b_instruct_q4_k_m_en.md b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_coder_1.5b_instruct_q4_k_m_en.md new file mode 100644 index 00000000000000..c69ea2ba360b7f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_coder_1.5b_instruct_q4_k_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2.5_coder_1.5b_instruct_q4_k_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2.5_coder_1.5b_instruct_q4_k_m +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2.5_coder_1.5b_instruct_q4_k_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2.5_coder_1.5b_instruct_q4_k_m_en_5.5.1_3.0_1730258536773.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2.5_coder_1.5b_instruct_q4_k_m_en_5.5.1_3.0_1730258536773.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_coder_1.5b_instruct_q4_k_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_coder_1.5b_instruct_q4_k_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2.5_coder_1.5b_instruct_q4_k_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|966.8 MB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2.5-Coder-1.5B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_coder_1.5b_instruct_q6_k_en.md b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_coder_1.5b_instruct_q6_k_en.md new file mode 100644 index 00000000000000..62a7bc61fccc5e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_coder_1.5b_instruct_q6_k_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2.5_coder_1.5b_instruct_q6_k AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2.5_coder_1.5b_instruct_q6_k +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2.5_coder_1.5b_instruct_q6_k` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2.5_coder_1.5b_instruct_q6_k_en_5.5.1_3.0_1730258590883.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2.5_coder_1.5b_instruct_q6_k_en_5.5.1_3.0_1730258590883.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_coder_1.5b_instruct_q6_k","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_coder_1.5b_instruct_q6_k", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2.5_coder_1.5b_instruct_q6_k| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2.5-Coder-1.5B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_coder_1.5b_instruct_q8_0_en.md b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_coder_1.5b_instruct_q8_0_en.md new file mode 100644 index 00000000000000..1ff63323711567 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_coder_1.5b_instruct_q8_0_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2.5_coder_1.5b_instruct_q8_0 AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2.5_coder_1.5b_instruct_q8_0 +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2.5_coder_1.5b_instruct_q8_0` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2.5_coder_1.5b_instruct_q8_0_en_5.5.1_3.0_1730258661015.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2.5_coder_1.5b_instruct_q8_0_en_5.5.1_3.0_1730258661015.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_coder_1.5b_instruct_q8_0","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_coder_1.5b_instruct_q8_0", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2.5_coder_1.5b_instruct_q8_0| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2.5-Coder-1.5B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_math_1.5b_instruct_q3_k_l_en.md b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_math_1.5b_instruct_q3_k_l_en.md new file mode 100644 index 00000000000000..6f3fd80931d8cd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_math_1.5b_instruct_q3_k_l_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2.5_math_1.5b_instruct_q3_k_l AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2.5_math_1.5b_instruct_q3_k_l +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2.5_math_1.5b_instruct_q3_k_l` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2.5_math_1.5b_instruct_q3_k_l_en_5.5.1_3.0_1730265177084.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2.5_math_1.5b_instruct_q3_k_l_en_5.5.1_3.0_1730265177084.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_math_1.5b_instruct_q3_k_l","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_math_1.5b_instruct_q3_k_l", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2.5_math_1.5b_instruct_q3_k_l| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|873.1 MB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2.5-Math-1.5B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_math_1.5b_instruct_q4_k_m_en.md b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_math_1.5b_instruct_q4_k_m_en.md new file mode 100644 index 00000000000000..1006e9ec404f20 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_math_1.5b_instruct_q4_k_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2.5_math_1.5b_instruct_q4_k_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2.5_math_1.5b_instruct_q4_k_m +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2.5_math_1.5b_instruct_q4_k_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2.5_math_1.5b_instruct_q4_k_m_en_5.5.1_3.0_1730265223021.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2.5_math_1.5b_instruct_q4_k_m_en_5.5.1_3.0_1730265223021.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_math_1.5b_instruct_q4_k_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_math_1.5b_instruct_q4_k_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2.5_math_1.5b_instruct_q4_k_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|965.4 MB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2.5-Math-1.5B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_math_1.5b_instruct_q6_k_en.md b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_math_1.5b_instruct_q6_k_en.md new file mode 100644 index 00000000000000..e724f5660e7789 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_math_1.5b_instruct_q6_k_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2.5_math_1.5b_instruct_q6_k AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2.5_math_1.5b_instruct_q6_k +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2.5_math_1.5b_instruct_q6_k` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2.5_math_1.5b_instruct_q6_k_en_5.5.1_3.0_1730265276620.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2.5_math_1.5b_instruct_q6_k_en_5.5.1_3.0_1730265276620.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_math_1.5b_instruct_q6_k","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_math_1.5b_instruct_q6_k", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2.5_math_1.5b_instruct_q6_k| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2.5-Math-1.5B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_math_1.5b_instruct_q8_0_en.md b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_math_1.5b_instruct_q8_0_en.md new file mode 100644 index 00000000000000..aef024d3d20025 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-qwen2.5_math_1.5b_instruct_q8_0_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English qwen2.5_math_1.5b_instruct_q8_0 AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: qwen2.5_math_1.5b_instruct_q8_0 +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qwen2.5_math_1.5b_instruct_q8_0` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qwen2.5_math_1.5b_instruct_q8_0_en_5.5.1_3.0_1730265348303.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qwen2.5_math_1.5b_instruct_q8_0_en_5.5.1_3.0_1730265348303.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_math_1.5b_instruct_q8_0","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("qwen2.5_math_1.5b_instruct_q8_0", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qwen2.5_math_1.5b_instruct_q8_0| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/lmstudio-community/Qwen2.5-Math-1.5B-Instruct-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-yi_1.5_6b_chat_q3_k_l_en.md b/docs/_posts/ahmedlone127/2024-10-30-yi_1.5_6b_chat_q3_k_l_en.md new file mode 100644 index 00000000000000..916f79e1afa12e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-yi_1.5_6b_chat_q3_k_l_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English yi_1.5_6b_chat_q3_k_l AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: yi_1.5_6b_chat_q3_k_l +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `yi_1.5_6b_chat_q3_k_l` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/yi_1.5_6b_chat_q3_k_l_en_5.5.1_3.0_1730262370878.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/yi_1.5_6b_chat_q3_k_l_en_5.5.1_3.0_1730262370878.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("yi_1.5_6b_chat_q3_k_l","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("yi_1.5_6b_chat_q3_k_l", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|yi_1.5_6b_chat_q3_k_l| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|3.2 GB| + +## References + +https://huggingface.co/lmstudio-community/Yi-1.5-6B-Chat-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-yi_1.5_6b_chat_q4_k_m_en.md b/docs/_posts/ahmedlone127/2024-10-30-yi_1.5_6b_chat_q4_k_m_en.md new file mode 100644 index 00000000000000..a8e83a0d8aca6e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-yi_1.5_6b_chat_q4_k_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English yi_1.5_6b_chat_q4_k_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: yi_1.5_6b_chat_q4_k_m +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `yi_1.5_6b_chat_q4_k_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/yi_1.5_6b_chat_q4_k_m_en_5.5.1_3.0_1730262528006.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/yi_1.5_6b_chat_q4_k_m_en_5.5.1_3.0_1730262528006.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("yi_1.5_6b_chat_q4_k_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("yi_1.5_6b_chat_q4_k_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|yi_1.5_6b_chat_q4_k_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|3.6 GB| + +## References + +https://huggingface.co/lmstudio-community/Yi-1.5-6B-Chat-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-yi_coder_1.5b_q4_0_4_4_en.md b/docs/_posts/ahmedlone127/2024-10-30-yi_coder_1.5b_q4_0_4_4_en.md new file mode 100644 index 00000000000000..8e6634c9be0f7c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-yi_coder_1.5b_q4_0_4_4_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English yi_coder_1.5b_q4_0_4_4 AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: yi_coder_1.5b_q4_0_4_4 +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `yi_coder_1.5b_q4_0_4_4` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/yi_coder_1.5b_q4_0_4_4_en_5.5.1_3.0_1730259343634.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/yi_coder_1.5b_q4_0_4_4_en_5.5.1_3.0_1730259343634.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("yi_coder_1.5b_q4_0_4_4","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("yi_coder_1.5b_q4_0_4_4", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|yi_coder_1.5b_q4_0_4_4| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|824.5 MB| + +## References + +https://huggingface.co/lmstudio-community/Yi-Coder-1.5B-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-yi_coder_1.5b_q4_k_m_en.md b/docs/_posts/ahmedlone127/2024-10-30-yi_coder_1.5b_q4_k_m_en.md new file mode 100644 index 00000000000000..e509984dd38a00 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-yi_coder_1.5b_q4_k_m_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English yi_coder_1.5b_q4_k_m AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: yi_coder_1.5b_q4_k_m +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `yi_coder_1.5b_q4_k_m` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/yi_coder_1.5b_q4_k_m_en_5.5.1_3.0_1730259398913.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/yi_coder_1.5b_q4_k_m_en_5.5.1_3.0_1730259398913.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("yi_coder_1.5b_q4_k_m","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("yi_coder_1.5b_q4_k_m", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|yi_coder_1.5b_q4_k_m| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|944.8 MB| + +## References + +https://huggingface.co/lmstudio-community/Yi-Coder-1.5B-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-yi_coder_1.5b_q6_k_en.md b/docs/_posts/ahmedlone127/2024-10-30-yi_coder_1.5b_q6_k_en.md new file mode 100644 index 00000000000000..76a67c96cfc4c4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-yi_coder_1.5b_q6_k_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English yi_coder_1.5b_q6_k AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: yi_coder_1.5b_q6_k +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `yi_coder_1.5b_q6_k` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/yi_coder_1.5b_q6_k_en_5.5.1_3.0_1730259462613.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/yi_coder_1.5b_q6_k_en_5.5.1_3.0_1730259462613.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("yi_coder_1.5b_q6_k","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("yi_coder_1.5b_q6_k", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|yi_coder_1.5b_q6_k| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/lmstudio-community/Yi-Coder-1.5B-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-10-30-yi_coder_1.5b_q8_0_en.md b/docs/_posts/ahmedlone127/2024-10-30-yi_coder_1.5b_q8_0_en.md new file mode 100644 index 00000000000000..9d9cdc82f447fe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-10-30-yi_coder_1.5b_q8_0_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English yi_coder_1.5b_q8_0 AutoGGUFModel from lmstudio-community +author: John Snow Labs +name: yi_coder_1.5b_q8_0 +date: 2024-10-30 +tags: [en, open_source, onnx, conversational, text_generation, text_to_text, llamacpp] +task: Text Generation +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: llamacpp +annotator: AutoGGUFModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AutoGGUFModel model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `yi_coder_1.5b_q8_0` is an English model prepared by lmstudio-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/yi_coder_1.5b_q8_0_en_5.5.1_3.0_1730259543780.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/yi_coder_1.5b_q8_0_en_5.5.1_3.0_1730259543780.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +autoGGUFModel = AutoGGUFModel.pretrained("yi_coder_1.5b_q8_0","en") \ + .setInputCols(["document"]) \ + .setOutputCol("completions") \ + .setBatchSize(4) \ + .setNPredict(20) \ + .setNGpuLayers(99) \ + .setTemperature(0.4) \ + .setTopK(40) \ + .setTopP(0.9) \ + .setPenalizeNl(True) + +pipeline = Pipeline().setStages([document, autoGGUFModel]) +data = spark.createDataFrame([["Hello, I am a"]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = False) + +``` +```scala + +val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val autoGGUFModel = AutoGGUFModel.pretrained("yi_coder_1.5b_q8_0", "en") + .setInputCols("document") + .setOutputCol("completions") + .setBatchSize(4) + .setNPredict(20) + .setNGpuLayers(99) + .setTemperature(0.4f) + .setTopK(40) + .setTopP(0.9f) + .setPenalizeNl(true) + +val pipeline = new Pipeline().setStages(Array(document, autoGGUFModel)) + +val data = Seq("Hello, I am a").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("completions").show(truncate = false) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|yi_coder_1.5b_q8_0| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[completions]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/lmstudio-community/Yi-Coder-1.5B-GGUF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-01-distilbart_xsum_12_6_en.md b/docs/_posts/ahmedlone127/2024-11-01-distilbart_xsum_12_6_en.md new file mode 100644 index 00000000000000..350a0b0646f51e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-01-distilbart_xsum_12_6_en.md @@ -0,0 +1,74 @@ +--- +layout: model +title: Abstractive Summarization by BART - DistilBART XSUM +author: John Snow Labs +name: distilbart_xsum_12_6 +date: 2024-11-01 +tags: [en, summarization, text_to_text, distil, open_source, openvino] +task: Summarization +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: openvino +annotator: BartTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +“BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension Transformer” The Facebook BART (Bidirectional and Auto-Regressive Transformer) model is a state-of-the-art language generation model that was introduced by Facebook AI in 2019. It is based on the transformer architecture and is designed to handle a wide range of natural language processing tasks such as text generation, summarization, and machine translation. + +This pre-trained model is DistilBART fine-tuned on the Extreme Summarization (XSum) Dataset. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbart_xsum_12_6_en_5.5.0_3.0_1730492024334.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbart_xsum_12_6_en_5.5.0_3.0_1730492024334.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +bart = BartTransformer.pretrained("distilbart_xsum_12_6") \ + .setTask("summarize:") \ + .setMaxOutputLength(200) \ + .setInputCols(["documents"]) \ + .setOutputCol("summaries") +``` +```scala +val bart = BartTransformer.pretrained("distilbart_xsum_12_6") + .setTask("summarize:") + .setMaxOutputLength(200) + .setInputCols("documents") + .setOutputCol("summaries") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbart_xsum_12_6| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents]| +|Output Labels:|[generation]| +|Language:|en| +|Size:|853.7 MB| + +## References + +https://huggingface.co/sshleifer/distilbart-xsum-12-6 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-03-gpt2_en.md b/docs/_posts/ahmedlone127/2024-11-03-gpt2_en.md new file mode 100644 index 00000000000000..bd930b0c8484d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-03-gpt2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: GPT2 text-to-text model (Base) +author: John Snow Labs +name: gpt2 +date: 2024-11-03 +tags: [gpt2, en, open_source, onnx, openvino] +task: Text Generation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: openvino +annotator: GPT2Transformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +“GPT-2 displays a broad set of capabilities, including the ability to generate conditional synthetic text samples of unprecedented quality, where the model is primed with an input and it generates a lengthy continuation. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gpt2_en_5.5.0_3.0_1730653115205.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gpt2_en_5.5.0_3.0_1730653115205.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ +.setInputCol("text") \ +.setOutputCol("documents") + +gpt2 = GPT2Transformer.pretrained("gpt2") \ +.setInputCols(["documents"]) \ +.setMaxOutputLength(50) \ +.setOutputCol("generation") + +pipeline = Pipeline().setStages([documentAssembler, gpt2]) +data = spark.createDataFrame([["My name is Leonardo."]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.select("generation.result").show(truncate=False) +``` +```scala +val documentAssembler = new DocumentAssembler() +.setInputCol("text") +.setOutputCol("documents") + +val gpt2 = GPT2Transformer.pretrained("gpt2") +.setInputCols(Array("documents")) +.setMinOutputLength(10) +.setMaxOutputLength(50) +.setDoSample(false) +.setTopK(50) +.setNoRepeatNgramSize(3) +.setOutputCol("generation") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, gpt2)) + +val data = Seq("My name is Leonardo.").toDF("text") +val result = pipeline.fit(data).transform(data) +result.select("generation.result").show(truncate = false) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gpt2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents]| +|Output Labels:|[generation]| +|Language:|en| +|Size:|467.4 MB| + +## References + +https://huggingface.co/openai-community/gpt2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-08-hubert_large_japanese_asr_ja.md b/docs/_posts/ahmedlone127/2024-11-08-hubert_large_japanese_asr_ja.md new file mode 100644 index 00000000000000..179cb685bb018e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-08-hubert_large_japanese_asr_ja.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Japanese hubert_large_japanese_asr HubertForCTC from TKU410410103 +author: John Snow Labs +name: hubert_large_japanese_asr +date: 2024-11-08 +tags: [ja, open_source, onnx, asr, hubert] +task: Automatic Speech Recognition +language: ja +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: HubertForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained HubertForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hubert_large_japanese_asr` is a Japanese model originally trained by TKU410410103. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hubert_large_japanese_asr_ja_5.5.1_3.0_1731106819898.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hubert_large_japanese_asr_ja_5.5.1_3.0_1731106819898.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = HubertForCTC.pretrained("hubert_large_japanese_asr","ja") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = HubertForCTC.pretrained("hubert_large_japanese_asr", "ja") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hubert_large_japanese_asr| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ja| +|Size:|2.4 GB| + +## References + +https://huggingface.co/TKU410410103/hubert-large-japanese-asr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-08-hubert_large_japanese_asr_pipeline_ja.md b/docs/_posts/ahmedlone127/2024-11-08-hubert_large_japanese_asr_pipeline_ja.md new file mode 100644 index 00000000000000..bbaf2cb1e9e949 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-08-hubert_large_japanese_asr_pipeline_ja.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Japanese hubert_large_japanese_asr_pipeline pipeline HubertForCTC from TKU410410103 +author: John Snow Labs +name: hubert_large_japanese_asr_pipeline +date: 2024-11-08 +tags: [ja, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: ja +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained HubertForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hubert_large_japanese_asr_pipeline` is a Japanese model originally trained by TKU410410103. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hubert_large_japanese_asr_pipeline_ja_5.5.1_3.0_1731106937966.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hubert_large_japanese_asr_pipeline_ja_5.5.1_3.0_1731106937966.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hubert_large_japanese_asr_pipeline", lang = "ja") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hubert_large_japanese_asr_pipeline", lang = "ja") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hubert_large_japanese_asr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|ja| +|Size:|2.4 GB| + +## References + +https://huggingface.co/TKU410410103/hubert-large-japanese-asr + +## Included Models + +- AudioAssembler +- HubertForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-08-hubert_ukrainian_pipeline_uk.md b/docs/_posts/ahmedlone127/2024-11-08-hubert_ukrainian_pipeline_uk.md new file mode 100644 index 00000000000000..a7a707e6f4fca7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-08-hubert_ukrainian_pipeline_uk.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Ukrainian hubert_ukrainian_pipeline pipeline HubertForCTC from Yehor +author: John Snow Labs +name: hubert_ukrainian_pipeline +date: 2024-11-08 +tags: [uk, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: uk +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained HubertForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hubert_ukrainian_pipeline` is a Ukrainian model originally trained by Yehor. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hubert_ukrainian_pipeline_uk_5.5.1_3.0_1731106461400.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hubert_ukrainian_pipeline_uk_5.5.1_3.0_1731106461400.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hubert_ukrainian_pipeline", lang = "uk") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hubert_ukrainian_pipeline", lang = "uk") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hubert_ukrainian_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|uk| +|Size:|708.6 MB| + +## References + +https://huggingface.co/Yehor/hubert-uk + +## Included Models + +- AudioAssembler +- HubertForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-08-hubert_ukrainian_uk.md b/docs/_posts/ahmedlone127/2024-11-08-hubert_ukrainian_uk.md new file mode 100644 index 00000000000000..731f17518453aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-08-hubert_ukrainian_uk.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Ukrainian hubert_ukrainian HubertForCTC from Yehor +author: John Snow Labs +name: hubert_ukrainian +date: 2024-11-08 +tags: [uk, open_source, onnx, asr, hubert] +task: Automatic Speech Recognition +language: uk +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: HubertForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained HubertForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hubert_ukrainian` is a Ukrainian model originally trained by Yehor. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hubert_ukrainian_uk_5.5.1_3.0_1731106423734.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hubert_ukrainian_uk_5.5.1_3.0_1731106423734.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = HubertForCTC.pretrained("hubert_ukrainian","uk") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = HubertForCTC.pretrained("hubert_ukrainian", "uk") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hubert_ukrainian| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|uk| +|Size:|708.6 MB| + +## References + +https://huggingface.co/Yehor/hubert-uk \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-08-unitku_hubert_japanese_asr_ja.md b/docs/_posts/ahmedlone127/2024-11-08-unitku_hubert_japanese_asr_ja.md new file mode 100644 index 00000000000000..2e5756e83f9213 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-08-unitku_hubert_japanese_asr_ja.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Japanese unitku_hubert_japanese_asr HubertForCTC from TKU410410103 +author: John Snow Labs +name: unitku_hubert_japanese_asr +date: 2024-11-08 +tags: [ja, open_source, onnx, asr, hubert] +task: Automatic Speech Recognition +language: ja +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: HubertForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained HubertForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`unitku_hubert_japanese_asr` is a Japanese model originally trained by TKU410410103. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/unitku_hubert_japanese_asr_ja_5.5.1_3.0_1731106577460.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/unitku_hubert_japanese_asr_ja_5.5.1_3.0_1731106577460.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = HubertForCTC.pretrained("unitku_hubert_japanese_asr","ja") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = HubertForCTC.pretrained("unitku_hubert_japanese_asr", "ja") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|unitku_hubert_japanese_asr| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ja| +|Size:|708.5 MB| + +## References + +https://huggingface.co/TKU410410103/uniTKU-hubert-japanese-asr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-08-unitku_hubert_japanese_asr_pipeline_ja.md b/docs/_posts/ahmedlone127/2024-11-08-unitku_hubert_japanese_asr_pipeline_ja.md new file mode 100644 index 00000000000000..04141277632332 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-08-unitku_hubert_japanese_asr_pipeline_ja.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Japanese unitku_hubert_japanese_asr_pipeline pipeline HubertForCTC from TKU410410103 +author: John Snow Labs +name: unitku_hubert_japanese_asr_pipeline +date: 2024-11-08 +tags: [ja, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: ja +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained HubertForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`unitku_hubert_japanese_asr_pipeline` is a Japanese model originally trained by TKU410410103. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/unitku_hubert_japanese_asr_pipeline_ja_5.5.1_3.0_1731106615568.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/unitku_hubert_japanese_asr_pipeline_ja_5.5.1_3.0_1731106615568.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("unitku_hubert_japanese_asr_pipeline", lang = "ja") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("unitku_hubert_japanese_asr_pipeline", lang = "ja") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|unitku_hubert_japanese_asr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|ja| +|Size:|708.5 MB| + +## References + +https://huggingface.co/TKU410410103/uniTKU-hubert-japanese-asr + +## Included Models + +- AudioAssembler +- HubertForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-afriberta_v2_large_en.md b/docs/_posts/ahmedlone127/2024-11-10-afriberta_v2_large_en.md new file mode 100644 index 00000000000000..c05c382dacc0a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-afriberta_v2_large_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English afriberta_v2_large XlmRoBertaEmbeddings from castorini +author: John Snow Labs +name: afriberta_v2_large +date: 2024-11-10 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`afriberta_v2_large` is a English model originally trained by castorini. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/afriberta_v2_large_en_5.5.1_3.0_1731282953480.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/afriberta_v2_large_en_5.5.1_3.0_1731282953480.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("afriberta_v2_large","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("afriberta_v2_large","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|afriberta_v2_large| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|698.8 MB| + +## References + +https://huggingface.co/castorini/afriberta_v2_large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-afriberta_v2_large_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-afriberta_v2_large_pipeline_en.md new file mode 100644 index 00000000000000..c706a4fc12a106 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-afriberta_v2_large_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English afriberta_v2_large_pipeline pipeline XlmRoBertaEmbeddings from castorini +author: John Snow Labs +name: afriberta_v2_large_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`afriberta_v2_large_pipeline` is a English model originally trained by castorini. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/afriberta_v2_large_pipeline_en_5.5.1_3.0_1731282989499.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/afriberta_v2_large_pipeline_en_5.5.1_3.0_1731282989499.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("afriberta_v2_large_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("afriberta_v2_large_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|afriberta_v2_large_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|698.8 MB| + +## References + +https://huggingface.co/castorini/afriberta_v2_large + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-bert_base_chinese_finetuned_food_en.md b/docs/_posts/ahmedlone127/2024-11-10-bert_base_chinese_finetuned_food_en.md new file mode 100644 index 00000000000000..2d8838b6115ee7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-bert_base_chinese_finetuned_food_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_base_chinese_finetuned_food BertForTokenClassification from zhiguoxu +author: John Snow Labs +name: bert_base_chinese_finetuned_food +date: 2024-11-10 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_chinese_finetuned_food` is a English model originally trained by zhiguoxu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_chinese_finetuned_food_en_5.5.1_3.0_1731279799981.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_chinese_finetuned_food_en_5.5.1_3.0_1731279799981.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("bert_base_chinese_finetuned_food","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("bert_base_chinese_finetuned_food", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_chinese_finetuned_food| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|381.1 MB| + +## References + +https://huggingface.co/zhiguoxu/bert-base-chinese-finetuned-food \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-bert_base_chinese_finetuned_food_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-bert_base_chinese_finetuned_food_pipeline_en.md new file mode 100644 index 00000000000000..be5c05a93a5c0a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-bert_base_chinese_finetuned_food_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_base_chinese_finetuned_food_pipeline pipeline BertForTokenClassification from zhiguoxu +author: John Snow Labs +name: bert_base_chinese_finetuned_food_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_chinese_finetuned_food_pipeline` is a English model originally trained by zhiguoxu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_chinese_finetuned_food_pipeline_en_5.5.1_3.0_1731279819532.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_chinese_finetuned_food_pipeline_en_5.5.1_3.0_1731279819532.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_chinese_finetuned_food_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_chinese_finetuned_food_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_chinese_finetuned_food_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|381.2 MB| + +## References + +https://huggingface.co/zhiguoxu/bert-base-chinese-finetuned-food + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-bert_finetuned_arc_ner_en.md b/docs/_posts/ahmedlone127/2024-11-10-bert_finetuned_arc_ner_en.md new file mode 100644 index 00000000000000..afb323f2bf853a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-bert_finetuned_arc_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_finetuned_arc_ner BertForTokenClassification from nstrn-mo +author: John Snow Labs +name: bert_finetuned_arc_ner +date: 2024-11-10 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_arc_ner` is a English model originally trained by nstrn-mo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_arc_ner_en_5.5.1_3.0_1731279808777.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_arc_ner_en_5.5.1_3.0_1731279808777.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_arc_ner","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_arc_ner", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_arc_ner| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/nstrn-mo/bert-finetuned-arc-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-bert_finetuned_arc_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-bert_finetuned_arc_ner_pipeline_en.md new file mode 100644 index 00000000000000..8f413ddae5894e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-bert_finetuned_arc_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_finetuned_arc_ner_pipeline pipeline BertForTokenClassification from nstrn-mo +author: John Snow Labs +name: bert_finetuned_arc_ner_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_arc_ner_pipeline` is a English model originally trained by nstrn-mo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_arc_ner_pipeline_en_5.5.1_3.0_1731279829261.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_arc_ner_pipeline_en_5.5.1_3.0_1731279829261.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned_arc_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned_arc_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_arc_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/nstrn-mo/bert-finetuned-arc-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-bert_finetuned_ner_1_es.md b/docs/_posts/ahmedlone127/2024-11-10-bert_finetuned_ner_1_es.md new file mode 100644 index 00000000000000..0173b5a6811573 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-bert_finetuned_ner_1_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish bert_finetuned_ner_1 BertForTokenClassification from paulrojasg +author: John Snow Labs +name: bert_finetuned_ner_1 +date: 2024-11-10 +tags: [es, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_ner_1` is a Castilian, Spanish model originally trained by paulrojasg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_1_es_5.5.1_3.0_1731280287792.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_1_es_5.5.1_3.0_1731280287792.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner_1","es") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner_1", "es") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_1| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|es| +|Size:|403.7 MB| + +## References + +https://huggingface.co/paulrojasg/bert-finetuned-ner-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-bert_finetuned_ner_1_pipeline_es.md b/docs/_posts/ahmedlone127/2024-11-10-bert_finetuned_ner_1_pipeline_es.md new file mode 100644 index 00000000000000..e21dfec1b9689c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-bert_finetuned_ner_1_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish bert_finetuned_ner_1_pipeline pipeline BertForTokenClassification from paulrojasg +author: John Snow Labs +name: bert_finetuned_ner_1_pipeline +date: 2024-11-10 +tags: [es, open_source, pipeline, onnx] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_ner_1_pipeline` is a Castilian, Spanish model originally trained by paulrojasg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_1_pipeline_es_5.5.1_3.0_1731280308250.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_1_pipeline_es_5.5.1_3.0_1731280308250.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned_ner_1_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned_ner_1_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|403.7 MB| + +## References + +https://huggingface.co/paulrojasg/bert-finetuned-ner-1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-bert_finetuned_ner_rob101z_en.md b/docs/_posts/ahmedlone127/2024-11-10-bert_finetuned_ner_rob101z_en.md new file mode 100644 index 00000000000000..2c03b7018bfce6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-bert_finetuned_ner_rob101z_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_finetuned_ner_rob101z BertForTokenClassification from rob101z +author: John Snow Labs +name: bert_finetuned_ner_rob101z +date: 2024-11-10 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_ner_rob101z` is a English model originally trained by rob101z. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_rob101z_en_5.5.1_3.0_1731279543189.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_rob101z_en_5.5.1_3.0_1731279543189.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner_rob101z","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner_rob101z", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_rob101z| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/rob101z/bert-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-bert_finetuned_ner_rob101z_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-bert_finetuned_ner_rob101z_pipeline_en.md new file mode 100644 index 00000000000000..9ec1a09873ac95 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-bert_finetuned_ner_rob101z_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_finetuned_ner_rob101z_pipeline pipeline BertForTokenClassification from rob101z +author: John Snow Labs +name: bert_finetuned_ner_rob101z_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_ner_rob101z_pipeline` is a English model originally trained by rob101z. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_rob101z_pipeline_en_5.5.1_3.0_1731279564182.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_rob101z_pipeline_en_5.5.1_3.0_1731279564182.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned_ner_rob101z_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned_ner_rob101z_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_rob101z_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/rob101z/bert-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-bert_los_muchachos_es.md b/docs/_posts/ahmedlone127/2024-11-10-bert_los_muchachos_es.md new file mode 100644 index 00000000000000..c0d63ee98f089d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-bert_los_muchachos_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish bert_los_muchachos BertForTokenClassification from KPOETA +author: John Snow Labs +name: bert_los_muchachos +date: 2024-11-10 +tags: [es, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_los_muchachos` is a Castilian, Spanish model originally trained by KPOETA. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_los_muchachos_es_5.5.1_3.0_1731279598269.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_los_muchachos_es_5.5.1_3.0_1731279598269.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("bert_los_muchachos","es") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("bert_los_muchachos", "es") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_los_muchachos| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|es| +|Size:|403.7 MB| + +## References + +https://huggingface.co/KPOETA/Bert-Los-Muchachos \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-bert_los_muchachos_pipeline_es.md b/docs/_posts/ahmedlone127/2024-11-10-bert_los_muchachos_pipeline_es.md new file mode 100644 index 00000000000000..4ac0ca80f26209 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-bert_los_muchachos_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish bert_los_muchachos_pipeline pipeline BertForTokenClassification from KPOETA +author: John Snow Labs +name: bert_los_muchachos_pipeline +date: 2024-11-10 +tags: [es, open_source, pipeline, onnx] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_los_muchachos_pipeline` is a Castilian, Spanish model originally trained by KPOETA. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_los_muchachos_pipeline_es_5.5.1_3.0_1731279619400.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_los_muchachos_pipeline_es_5.5.1_3.0_1731279619400.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_los_muchachos_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_los_muchachos_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_los_muchachos_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|403.7 MB| + +## References + +https://huggingface.co/KPOETA/Bert-Los-Muchachos + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-bert_sliding_window_epoch_6_en.md b/docs/_posts/ahmedlone127/2024-11-10-bert_sliding_window_epoch_6_en.md new file mode 100644 index 00000000000000..b3ca1e54b82196 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-bert_sliding_window_epoch_6_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English bert_sliding_window_epoch_6 DistilBertForQuestionAnswering from Whalejay +author: John Snow Labs +name: bert_sliding_window_epoch_6 +date: 2024-11-10 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_sliding_window_epoch_6` is a English model originally trained by Whalejay. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_sliding_window_epoch_6_en_5.5.1_3.0_1731281025243.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_sliding_window_epoch_6_en_5.5.1_3.0_1731281025243.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("bert_sliding_window_epoch_6","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("bert_sliding_window_epoch_6", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_sliding_window_epoch_6| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/Whalejay/bert-sliding-window_epoch_6 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-bert_sliding_window_epoch_6_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-bert_sliding_window_epoch_6_pipeline_en.md new file mode 100644 index 00000000000000..16bb03d623dc00 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-bert_sliding_window_epoch_6_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bert_sliding_window_epoch_6_pipeline pipeline DistilBertForQuestionAnswering from Whalejay +author: John Snow Labs +name: bert_sliding_window_epoch_6_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_sliding_window_epoch_6_pipeline` is a English model originally trained by Whalejay. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_sliding_window_epoch_6_pipeline_en_5.5.1_3.0_1731281037766.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_sliding_window_epoch_6_pipeline_en_5.5.1_3.0_1731281037766.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_sliding_window_epoch_6_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_sliding_window_epoch_6_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_sliding_window_epoch_6_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/Whalejay/bert-sliding-window_epoch_6 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-bert_swahili_over_en.md b/docs/_posts/ahmedlone127/2024-11-10-bert_swahili_over_en.md new file mode 100644 index 00000000000000..8b6b48c91b4513 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-bert_swahili_over_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English bert_swahili_over DistilBertForQuestionAnswering from Whalejay +author: John Snow Labs +name: bert_swahili_over +date: 2024-11-10 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_swahili_over` is a English model originally trained by Whalejay. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_swahili_over_en_5.5.1_3.0_1731280924764.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_swahili_over_en_5.5.1_3.0_1731280924764.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("bert_swahili_over","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("bert_swahili_over", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_swahili_over| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/Whalejay/bert-sw_over \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-bert_swahili_over_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-bert_swahili_over_pipeline_en.md new file mode 100644 index 00000000000000..ac0432cccb0cc5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-bert_swahili_over_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bert_swahili_over_pipeline pipeline DistilBertForQuestionAnswering from Whalejay +author: John Snow Labs +name: bert_swahili_over_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_swahili_over_pipeline` is a English model originally trained by Whalejay. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_swahili_over_pipeline_en_5.5.1_3.0_1731280937596.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_swahili_over_pipeline_en_5.5.1_3.0_1731280937596.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_swahili_over_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_swahili_over_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_swahili_over_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/Whalejay/bert-sw_over + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-bert_swz_en.md b/docs/_posts/ahmedlone127/2024-11-10-bert_swz_en.md new file mode 100644 index 00000000000000..2b019e601c13a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-bert_swz_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English bert_swz DistilBertForQuestionAnswering from Whalejay +author: John Snow Labs +name: bert_swz +date: 2024-11-10 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_swz` is a English model originally trained by Whalejay. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_swz_en_5.5.1_3.0_1731281145658.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_swz_en_5.5.1_3.0_1731281145658.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("bert_swz","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("bert_swz", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_swz| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|243.4 MB| + +## References + +https://huggingface.co/Whalejay/bert-swz \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-bert_swz_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-bert_swz_pipeline_en.md new file mode 100644 index 00000000000000..7595fa99c7795a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-bert_swz_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bert_swz_pipeline pipeline DistilBertForQuestionAnswering from Whalejay +author: John Snow Labs +name: bert_swz_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_swz_pipeline` is a English model originally trained by Whalejay. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_swz_pipeline_en_5.5.1_3.0_1731281158228.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_swz_pipeline_en_5.5.1_3.0_1731281158228.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_swz_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_swz_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_swz_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.4 MB| + +## References + +https://huggingface.co/Whalejay/bert-swz + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-burmese_awesome_qa_model_beetroot16_en.md b/docs/_posts/ahmedlone127/2024-11-10-burmese_awesome_qa_model_beetroot16_en.md new file mode 100644 index 00000000000000..29eac96def0b44 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-burmese_awesome_qa_model_beetroot16_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_beetroot16 DistilBertForQuestionAnswering from Beetroot16 +author: John Snow Labs +name: burmese_awesome_qa_model_beetroot16 +date: 2024-11-10 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `burmese_awesome_qa_model_beetroot16` is an English model originally trained by Beetroot16. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_beetroot16_en_5.5.1_3.0_1731281058274.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_beetroot16_en_5.5.1_3.0_1731281058274.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_beetroot16","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_beetroot16", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_beetroot16| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Beetroot16/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-burmese_awesome_qa_model_beetroot16_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-burmese_awesome_qa_model_beetroot16_pipeline_en.md new file mode 100644 index 00000000000000..1cf48b643e61d9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-burmese_awesome_qa_model_beetroot16_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_beetroot16_pipeline pipeline DistilBertForQuestionAnswering from Beetroot16 +author: John Snow Labs +name: burmese_awesome_qa_model_beetroot16_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `burmese_awesome_qa_model_beetroot16_pipeline` is an English model originally trained by Beetroot16.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_beetroot16_pipeline_en_5.5.1_3.0_1731281071491.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_beetroot16_pipeline_en_5.5.1_3.0_1731281071491.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_qa_model_beetroot16_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_beetroot16_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_beetroot16_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Beetroot16/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-burmese_awesome_qa_model_real_jiakai_en.md b/docs/_posts/ahmedlone127/2024-11-10-burmese_awesome_qa_model_real_jiakai_en.md new file mode 100644 index 00000000000000..e1a9afc8a81e7c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-burmese_awesome_qa_model_real_jiakai_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_real_jiakai DistilBertForQuestionAnswering from real-jiakai +author: John Snow Labs +name: burmese_awesome_qa_model_real_jiakai +date: 2024-11-10 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_real_jiakai` is a English model originally trained by real-jiakai. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_real_jiakai_en_5.5.1_3.0_1731281015961.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_real_jiakai_en_5.5.1_3.0_1731281015961.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_real_jiakai","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_real_jiakai", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_real_jiakai| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/real-jiakai/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-burmese_awesome_qa_model_real_jiakai_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-burmese_awesome_qa_model_real_jiakai_pipeline_en.md new file mode 100644 index 00000000000000..18459187f13add --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-burmese_awesome_qa_model_real_jiakai_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_real_jiakai_pipeline pipeline DistilBertForQuestionAnswering from real-jiakai +author: John Snow Labs +name: burmese_awesome_qa_model_real_jiakai_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_real_jiakai_pipeline` is a English model originally trained by real-jiakai. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_real_jiakai_pipeline_en_5.5.1_3.0_1731281028771.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_real_jiakai_pipeline_en_5.5.1_3.0_1731281028771.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_qa_model_real_jiakai_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_real_jiakai_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_real_jiakai_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/real-jiakai/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-burmese_first_model_en.md b/docs/_posts/ahmedlone127/2024-11-10-burmese_first_model_en.md new file mode 100644 index 00000000000000..b5a6842b2f4f85 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-burmese_first_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_first_model DistilBertForQuestionAnswering from DarrenLo +author: John Snow Labs +name: burmese_first_model +date: 2024-11-10 +tags: [distilbert, en, open_source, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_first_model` is a English model originally trained by DarrenLo. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_first_model_en_5.5.1_3.0_1731280892050.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_first_model_en_5.5.1_3.0_1731280892050.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + + +spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_first_model","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([document_assembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering + .pretrained("burmese_first_model", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(document_assembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context") +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_first_model| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +References + +References + +https://huggingface.co/DarrenLo/my_first_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-burmese_first_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-burmese_first_model_pipeline_en.md new file mode 100644 index 00000000000000..9a6fdad032b978 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-burmese_first_model_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English burmese_first_model_pipeline pipeline CamemBertEmbeddings from hippoleveque +author: John Snow Labs +name: burmese_first_model_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_first_model_pipeline` is a English model originally trained by hippoleveque. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_first_model_pipeline_en_5.5.1_3.0_1731280912722.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_first_model_pipeline_en_5.5.1_3.0_1731280912722.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +pipeline = PretrainedPipeline("burmese_first_model_pipeline", lang = "en") +annotations = pipeline.transform(df) +``` +```scala +val pipeline = new PretrainedPipeline("burmese_first_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_first_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +References + +https://huggingface.co/hippoleveque/my-first-model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-camelbert_msa_qalb15_ged_13_ar.md b/docs/_posts/ahmedlone127/2024-11-10-camelbert_msa_qalb15_ged_13_ar.md new file mode 100644 index 00000000000000..d351ec03a5cc3f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-camelbert_msa_qalb15_ged_13_ar.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Arabic camelbert_msa_qalb15_ged_13 BertForTokenClassification from CAMeL-Lab +author: John Snow Labs +name: camelbert_msa_qalb15_ged_13 +date: 2024-11-10 +tags: [ar, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: ar +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`camelbert_msa_qalb15_ged_13` is a Arabic model originally trained by CAMeL-Lab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camelbert_msa_qalb15_ged_13_ar_5.5.1_3.0_1731280259651.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camelbert_msa_qalb15_ged_13_ar_5.5.1_3.0_1731280259651.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("camelbert_msa_qalb15_ged_13","ar") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("camelbert_msa_qalb15_ged_13", "ar") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camelbert_msa_qalb15_ged_13| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|ar| +|Size:|406.4 MB| + +## References + +https://huggingface.co/CAMeL-Lab/camelbert-msa-qalb15-ged-13 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-camelbert_msa_qalb15_ged_13_pipeline_ar.md b/docs/_posts/ahmedlone127/2024-11-10-camelbert_msa_qalb15_ged_13_pipeline_ar.md new file mode 100644 index 00000000000000..446faf154ccd98 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-camelbert_msa_qalb15_ged_13_pipeline_ar.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Arabic camelbert_msa_qalb15_ged_13_pipeline pipeline BertForTokenClassification from CAMeL-Lab +author: John Snow Labs +name: camelbert_msa_qalb15_ged_13_pipeline +date: 2024-11-10 +tags: [ar, open_source, pipeline, onnx] +task: Named Entity Recognition +language: ar +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `camelbert_msa_qalb15_ged_13_pipeline` is an Arabic model originally trained by CAMeL-Lab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camelbert_msa_qalb15_ged_13_pipeline_ar_5.5.1_3.0_1731280281051.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camelbert_msa_qalb15_ged_13_pipeline_ar_5.5.1_3.0_1731280281051.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("camelbert_msa_qalb15_ged_13_pipeline", lang = "ar") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("camelbert_msa_qalb15_ged_13_pipeline", lang = "ar") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camelbert_msa_qalb15_ged_13_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|ar| +|Size:|406.4 MB| + +## References + +https://huggingface.co/CAMeL-Lab/camelbert-msa-qalb15-ged-13 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-camembert_base_fr.md b/docs/_posts/ahmedlone127/2024-11-10-camembert_base_fr.md new file mode 100644 index 00000000000000..c0161a8bb4ae32 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-camembert_base_fr.md @@ -0,0 +1,87 @@ +--- +layout: model +title: CamemBERT Base Model +author: John Snow Labs +name: camembert_base +date: 2024-11-10 +tags: [fr, french, embeddings, camembert, base, open_source, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +[CamemBERT](https://arxiv.org/abs/1911.03894) is a state-of-the-art language model for French based on the RoBERTa model. +For further information or requests, please go to [Camembert Website](https://camembert-model.fr/) + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_base_fr_5.5.1_3.0_1731281647430.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_base_fr_5.5.1_3.0_1731281647430.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +embeddings = CamemBertEmbeddings.pretrained("camembert_base", "fr") \ +.setInputCols("sentence", "token") \ +.setOutputCol("embeddings") +``` +```scala +val embeddings = CamemBertEmbeddings.pretrained("camembert_base", "fr") +.setInputCols("sentence", "token") +.setOutputCol("embeddings") +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert_base").predict("""Put your text here.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_base| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|fr| +|Size:|264.0 MB| + +## Benchmarking + +```bash + + + +| Model | #params | Arch. | Training data | +|--------------------------------|--------------------------------|-------|-----------------------------------| +| `camembert-base` | 110M | Base | OSCAR (138 GB of text) | +| `camembert/camembert-large` | 335M | Large | CCNet (135 GB of text) | +| `camembert/camembert-base-ccnet` | 110M | Base | CCNet (135 GB of text) | +| `camembert/camembert-base-wikipedia-4gb` | 110M | Base | Wikipedia (4 GB of text) | +| `camembert/camembert-base-oscar-4gb` | 110M | Base | Subsample of OSCAR (4 GB of text) | +| `camembert/camembert-base-ccnet-4gb` | 110M | Base | Subsample of CCNet (4 GB of text) | +``` \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-camembert_base_pipeline_fr.md b/docs/_posts/ahmedlone127/2024-11-10-camembert_base_pipeline_fr.md new file mode 100644 index 00000000000000..8cf3d53c32571e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-camembert_base_pipeline_fr.md @@ -0,0 +1,72 @@ +--- +layout: model +title: French camembert_base_pipeline pipeline CamemBertEmbeddings from almanach +author: John Snow Labs +name: camembert_base_pipeline +date: 2024-11-10 +tags: [fr, open_source, pipeline, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`camembert_base_pipeline` is a French model originally trained by almanach. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_base_pipeline_fr_5.5.1_3.0_1731281725493.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_base_pipeline_fr_5.5.1_3.0_1731281725493.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +pipeline = PretrainedPipeline("camembert_base_pipeline", lang = "fr") +annotations = pipeline.transform(df) +``` +```scala +val pipeline = new PretrainedPipeline("camembert_base_pipeline", lang = "fr") +val annotations = pipeline.transform(df) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|fr| +|Size:|264.0 MB| + +## References + +References + +https://huggingface.co/almanach/camembert-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-damage_trigger_effect_2024_11_06_13_00_en.md b/docs/_posts/ahmedlone127/2024-11-10-damage_trigger_effect_2024_11_06_13_00_en.md new file mode 100644 index 00000000000000..ae337e93852488 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-damage_trigger_effect_2024_11_06_13_00_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English damage_trigger_effect_2024_11_06_13_00 BertForTokenClassification from Lolimorimorf +author: John Snow Labs +name: damage_trigger_effect_2024_11_06_13_00 +date: 2024-11-10 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`damage_trigger_effect_2024_11_06_13_00` is a English model originally trained by Lolimorimorf. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/damage_trigger_effect_2024_11_06_13_00_en_5.5.1_3.0_1731279661470.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/damage_trigger_effect_2024_11_06_13_00_en_5.5.1_3.0_1731279661470.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("damage_trigger_effect_2024_11_06_13_00","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("damage_trigger_effect_2024_11_06_13_00", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|damage_trigger_effect_2024_11_06_13_00| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|665.1 MB| + +## References + +https://huggingface.co/Lolimorimorf/damage_trigger_effect_2024-11-06_13_00 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-damage_trigger_effect_2024_11_06_13_00_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-damage_trigger_effect_2024_11_06_13_00_pipeline_en.md new file mode 100644 index 00000000000000..2e66cbbe5009a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-damage_trigger_effect_2024_11_06_13_00_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English damage_trigger_effect_2024_11_06_13_00_pipeline pipeline BertForTokenClassification from Lolimorimorf +author: John Snow Labs +name: damage_trigger_effect_2024_11_06_13_00_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`damage_trigger_effect_2024_11_06_13_00_pipeline` is a English model originally trained by Lolimorimorf. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/damage_trigger_effect_2024_11_06_13_00_pipeline_en_5.5.1_3.0_1731279697514.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/damage_trigger_effect_2024_11_06_13_00_pipeline_en_5.5.1_3.0_1731279697514.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("damage_trigger_effect_2024_11_06_13_00_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("damage_trigger_effect_2024_11_06_13_00_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|damage_trigger_effect_2024_11_06_13_00_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|665.1 MB| + +## References + +https://huggingface.co/Lolimorimorf/damage_trigger_effect_2024-11-06_13_00 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-devicebert_base_cased_v1_0_en.md b/docs/_posts/ahmedlone127/2024-11-10-devicebert_base_cased_v1_0_en.md new file mode 100644 index 00000000000000..0eece87f726985 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-devicebert_base_cased_v1_0_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English devicebert_base_cased_v1_0 BertForTokenClassification from mfarrington +author: John Snow Labs +name: devicebert_base_cased_v1_0 +date: 2024-11-10 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `devicebert_base_cased_v1_0` is an English model originally trained by mfarrington. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/devicebert_base_cased_v1_0_en_5.5.1_3.0_1731280029932.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/devicebert_base_cased_v1_0_en_5.5.1_3.0_1731280029932.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("devicebert_base_cased_v1_0","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("devicebert_base_cased_v1_0", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|devicebert_base_cased_v1_0| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|869.3 MB| + +## References + +https://huggingface.co/mfarrington/devicebert-base-cased-v1.0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-devicebert_base_cased_v1_0_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-devicebert_base_cased_v1_0_pipeline_en.md new file mode 100644 index 00000000000000..93cd6048269de6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-devicebert_base_cased_v1_0_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English devicebert_base_cased_v1_0_pipeline pipeline BertForTokenClassification from mfarrington +author: John Snow Labs +name: devicebert_base_cased_v1_0_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `devicebert_base_cased_v1_0_pipeline` is an English model originally trained by mfarrington. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/devicebert_base_cased_v1_0_pipeline_en_5.5.1_3.0_1731280075380.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/devicebert_base_cased_v1_0_pipeline_en_5.5.1_3.0_1731280075380.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("devicebert_base_cased_v1_0_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("devicebert_base_cased_v1_0_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|devicebert_base_cased_v1_0_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|869.3 MB| + +## References + +https://huggingface.co/mfarrington/devicebert-base-cased-v1.0 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_cotysong113_en.md b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_cotysong113_en.md new file mode 100644 index 00000000000000..9c462b20b8cb3b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_cotysong113_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_cotysong113 DistilBertEmbeddings from cotysong113 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_cotysong113 +date: 2024-11-10 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_cotysong113` is an English model originally trained by cotysong113. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_cotysong113_en_5.5.1_3.0_1731282176150.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_cotysong113_en_5.5.1_3.0_1731282176150.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_cotysong113","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_cotysong113","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_cotysong113| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/cotysong113/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_cotysong113_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_cotysong113_pipeline_en.md new file mode 100644 index 00000000000000..8d44d9e2fb7caa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_cotysong113_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_cotysong113_pipeline pipeline DistilBertEmbeddings from cotysong113 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_cotysong113_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_cotysong113_pipeline` is an English model originally trained by cotysong113. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_cotysong113_pipeline_en_5.5.1_3.0_1731282189190.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_cotysong113_pipeline_en_5.5.1_3.0_1731282189190.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_cotysong113_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_cotysong113_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_cotysong113_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/cotysong113/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_ehottl_en.md b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_ehottl_en.md new file mode 100644 index 00000000000000..dbacb232bdbf60 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_ehottl_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_ehottl DistilBertEmbeddings from ehottl +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_ehottl +date: 2024-11-10 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_ehottl` is an English model originally trained by ehottl. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ehottl_en_5.5.1_3.0_1731282139864.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ehottl_en_5.5.1_3.0_1731282139864.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_ehottl","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_ehottl","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_ehottl| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ehottl/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_ehottl_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_ehottl_pipeline_en.md new file mode 100644 index 00000000000000..d39253c5f1987c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_ehottl_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_ehottl_pipeline pipeline DistilBertEmbeddings from ehottl +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_ehottl_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_ehottl_pipeline` is an English model originally trained by ehottl. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ehottl_pipeline_en_5.5.1_3.0_1731282152676.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ehottl_pipeline_en_5.5.1_3.0_1731282152676.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_ehottl_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_ehottl_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_ehottl_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ehottl/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_gpragada_en.md b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_gpragada_en.md new file mode 100644 index 00000000000000..1b6ad6af277429 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_gpragada_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_gpragada DistilBertEmbeddings from Gpragada +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_gpragada +date: 2024-11-10 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_gpragada` is an English model originally trained by Gpragada. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_gpragada_en_5.5.1_3.0_1731282229661.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_gpragada_en_5.5.1_3.0_1731282229661.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_gpragada","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_gpragada","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_gpragada| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Gpragada/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_gpragada_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_gpragada_pipeline_en.md new file mode 100644 index 00000000000000..5d6fec4b18237f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_gpragada_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_gpragada_pipeline pipeline DistilBertEmbeddings from Gpragada +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_gpragada_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_gpragada_pipeline` is an English model originally trained by Gpragada. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_gpragada_pipeline_en_5.5.1_3.0_1731282242420.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_gpragada_pipeline_en_5.5.1_3.0_1731282242420.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_gpragada_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_gpragada_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_gpragada_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Gpragada/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_ryosuke11_en.md b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_ryosuke11_en.md new file mode 100644 index 00000000000000..6b8b9b137a7027 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_ryosuke11_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_ryosuke11 DistilBertEmbeddings from Ryosuke11 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_ryosuke11 +date: 2024-11-10 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_ryosuke11` is an English model originally trained by Ryosuke11. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ryosuke11_en_5.5.1_3.0_1731282148349.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ryosuke11_en_5.5.1_3.0_1731282148349.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_ryosuke11","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_ryosuke11","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_ryosuke11| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Ryosuke11/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_ryosuke11_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_ryosuke11_pipeline_en.md new file mode 100644 index 00000000000000..05bf063d4cfec5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_imdb_ryosuke11_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_ryosuke11_pipeline pipeline DistilBertEmbeddings from Ryosuke11 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_ryosuke11_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_ryosuke11_pipeline` is an English model originally trained by Ryosuke11. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ryosuke11_pipeline_en_5.5.1_3.0_1731282161867.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ryosuke11_pipeline_en_5.5.1_3.0_1731282161867.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_ryosuke11_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_ryosuke11_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_ryosuke11_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Ryosuke11/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_squad_arthur2025_en.md b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_squad_arthur2025_en.md new file mode 100644 index 00000000000000..f552a129fb20cf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_squad_arthur2025_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_arthur2025 DistilBertForQuestionAnswering from Arthur2025 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_arthur2025 +date: 2024-11-10 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_arthur2025` is an English model originally trained by Arthur2025. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_arthur2025_en_5.5.1_3.0_1731281054898.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_arthur2025_en_5.5.1_3.0_1731281054898.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_arthur2025","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_arthur2025", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_arthur2025| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Arthur2025/distilbert-base-uncased-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_squad_arthur2025_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_squad_arthur2025_pipeline_en.md new file mode 100644 index 00000000000000..86b84e69ed083f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_squad_arthur2025_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_arthur2025_pipeline pipeline DistilBertForQuestionAnswering from Arthur2025 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_arthur2025_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_arthur2025_pipeline` is an English model originally trained by Arthur2025. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_arthur2025_pipeline_en_5.5.1_3.0_1731281068086.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_arthur2025_pipeline_en_5.5.1_3.0_1731281068086.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_arthur2025_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_arthur2025_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_arthur2025_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Arthur2025/distilbert-base-uncased-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_squad_baranll0_en.md b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_squad_baranll0_en.md new file mode 100644 index 00000000000000..67208a48302ea0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_squad_baranll0_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_baranll0 DistilBertForQuestionAnswering from Baranll0 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_baranll0 +date: 2024-11-10 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_baranll0` is an English model originally trained by Baranll0. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_baranll0_en_5.5.1_3.0_1731280892679.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_baranll0_en_5.5.1_3.0_1731280892679.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_baranll0","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_baranll0", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_baranll0| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Baranll0/distilbert-base-uncased-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_squad_baranll0_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_squad_baranll0_pipeline_en.md new file mode 100644 index 00000000000000..242222b3754520 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_squad_baranll0_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_baranll0_pipeline pipeline DistilBertForQuestionAnswering from Baranll0 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_baranll0_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_baranll0_pipeline` is an English model originally trained by Baranll0. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_baranll0_pipeline_en_5.5.1_3.0_1731280912515.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_baranll0_pipeline_en_5.5.1_3.0_1731280912515.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_baranll0_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_baranll0_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_baranll0_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Baranll0/distilbert-base-uncased-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_squad_sprenkamp_en.md b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_squad_sprenkamp_en.md new file mode 100644 index 00000000000000..3b12e654fdbf0e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_squad_sprenkamp_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_sprenkamp DistilBertForQuestionAnswering from sprenkamp +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_sprenkamp +date: 2024-11-10 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_sprenkamp` is an English model originally trained by sprenkamp. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_sprenkamp_en_5.5.1_3.0_1731281165915.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_sprenkamp_en_5.5.1_3.0_1731281165915.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_sprenkamp","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_sprenkamp", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_sprenkamp| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/sprenkamp/distilbert-base-uncased-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_squad_sprenkamp_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_squad_sprenkamp_pipeline_en.md new file mode 100644 index 00000000000000..76a4f7aeab72af --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_squad_sprenkamp_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_sprenkamp_pipeline pipeline DistilBertForQuestionAnswering from sprenkamp +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_sprenkamp_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_sprenkamp_pipeline` is an English model originally trained by sprenkamp. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_sprenkamp_pipeline_en_5.5.1_3.0_1731281178653.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_sprenkamp_pipeline_en_5.5.1_3.0_1731281178653.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_sprenkamp_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_sprenkamp_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_sprenkamp_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/sprenkamp/distilbert-base-uncased-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_toxicchat_accelerate_en.md b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_toxicchat_accelerate_en.md new file mode 100644 index 00000000000000..88a4771699ffe9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_toxicchat_accelerate_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_toxicchat_accelerate DistilBertEmbeddings from imcord +author: John Snow Labs +name: distilbert_base_uncased_finetuned_toxicchat_accelerate +date: 2024-11-10 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_toxicchat_accelerate` is an English model originally trained by imcord. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_toxicchat_accelerate_en_5.5.1_3.0_1731282141997.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_toxicchat_accelerate_en_5.5.1_3.0_1731282141997.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_toxicchat_accelerate","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_toxicchat_accelerate","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_toxicchat_accelerate| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/imcord/distilbert-base-uncased-finetuned-toxicchat-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_toxicchat_accelerate_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_toxicchat_accelerate_pipeline_en.md new file mode 100644 index 00000000000000..0ad94025c00a43 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-distilbert_base_uncased_finetuned_toxicchat_accelerate_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_toxicchat_accelerate_pipeline pipeline DistilBertEmbeddings from imcord +author: John Snow Labs +name: distilbert_base_uncased_finetuned_toxicchat_accelerate_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_toxicchat_accelerate_pipeline` is an English model originally trained by imcord. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_toxicchat_accelerate_pipeline_en_5.5.1_3.0_1731282155311.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_toxicchat_accelerate_pipeline_en_5.5.1_3.0_1731282155311.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_toxicchat_accelerate_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_toxicchat_accelerate_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_toxicchat_accelerate_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/imcord/distilbert-base-uncased-finetuned-toxicchat-accelerate + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-dummy_model_minsik_oh_en.md b/docs/_posts/ahmedlone127/2024-11-10-dummy_model_minsik_oh_en.md new file mode 100644 index 00000000000000..0d860b9fd3fc62 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-dummy_model_minsik_oh_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_minsik_oh CamemBertEmbeddings from minsik-oh +author: John Snow Labs +name: dummy_model_minsik_oh +date: 2024-11-10 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy_model_minsik_oh` is an English model originally trained by minsik-oh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_minsik_oh_en_5.5.1_3.0_1731281650536.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_minsik_oh_en_5.5.1_3.0_1731281650536.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_minsik_oh","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_minsik_oh","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_minsik_oh| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/minsik-oh/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-dummy_model_minsik_oh_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-dummy_model_minsik_oh_pipeline_en.md new file mode 100644 index 00000000000000..782b385cdd5450 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-dummy_model_minsik_oh_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_minsik_oh_pipeline pipeline CamemBertEmbeddings from minsik-oh +author: John Snow Labs +name: dummy_model_minsik_oh_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy_model_minsik_oh_pipeline` is an English model originally trained by minsik-oh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_minsik_oh_pipeline_en_5.5.1_3.0_1731281727740.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_minsik_oh_pipeline_en_5.5.1_3.0_1731281727740.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_minsik_oh_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_minsik_oh_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_minsik_oh_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/minsik-oh/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-dummy_model_osanseviero_en.md b/docs/_posts/ahmedlone127/2024-11-10-dummy_model_osanseviero_en.md new file mode 100644 index 00000000000000..c400ea64cd7572 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-dummy_model_osanseviero_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_osanseviero CamemBertEmbeddings from osanseviero +author: John Snow Labs +name: dummy_model_osanseviero +date: 2024-11-10 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy_model_osanseviero` is an English model originally trained by osanseviero. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_osanseviero_en_5.5.1_3.0_1731281750854.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_osanseviero_en_5.5.1_3.0_1731281750854.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_osanseviero","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_osanseviero","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_osanseviero| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/osanseviero/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-dummy_model_osanseviero_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-dummy_model_osanseviero_pipeline_en.md new file mode 100644 index 00000000000000..3bc3689c116572 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-dummy_model_osanseviero_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_osanseviero_pipeline pipeline CamemBertEmbeddings from osanseviero +author: John Snow Labs +name: dummy_model_osanseviero_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy_model_osanseviero_pipeline` is an English model originally trained by osanseviero. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_osanseviero_pipeline_en_5.5.1_3.0_1731281825605.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_osanseviero_pipeline_en_5.5.1_3.0_1731281825605.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_osanseviero_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_osanseviero_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_osanseviero_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/osanseviero/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-duplicate1_en.md b/docs/_posts/ahmedlone127/2024-11-10-duplicate1_en.md new file mode 100644 index 00000000000000..1c57689a92c63c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-duplicate1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English duplicate1 BertForTokenClassification from Somisetty2347 +author: John Snow Labs +name: duplicate1 +date: 2024-11-10 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `duplicate1` is an English model originally trained by Somisetty2347. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/duplicate1_en_5.5.1_3.0_1731280110525.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/duplicate1_en_5.5.1_3.0_1731280110525.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("duplicate1","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("duplicate1", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|duplicate1| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/Somisetty2347/duplicate1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-duplicate1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-duplicate1_pipeline_en.md new file mode 100644 index 00000000000000..256abcf0431f60 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-duplicate1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English duplicate1_pipeline pipeline BertForTokenClassification from Somisetty2347 +author: John Snow Labs +name: duplicate1_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `duplicate1_pipeline` is an English model originally trained by Somisetty2347. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/duplicate1_pipeline_en_5.5.1_3.0_1731280134766.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/duplicate1_pipeline_en_5.5.1_3.0_1731280134766.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("duplicate1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("duplicate1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|duplicate1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/Somisetty2347/duplicate1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-finetuned_bert_chinese_base_en.md b/docs/_posts/ahmedlone127/2024-11-10-finetuned_bert_chinese_base_en.md new file mode 100644 index 00000000000000..b997cad0590ceb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-finetuned_bert_chinese_base_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finetuned_bert_chinese_base BertForTokenClassification from r45289 +author: John Snow Labs +name: finetuned_bert_chinese_base +date: 2024-11-10 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `finetuned_bert_chinese_base` is an English model originally trained by r45289. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_bert_chinese_base_en_5.5.1_3.0_1731279959385.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_bert_chinese_base_en_5.5.1_3.0_1731279959385.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("finetuned_bert_chinese_base","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("finetuned_bert_chinese_base", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_bert_chinese_base| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|381.1 MB| + +## References + +https://huggingface.co/r45289/finetuned-bert-chinese-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-finetuned_bert_chinese_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-finetuned_bert_chinese_base_pipeline_en.md new file mode 100644 index 00000000000000..364fc96fd407cb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-finetuned_bert_chinese_base_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finetuned_bert_chinese_base_pipeline pipeline BertForTokenClassification from r45289 +author: John Snow Labs +name: finetuned_bert_chinese_base_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `finetuned_bert_chinese_base_pipeline` is an English model originally trained by r45289. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_bert_chinese_base_pipeline_en_5.5.1_3.0_1731279980123.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_bert_chinese_base_pipeline_en_5.5.1_3.0_1731279980123.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuned_bert_chinese_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuned_bert_chinese_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_bert_chinese_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|381.1 MB| + +## References + +https://huggingface.co/r45289/finetuned-bert-chinese-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-german_medical_ner_de.md b/docs/_posts/ahmedlone127/2024-11-10-german_medical_ner_de.md new file mode 100644 index 00000000000000..aab353afee4e70 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-german_medical_ner_de.md @@ -0,0 +1,94 @@ +--- +layout: model +title: German german_medical_ner BertForTokenClassification from HUMADEX +author: John Snow Labs +name: german_medical_ner +date: 2024-11-10 +tags: [de, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: de +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `german_medical_ner` is a German model originally trained by HUMADEX. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/german_medical_ner_de_5.5.1_3.0_1731280121721.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/german_medical_ner_de_5.5.1_3.0_1731280121721.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("german_medical_ner","de") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("german_medical_ner", "de") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|german_medical_ner| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|de| +|Size:|403.7 MB| + +## References + +https://huggingface.co/HUMADEX/german_medical_ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-german_medical_ner_pipeline_de.md b/docs/_posts/ahmedlone127/2024-11-10-german_medical_ner_pipeline_de.md new file mode 100644 index 00000000000000..c0ef2c609f6c85 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-german_medical_ner_pipeline_de.md @@ -0,0 +1,70 @@ +--- +layout: model +title: German german_medical_ner_pipeline pipeline BertForTokenClassification from HUMADEX +author: John Snow Labs +name: german_medical_ner_pipeline +date: 2024-11-10 +tags: [de, open_source, pipeline, onnx] +task: Named Entity Recognition +language: de +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `german_medical_ner_pipeline` is a German model originally trained by HUMADEX. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/german_medical_ner_pipeline_de_5.5.1_3.0_1731280146620.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/german_medical_ner_pipeline_de_5.5.1_3.0_1731280146620.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("german_medical_ner_pipeline", lang = "de") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("german_medical_ner_pipeline", lang = "de") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|german_medical_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|de| +|Size:|403.7 MB| + +## References + +https://huggingface.co/HUMADEX/german_medical_ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-ijelid_indobertweet_en.md b/docs/_posts/ahmedlone127/2024-11-10-ijelid_indobertweet_en.md new file mode 100644 index 00000000000000..c8e6cb67895d7d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-ijelid_indobertweet_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ijelid_indobertweet BertForTokenClassification from fathan +author: John Snow Labs +name: ijelid_indobertweet +date: 2024-11-10 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ijelid_indobertweet` is an English model originally trained by fathan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ijelid_indobertweet_en_5.5.1_3.0_1731279940333.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ijelid_indobertweet_en_5.5.1_3.0_1731279940333.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("ijelid_indobertweet","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("ijelid_indobertweet", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ijelid_indobertweet| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|411.8 MB| + +## References + +https://huggingface.co/fathan/ijelid-indobertweet \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-ijelid_indobertweet_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-ijelid_indobertweet_pipeline_en.md new file mode 100644 index 00000000000000..8f87aadc297849 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-ijelid_indobertweet_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ijelid_indobertweet_pipeline pipeline BertForTokenClassification from fathan +author: John Snow Labs +name: ijelid_indobertweet_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ijelid_indobertweet_pipeline` is an English model originally trained by fathan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ijelid_indobertweet_pipeline_en_5.5.1_3.0_1731279965018.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ijelid_indobertweet_pipeline_en_5.5.1_3.0_1731279965018.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ijelid_indobertweet_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ijelid_indobertweet_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ijelid_indobertweet_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|411.9 MB| + +## References + +https://huggingface.co/fathan/ijelid-indobertweet + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-indobert_large_p2_finetuned_ner_id.md b/docs/_posts/ahmedlone127/2024-11-10-indobert_large_p2_finetuned_ner_id.md new file mode 100644 index 00000000000000..9932e9305b0ce3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-indobert_large_p2_finetuned_ner_id.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Indonesian indobert_large_p2_finetuned_ner BertForTokenClassification from ageng-anugrah +author: John Snow Labs +name: indobert_large_p2_finetuned_ner +date: 2024-11-10 +tags: [id, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: id +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `indobert_large_p2_finetuned_ner` is an Indonesian model originally trained by ageng-anugrah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indobert_large_p2_finetuned_ner_id_5.5.1_3.0_1731279404489.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indobert_large_p2_finetuned_ner_id_5.5.1_3.0_1731279404489.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("indobert_large_p2_finetuned_ner","id") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("indobert_large_p2_finetuned_ner", "id") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indobert_large_p2_finetuned_ner| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|id| +|Size:|1.3 GB| + +## References + +https://huggingface.co/ageng-anugrah/indobert-large-p2-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-indobert_large_p2_finetuned_ner_pipeline_id.md b/docs/_posts/ahmedlone127/2024-11-10-indobert_large_p2_finetuned_ner_pipeline_id.md new file mode 100644 index 00000000000000..fb086b30fb1fba --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-indobert_large_p2_finetuned_ner_pipeline_id.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Indonesian indobert_large_p2_finetuned_ner_pipeline pipeline BertForTokenClassification from ageng-anugrah +author: John Snow Labs +name: indobert_large_p2_finetuned_ner_pipeline +date: 2024-11-10 +tags: [id, open_source, pipeline, onnx] +task: Named Entity Recognition +language: id +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `indobert_large_p2_finetuned_ner_pipeline` is an Indonesian model originally trained by ageng-anugrah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indobert_large_p2_finetuned_ner_pipeline_id_5.5.1_3.0_1731279467948.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indobert_large_p2_finetuned_ner_pipeline_id_5.5.1_3.0_1731279467948.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("indobert_large_p2_finetuned_ner_pipeline", lang = "id") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("indobert_large_p2_finetuned_ner_pipeline", lang = "id") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indobert_large_p2_finetuned_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|id| +|Size:|1.3 GB| + +## References + +https://huggingface.co/ageng-anugrah/indobert-large-p2-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-inhibitor_distilbert_en.md b/docs/_posts/ahmedlone127/2024-11-10-inhibitor_distilbert_en.md new file mode 100644 index 00000000000000..92db8c91100ed4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-inhibitor_distilbert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English inhibitor_distilbert DistilBertEmbeddings from tonytonfisk +author: John Snow Labs +name: inhibitor_distilbert +date: 2024-11-10 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `inhibitor_distilbert` is an English model originally trained by tonytonfisk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/inhibitor_distilbert_en_5.5.1_3.0_1731282144663.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/inhibitor_distilbert_en_5.5.1_3.0_1731282144663.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("inhibitor_distilbert","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("inhibitor_distilbert","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|inhibitor_distilbert| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|248.3 MB| + +## References + +https://huggingface.co/tonytonfisk/inhibitor_distilbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-inhibitor_distilbert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-inhibitor_distilbert_pipeline_en.md new file mode 100644 index 00000000000000..aa43c9100a1a97 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-inhibitor_distilbert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English inhibitor_distilbert_pipeline pipeline DistilBertEmbeddings from tonytonfisk +author: John Snow Labs +name: inhibitor_distilbert_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `inhibitor_distilbert_pipeline` is an English model originally trained by tonytonfisk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/inhibitor_distilbert_pipeline_en_5.5.1_3.0_1731282159228.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/inhibitor_distilbert_pipeline_en_5.5.1_3.0_1731282159228.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("inhibitor_distilbert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("inhibitor_distilbert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|inhibitor_distilbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|248.3 MB| + +## References + +https://huggingface.co/tonytonfisk/inhibitor_distilbert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-mbert_finnic_ner_en.md b/docs/_posts/ahmedlone127/2024-11-10-mbert_finnic_ner_en.md new file mode 100644 index 00000000000000..8eb7b5f888f5bf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-mbert_finnic_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mbert_finnic_ner BertForTokenClassification from azizbarank +author: John Snow Labs +name: mbert_finnic_ner +date: 2024-11-10 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_finnic_ner` is an English model originally trained by azizbarank. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_finnic_ner_en_5.5.1_3.0_1731279727500.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_finnic_ner_en_5.5.1_3.0_1731279727500.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("mbert_finnic_ner","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("mbert_finnic_ner", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_finnic_ner| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|665.1 MB| + +## References + +https://huggingface.co/azizbarank/mbert-finnic-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-mbert_finnic_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-mbert_finnic_ner_pipeline_en.md new file mode 100644 index 00000000000000..2739e5fab7efce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-mbert_finnic_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mbert_finnic_ner_pipeline pipeline BertForTokenClassification from azizbarank +author: John Snow Labs +name: mbert_finnic_ner_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_finnic_ner_pipeline` is an English model originally trained by azizbarank. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_finnic_ner_pipeline_en_5.5.1_3.0_1731279762269.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_finnic_ner_pipeline_en_5.5.1_3.0_1731279762269.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mbert_finnic_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mbert_finnic_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_finnic_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|665.1 MB| + +## References + +https://huggingface.co/azizbarank/mbert-finnic-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-medlid_en.md b/docs/_posts/ahmedlone127/2024-11-10-medlid_en.md new file mode 100644 index 00000000000000..e65ad60dda99c7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-medlid_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English medlid BertForTokenClassification from onionLad +author: John Snow Labs +name: medlid +date: 2024-11-10 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `medlid` is an English model originally trained by onionLad. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/medlid_en_5.5.1_3.0_1731280081924.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/medlid_en_5.5.1_3.0_1731280081924.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("medlid","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("medlid", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|medlid| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/onionLad/medlid \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-medlid_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-medlid_pipeline_en.md new file mode 100644 index 00000000000000..f3b2c8473fd975 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-medlid_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English medlid_pipeline pipeline BertForTokenClassification from onionLad +author: John Snow Labs +name: medlid_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `medlid_pipeline` is an English model originally trained by onionLad. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/medlid_pipeline_en_5.5.1_3.0_1731280109992.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/medlid_pipeline_en_5.5.1_3.0_1731280109992.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("medlid_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("medlid_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|medlid_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/onionLad/medlid + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-mountains_ner_model_en.md b/docs/_posts/ahmedlone127/2024-11-10-mountains_ner_model_en.md new file mode 100644 index 00000000000000..a8bd812e360179 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-mountains_ner_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mountains_ner_model BertForTokenClassification from telord +author: John Snow Labs +name: mountains_ner_model +date: 2024-11-10 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mountains_ner_model` is an English model originally trained by telord. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mountains_ner_model_en_5.5.1_3.0_1731279562174.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mountains_ner_model_en_5.5.1_3.0_1731279562174.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("mountains_ner_model","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("mountains_ner_model", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mountains_ner_model| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/telord/mountains-ner-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-mountains_ner_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-mountains_ner_model_pipeline_en.md new file mode 100644 index 00000000000000..43c12707a31381 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-mountains_ner_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mountains_ner_model_pipeline pipeline BertForTokenClassification from telord +author: John Snow Labs +name: mountains_ner_model_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mountains_ner_model_pipeline` is an English model originally trained by telord. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mountains_ner_model_pipeline_en_5.5.1_3.0_1731279587775.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mountains_ner_model_pipeline_en_5.5.1_3.0_1731279587775.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mountains_ner_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mountains_ner_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mountains_ner_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/telord/mountains-ner-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-ner_rubert_finetuned_en.md b/docs/_posts/ahmedlone127/2024-11-10-ner_rubert_finetuned_en.md new file mode 100644 index 00000000000000..a005f107af75df --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-ner_rubert_finetuned_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ner_rubert_finetuned BertForTokenClassification from FlewRr +author: John Snow Labs +name: ner_rubert_finetuned +date: 2024-11-10 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ner_rubert_finetuned` is an English model originally trained by FlewRr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_rubert_finetuned_en_5.5.1_3.0_1731279899847.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_rubert_finetuned_en_5.5.1_3.0_1731279899847.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("ner_rubert_finetuned","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("ner_rubert_finetuned", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_rubert_finetuned| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|667.1 MB| + +## References + +https://huggingface.co/FlewRr/NER-ruBert-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-ner_rubert_finetuned_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-ner_rubert_finetuned_pipeline_en.md new file mode 100644 index 00000000000000..da687be5bd50ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-ner_rubert_finetuned_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ner_rubert_finetuned_pipeline pipeline BertForTokenClassification from FlewRr +author: John Snow Labs +name: ner_rubert_finetuned_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ner_rubert_finetuned_pipeline` is an English model originally trained by FlewRr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_rubert_finetuned_pipeline_en_5.5.1_3.0_1731279936268.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_rubert_finetuned_pipeline_en_5.5.1_3.0_1731279936268.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ner_rubert_finetuned_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ner_rubert_finetuned_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_rubert_finetuned_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|667.1 MB| + +## References + +https://huggingface.co/FlewRr/NER-ruBert-finetuned + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-nucha_itskillner_bert_en.md b/docs/_posts/ahmedlone127/2024-11-10-nucha_itskillner_bert_en.md new file mode 100644 index 00000000000000..bb497d4f96f9c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-nucha_itskillner_bert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nucha_itskillner_bert BertForTokenClassification from Nucha +author: John Snow Labs +name: nucha_itskillner_bert +date: 2024-11-10 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nucha_itskillner_bert` is an English model originally trained by Nucha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nucha_itskillner_bert_en_5.5.1_3.0_1731279445449.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nucha_itskillner_bert_en_5.5.1_3.0_1731279445449.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("nucha_itskillner_bert","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("nucha_itskillner_bert", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nucha_itskillner_bert| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Nucha/Nucha_ITSkillNER_BERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-nucha_itskillner_bert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-nucha_itskillner_bert_pipeline_en.md new file mode 100644 index 00000000000000..fee995d1a8a977 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-nucha_itskillner_bert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English nucha_itskillner_bert_pipeline pipeline BertForTokenClassification from Nucha +author: John Snow Labs +name: nucha_itskillner_bert_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nucha_itskillner_bert_pipeline` is an English model originally trained by Nucha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nucha_itskillner_bert_pipeline_en_5.5.1_3.0_1731279466576.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nucha_itskillner_bert_pipeline_en_5.5.1_3.0_1731279466576.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nucha_itskillner_bert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nucha_itskillner_bert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nucha_itskillner_bert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Nucha/Nucha_ITSkillNER_BERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-pii_mbert_azerbaijani_en.md b/docs/_posts/ahmedlone127/2024-11-10-pii_mbert_azerbaijani_en.md new file mode 100644 index 00000000000000..4121771e83fe48 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-pii_mbert_azerbaijani_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English pii_mbert_azerbaijani BertForTokenClassification from toghrultahirov +author: John Snow Labs +name: pii_mbert_azerbaijani +date: 2024-11-10 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pii_mbert_azerbaijani` is an English model originally trained by toghrultahirov. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pii_mbert_azerbaijani_en_5.5.1_3.0_1731280115377.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pii_mbert_azerbaijani_en_5.5.1_3.0_1731280115377.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("pii_mbert_azerbaijani","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("pii_mbert_azerbaijani", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pii_mbert_azerbaijani| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|625.5 MB| + +## References + +https://huggingface.co/toghrultahirov/pii_mbert_az \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-pii_mbert_azerbaijani_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-pii_mbert_azerbaijani_pipeline_en.md new file mode 100644 index 00000000000000..47e34e24c9e9fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-pii_mbert_azerbaijani_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English pii_mbert_azerbaijani_pipeline pipeline BertForTokenClassification from toghrultahirov +author: John Snow Labs +name: pii_mbert_azerbaijani_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pii_mbert_azerbaijani_pipeline` is an English model originally trained by toghrultahirov. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pii_mbert_azerbaijani_pipeline_en_5.5.1_3.0_1731280155707.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pii_mbert_azerbaijani_pipeline_en_5.5.1_3.0_1731280155707.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("pii_mbert_azerbaijani_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("pii_mbert_azerbaijani_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pii_mbert_azerbaijani_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|625.6 MB| + +## References + +https://huggingface.co/toghrultahirov/pii_mbert_az + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-practica_3_model_en.md b/docs/_posts/ahmedlone127/2024-11-10-practica_3_model_en.md new file mode 100644 index 00000000000000..5935bf05c5eecb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-practica_3_model_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English practica_3_model DistilBertForQuestionAnswering from Almancy +author: John Snow Labs +name: practica_3_model +date: 2024-11-10 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `practica_3_model` is an English model originally trained by Almancy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/practica_3_model_en_5.5.1_3.0_1731280883866.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/practica_3_model_en_5.5.1_3.0_1731280883866.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("practica_3_model","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("practica_3_model", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|practica_3_model| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Almancy/practica_3_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-practica_3_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-practica_3_model_pipeline_en.md new file mode 100644 index 00000000000000..f3301ae884f6ea --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-practica_3_model_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English practica_3_model_pipeline pipeline DistilBertForQuestionAnswering from Almancy +author: John Snow Labs +name: practica_3_model_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `practica_3_model_pipeline` is an English model originally trained by Almancy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/practica_3_model_pipeline_en_5.5.1_3.0_1731280897108.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/practica_3_model_pipeline_en_5.5.1_3.0_1731280897108.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("practica_3_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("practica_3_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|practica_3_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Almancy/practica_3_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-queryner_bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2024-11-10-queryner_bert_base_uncased_en.md new file mode 100644 index 00000000000000..731545bfff3292 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-queryner_bert_base_uncased_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English queryner_bert_base_uncased BertForTokenClassification from bltlab +author: John Snow Labs +name: queryner_bert_base_uncased +date: 2024-11-10 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `queryner_bert_base_uncased` is an English model originally trained by bltlab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/queryner_bert_base_uncased_en_5.5.1_3.0_1731279498347.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/queryner_bert_base_uncased_en_5.5.1_3.0_1731279498347.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("queryner_bert_base_uncased","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("queryner_bert_base_uncased", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|queryner_bert_base_uncased| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/bltlab/queryner-bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-queryner_bert_base_uncased_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-queryner_bert_base_uncased_pipeline_en.md new file mode 100644 index 00000000000000..16cdd03ed7df0f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-queryner_bert_base_uncased_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English queryner_bert_base_uncased_pipeline pipeline BertForTokenClassification from bltlab +author: John Snow Labs +name: queryner_bert_base_uncased_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `queryner_bert_base_uncased_pipeline` is an English model originally trained by bltlab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/queryner_bert_base_uncased_pipeline_en_5.5.1_3.0_1731279519243.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/queryner_bert_base_uncased_pipeline_en_5.5.1_3.0_1731279519243.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("queryner_bert_base_uncased_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("queryner_bert_base_uncased_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|queryner_bert_base_uncased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/bltlab/queryner-bert-base-uncased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-rubert_address_elements_pipeline_ru.md b/docs/_posts/ahmedlone127/2024-11-10-rubert_address_elements_pipeline_ru.md new file mode 100644 index 00000000000000..bf04cac1e49356 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-rubert_address_elements_pipeline_ru.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Russian rubert_address_elements_pipeline pipeline BertForTokenClassification from qwazer +author: John Snow Labs +name: rubert_address_elements_pipeline +date: 2024-11-10 +tags: [ru, open_source, pipeline, onnx] +task: Named Entity Recognition +language: ru +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rubert_address_elements_pipeline` is a Russian model originally trained by qwazer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rubert_address_elements_pipeline_ru_5.5.1_3.0_1731279362868.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rubert_address_elements_pipeline_ru_5.5.1_3.0_1731279362868.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("rubert_address_elements_pipeline", lang = "ru") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("rubert_address_elements_pipeline", lang = "ru") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rubert_address_elements_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|ru| +|Size:|109.2 MB| + +## References + +https://huggingface.co/qwazer/rubert-address-elements + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-rubert_address_elements_ru.md b/docs/_posts/ahmedlone127/2024-11-10-rubert_address_elements_ru.md new file mode 100644 index 00000000000000..9a577a3f530fe8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-rubert_address_elements_ru.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Russian rubert_address_elements BertForTokenClassification from qwazer +author: John Snow Labs +name: rubert_address_elements +date: 2024-11-10 +tags: [ru, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: ru +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rubert_address_elements` is a Russian model originally trained by qwazer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rubert_address_elements_ru_5.5.1_3.0_1731279357401.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rubert_address_elements_ru_5.5.1_3.0_1731279357401.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("rubert_address_elements","ru") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("rubert_address_elements", "ru") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rubert_address_elements| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|ru| +|Size:|109.1 MB| + +## References + +https://huggingface.co/qwazer/rubert-address-elements \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-sent_afriberta_v2_large_en.md b/docs/_posts/ahmedlone127/2024-11-10-sent_afriberta_v2_large_en.md new file mode 100644 index 00000000000000..af1a42765e9735 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-sent_afriberta_v2_large_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_afriberta_v2_large XlmRoBertaSentenceEmbeddings from castorini +author: John Snow Labs +name: sent_afriberta_v2_large +date: 2024-11-10 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_afriberta_v2_large` is an English model originally trained by castorini. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_afriberta_v2_large_en_5.5.1_3.0_1731282587875.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_afriberta_v2_large_en_5.5.1_3.0_1731282587875.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_afriberta_v2_large","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_afriberta_v2_large","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_afriberta_v2_large| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|698.8 MB| + +## References + +https://huggingface.co/castorini/afriberta_v2_large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-sent_afriberta_v2_large_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-sent_afriberta_v2_large_pipeline_en.md new file mode 100644 index 00000000000000..c960cb84a67faa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-sent_afriberta_v2_large_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_afriberta_v2_large_pipeline pipeline XlmRoBertaSentenceEmbeddings from castorini +author: John Snow Labs +name: sent_afriberta_v2_large_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_afriberta_v2_large_pipeline` is an English model originally trained by castorini. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_afriberta_v2_large_pipeline_en_5.5.1_3.0_1731282623156.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_afriberta_v2_large_pipeline_en_5.5.1_3.0_1731282623156.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_afriberta_v2_large_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_afriberta_v2_large_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_afriberta_v2_large_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|699.3 MB| + +## References + +https://huggingface.co/castorini/afriberta_v2_large + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-social_bias_ner_en.md b/docs/_posts/ahmedlone127/2024-11-10-social_bias_ner_en.md new file mode 100644 index 00000000000000..5211ed4e32043b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-social_bias_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English social_bias_ner BertForTokenClassification from ethical-spectacle +author: John Snow Labs +name: social_bias_ner +date: 2024-11-10 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `social_bias_ner` is an English model originally trained by ethical-spectacle. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/social_bias_ner_en_5.5.1_3.0_1731279695568.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/social_bias_ner_en_5.5.1_3.0_1731279695568.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("social_bias_ner","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("social_bias_ner", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|social_bias_ner| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/ethical-spectacle/social-bias-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-social_bias_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-social_bias_ner_pipeline_en.md new file mode 100644 index 00000000000000..145a52b8e042c6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-social_bias_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English social_bias_ner_pipeline pipeline BertForTokenClassification from ethical-spectacle +author: John Snow Labs +name: social_bias_ner_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `social_bias_ner_pipeline` is an English model originally trained by ethical-spectacle. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/social_bias_ner_pipeline_en_5.5.1_3.0_1731279717050.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/social_bias_ner_pipeline_en_5.5.1_3.0_1731279717050.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("social_bias_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("social_bias_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|social_bias_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/ethical-spectacle/social-bias-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-training_distilbert_base_uncased_finetuned_squad_en.md b/docs/_posts/ahmedlone127/2024-11-10-training_distilbert_base_uncased_finetuned_squad_en.md new file mode 100644 index 00000000000000..1ac813bf6eeed4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-training_distilbert_base_uncased_finetuned_squad_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English training_distilbert_base_uncased_finetuned_squad DistilBertForQuestionAnswering from lizchu414 +author: John Snow Labs +name: training_distilbert_base_uncased_finetuned_squad +date: 2024-11-10 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `training_distilbert_base_uncased_finetuned_squad` is an English model originally trained by lizchu414. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/training_distilbert_base_uncased_finetuned_squad_en_5.5.1_3.0_1731280892071.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/training_distilbert_base_uncased_finetuned_squad_en_5.5.1_3.0_1731280892071.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("training_distilbert_base_uncased_finetuned_squad","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("training_distilbert_base_uncased_finetuned_squad", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|training_distilbert_base_uncased_finetuned_squad| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/lizchu414/training-distilbert-base-uncased-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-10-training_distilbert_base_uncased_finetuned_squad_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-10-training_distilbert_base_uncased_finetuned_squad_pipeline_en.md new file mode 100644 index 00000000000000..4e86ecff7fe32c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-10-training_distilbert_base_uncased_finetuned_squad_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English training_distilbert_base_uncased_finetuned_squad_pipeline pipeline DistilBertForQuestionAnswering from lizchu414 +author: John Snow Labs +name: training_distilbert_base_uncased_finetuned_squad_pipeline +date: 2024-11-10 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `training_distilbert_base_uncased_finetuned_squad_pipeline` is an English model originally trained by lizchu414. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/training_distilbert_base_uncased_finetuned_squad_pipeline_en_5.5.1_3.0_1731280905863.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/training_distilbert_base_uncased_finetuned_squad_pipeline_en_5.5.1_3.0_1731280905863.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("training_distilbert_base_uncased_finetuned_squad_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("training_distilbert_base_uncased_finetuned_squad_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|training_distilbert_base_uncased_finetuned_squad_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/lizchu414/training-distilbert-base-uncased-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-4248_spanbert_base_en.md b/docs/_posts/ahmedlone127/2024-11-11-4248_spanbert_base_en.md new file mode 100644 index 00000000000000..fab5242f737d5e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-4248_spanbert_base_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English 4248_spanbert_base BertForQuestionAnswering from JMatthewChiam +author: John Snow Labs +name: 4248_spanbert_base +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `4248_spanbert_base` is an English model originally trained by JMatthewChiam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/4248_spanbert_base_en_5.5.1_3.0_1731288773376.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/4248_spanbert_base_en_5.5.1_3.0_1731288773376.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("4248_spanbert_base","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("4248_spanbert_base", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|4248_spanbert_base| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|402.9 MB| + +## References + +https://huggingface.co/JMatthewChiam/4248-spanBERT-Base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-4248_spanbert_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-4248_spanbert_base_pipeline_en.md new file mode 100644 index 00000000000000..ee07cc7489f981 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-4248_spanbert_base_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English 4248_spanbert_base_pipeline pipeline BertForQuestionAnswering from JMatthewChiam +author: John Snow Labs +name: 4248_spanbert_base_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `4248_spanbert_base_pipeline` is an English model originally trained by JMatthewChiam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/4248_spanbert_base_pipeline_en_5.5.1_3.0_1731288794996.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/4248_spanbert_base_pipeline_en_5.5.1_3.0_1731288794996.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("4248_spanbert_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("4248_spanbert_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|4248_spanbert_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|402.9 MB| + +## References + +https://huggingface.co/JMatthewChiam/4248-spanBERT-Base + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-affilgood_ner_en.md b/docs/_posts/ahmedlone127/2024-11-11-affilgood_ner_en.md new file mode 100644 index 00000000000000..8af9a2eeb5fc6d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-affilgood_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English affilgood_ner RoBertaForTokenClassification from SIRIS-Lab +author: John Snow Labs +name: affilgood_ner +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `affilgood_ner` is an English model originally trained by SIRIS-Lab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/affilgood_ner_en_5.5.1_3.0_1731311681436.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/affilgood_ner_en_5.5.1_3.0_1731311681436.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = RoBertaForTokenClassification.pretrained("affilgood_ner","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = RoBertaForTokenClassification.pretrained("affilgood_ner", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|affilgood_ner| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/SIRIS-Lab/affilgood-NER \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-affilgood_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-affilgood_ner_pipeline_en.md new file mode 100644 index 00000000000000..7c40efccefdad7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-affilgood_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English affilgood_ner_pipeline pipeline RoBertaForTokenClassification from SIRIS-Lab +author: John Snow Labs +name: affilgood_ner_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `affilgood_ner_pipeline` is an English model originally trained by SIRIS-Lab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/affilgood_ner_pipeline_en_5.5.1_3.0_1731311706201.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/affilgood_ner_pipeline_en_5.5.1_3.0_1731311706201.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("affilgood_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("affilgood_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|affilgood_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/SIRIS-Lab/affilgood-NER + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-albert_dataset1_en.md b/docs/_posts/ahmedlone127/2024-11-11-albert_dataset1_en.md new file mode 100644 index 00000000000000..8f4626395f9975 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-albert_dataset1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English albert_dataset1 AlbertForSequenceClassification from aminajunaid0 +author: John Snow Labs +name: albert_dataset1 +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_dataset1` is an English model originally trained by aminajunaid0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_dataset1_en_5.5.1_3.0_1731296925732.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_dataset1_en_5.5.1_3.0_1731296925732.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_dataset1","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_dataset1", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_dataset1| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/aminajunaid0/Albert_Dataset1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-albert_dataset1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-albert_dataset1_pipeline_en.md new file mode 100644 index 00000000000000..4c0b7e573eb04c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-albert_dataset1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English albert_dataset1_pipeline pipeline AlbertForSequenceClassification from aminajunaid0 +author: John Snow Labs +name: albert_dataset1_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_dataset1_pipeline` is an English model originally trained by aminajunaid0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_dataset1_pipeline_en_5.5.1_3.0_1731296927971.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_dataset1_pipeline_en_5.5.1_3.0_1731296927971.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_dataset1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_dataset1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_dataset1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/aminajunaid0/Albert_Dataset1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-alephbertgimmel_parashoot_he.md b/docs/_posts/ahmedlone127/2024-11-11-alephbertgimmel_parashoot_he.md new file mode 100644 index 00000000000000..036f0606d08beb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-alephbertgimmel_parashoot_he.md @@ -0,0 +1,86 @@ +--- +layout: model +title: Hebrew alephbertgimmel_parashoot BertForQuestionAnswering from imvladikon +author: John Snow Labs +name: alephbertgimmel_parashoot +date: 2024-11-11 +tags: [he, open_source, onnx, question_answering, bert] +task: Question Answering +language: he +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`alephbertgimmel_parashoot` is a Hebrew model originally trained by imvladikon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/alephbertgimmel_parashoot_he_5.5.1_3.0_1731289180085.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/alephbertgimmel_parashoot_he_5.5.1_3.0_1731289180085.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("alephbertgimmel_parashoot","he") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("alephbertgimmel_parashoot", "he") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|alephbertgimmel_parashoot| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|he| +|Size:|690.4 MB| + +## References + +https://huggingface.co/imvladikon/alephbertgimmel_parashoot \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-alephbertgimmel_parashoot_pipeline_he.md b/docs/_posts/ahmedlone127/2024-11-11-alephbertgimmel_parashoot_pipeline_he.md new file mode 100644 index 00000000000000..ead148507c6574 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-alephbertgimmel_parashoot_pipeline_he.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Hebrew alephbertgimmel_parashoot_pipeline pipeline BertForQuestionAnswering from imvladikon +author: John Snow Labs +name: alephbertgimmel_parashoot_pipeline +date: 2024-11-11 +tags: [he, open_source, pipeline, onnx] +task: Question Answering +language: he +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`alephbertgimmel_parashoot_pipeline` is a Hebrew model originally trained by imvladikon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/alephbertgimmel_parashoot_pipeline_he_5.5.1_3.0_1731289217381.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/alephbertgimmel_parashoot_pipeline_he_5.5.1_3.0_1731289217381.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("alephbertgimmel_parashoot_pipeline", lang = "he") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("alephbertgimmel_parashoot_pipeline", lang = "he") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|alephbertgimmel_parashoot_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|he| +|Size:|690.5 MB| + +## References + +https://huggingface.co/imvladikon/alephbertgimmel_parashoot + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-all_mpnet_base_v2_tomaarsen_en.md b/docs/_posts/ahmedlone127/2024-11-11-all_mpnet_base_v2_tomaarsen_en.md new file mode 100644 index 00000000000000..af9dac29e9219b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-all_mpnet_base_v2_tomaarsen_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English all_mpnet_base_v2_tomaarsen MPNetEmbeddings from tomaarsen +author: John Snow Labs +name: all_mpnet_base_v2_tomaarsen +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `all_mpnet_base_v2_tomaarsen` is an English model originally trained by tomaarsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_tomaarsen_en_5.5.1_3.0_1731295094969.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_tomaarsen_en_5.5.1_3.0_1731295094969.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_tomaarsen","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_tomaarsen","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_tomaarsen| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/tomaarsen/all-mpnet-base-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-all_mpnet_base_v2_tomaarsen_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-all_mpnet_base_v2_tomaarsen_pipeline_en.md new file mode 100644 index 00000000000000..05d6ac0d199286 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-all_mpnet_base_v2_tomaarsen_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English all_mpnet_base_v2_tomaarsen_pipeline pipeline MPNetEmbeddings from tomaarsen +author: John Snow Labs +name: all_mpnet_base_v2_tomaarsen_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `all_mpnet_base_v2_tomaarsen_pipeline` is an English model originally trained by tomaarsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_tomaarsen_pipeline_en_5.5.1_3.0_1731295120658.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_tomaarsen_pipeline_en_5.5.1_3.0_1731295120658.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("all_mpnet_base_v2_tomaarsen_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("all_mpnet_base_v2_tomaarsen_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_tomaarsen_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/tomaarsen/all-mpnet-base-v2 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-aspect_based_sentiment_analyzer_using_bert_en.md b/docs/_posts/ahmedlone127/2024-11-11-aspect_based_sentiment_analyzer_using_bert_en.md new file mode 100644 index 00000000000000..ab80e5ab9f3e0f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-aspect_based_sentiment_analyzer_using_bert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English aspect_based_sentiment_analyzer_using_bert BertForSequenceClassification from srimeenakshiks +author: John Snow Labs +name: aspect_based_sentiment_analyzer_using_bert +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `aspect_based_sentiment_analyzer_using_bert` is an English model originally trained by srimeenakshiks. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/aspect_based_sentiment_analyzer_using_bert_en_5.5.1_3.0_1731309636675.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/aspect_based_sentiment_analyzer_using_bert_en_5.5.1_3.0_1731309636675.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("aspect_based_sentiment_analyzer_using_bert","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("aspect_based_sentiment_analyzer_using_bert", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|aspect_based_sentiment_analyzer_using_bert| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/srimeenakshiks/aspect-based-sentiment-analyzer-using-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-auto_adver_en.md b/docs/_posts/ahmedlone127/2024-11-11-auto_adver_en.md new file mode 100644 index 00000000000000..acb4e1755b83d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-auto_adver_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English auto_adver BertForTokenClassification from Phil-AT +author: John Snow Labs +name: auto_adver +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `auto_adver` is an English model originally trained by Phil-AT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/auto_adver_en_5.5.1_3.0_1731299498613.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/auto_adver_en_5.5.1_3.0_1731299498613.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("auto_adver","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("auto_adver", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|auto_adver| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Phil-AT/Auto-Adver \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-auto_adver_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-auto_adver_pipeline_en.md new file mode 100644 index 00000000000000..1ba9a77efacb57 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-auto_adver_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English auto_adver_pipeline pipeline BertForTokenClassification from Phil-AT +author: John Snow Labs +name: auto_adver_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `auto_adver_pipeline` is an English model originally trained by Phil-AT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/auto_adver_pipeline_en_5.5.1_3.0_1731299561942.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/auto_adver_pipeline_en_5.5.1_3.0_1731299561942.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("auto_adver_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("auto_adver_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|auto_adver_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Phil-AT/Auto-Adver + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-autotrain_gamblingtips_43804110844_en.md b/docs/_posts/ahmedlone127/2024-11-11-autotrain_gamblingtips_43804110844_en.md new file mode 100644 index 00000000000000..4dea1d91bc0d82 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-autotrain_gamblingtips_43804110844_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English autotrain_gamblingtips_43804110844 BertForQuestionAnswering from Berrisaur +author: John Snow Labs +name: autotrain_gamblingtips_43804110844 +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `autotrain_gamblingtips_43804110844` is an English model originally trained by Berrisaur. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autotrain_gamblingtips_43804110844_en_5.5.1_3.0_1731289457467.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autotrain_gamblingtips_43804110844_en_5.5.1_3.0_1731289457467.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("autotrain_gamblingtips_43804110844","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("autotrain_gamblingtips_43804110844", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autotrain_gamblingtips_43804110844| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/Berrisaur/autotrain-gamblingtips-43804110844 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-autotrain_gamblingtips_43804110844_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-autotrain_gamblingtips_43804110844_pipeline_en.md new file mode 100644 index 00000000000000..a5ad5671bbf392 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-autotrain_gamblingtips_43804110844_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English autotrain_gamblingtips_43804110844_pipeline pipeline BertForQuestionAnswering from Berrisaur +author: John Snow Labs +name: autotrain_gamblingtips_43804110844_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`autotrain_gamblingtips_43804110844_pipeline` is a English model originally trained by Berrisaur. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autotrain_gamblingtips_43804110844_pipeline_en_5.5.1_3.0_1731289523769.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autotrain_gamblingtips_43804110844_pipeline_en_5.5.1_3.0_1731289523769.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("autotrain_gamblingtips_43804110844_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("autotrain_gamblingtips_43804110844_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autotrain_gamblingtips_43804110844_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/Berrisaur/autotrain-gamblingtips-43804110844 + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-autotrain_nzog3_ca819_en.md b/docs/_posts/ahmedlone127/2024-11-11-autotrain_nzog3_ca819_en.md new file mode 100644 index 00000000000000..289dcdde1404d9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-autotrain_nzog3_ca819_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English autotrain_nzog3_ca819 MPNetForSequenceClassification from ulisesbravo +author: John Snow Labs +name: autotrain_nzog3_ca819 +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`autotrain_nzog3_ca819` is a English model originally trained by ulisesbravo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autotrain_nzog3_ca819_en_5.5.1_3.0_1731301504833.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autotrain_nzog3_ca819_en_5.5.1_3.0_1731301504833.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = MPNetForSequenceClassification.pretrained("autotrain_nzog3_ca819","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = MPNetForSequenceClassification.pretrained("autotrain_nzog3_ca819", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autotrain_nzog3_ca819| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.0 MB| + +## References + +https://huggingface.co/ulisesbravo/autotrain-nzog3-ca819 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-autotrain_nzog3_ca819_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-autotrain_nzog3_ca819_pipeline_en.md new file mode 100644 index 00000000000000..7b5800c54a6b56 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-autotrain_nzog3_ca819_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English autotrain_nzog3_ca819_pipeline pipeline MPNetForSequenceClassification from ulisesbravo +author: John Snow Labs +name: autotrain_nzog3_ca819_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`autotrain_nzog3_ca819_pipeline` is a English model originally trained by ulisesbravo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autotrain_nzog3_ca819_pipeline_en_5.5.1_3.0_1731301527188.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autotrain_nzog3_ca819_pipeline_en_5.5.1_3.0_1731301527188.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("autotrain_nzog3_ca819_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("autotrain_nzog3_ca819_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autotrain_nzog3_ca819_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.0 MB| + +## References + +https://huggingface.co/ulisesbravo/autotrain-nzog3-ca819 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-banglabert_qa_en.md b/docs/_posts/ahmedlone127/2024-11-11-banglabert_qa_en.md new file mode 100644 index 00000000000000..d6f037773497a8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-banglabert_qa_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English banglabert_qa BertForQuestionAnswering from sanzanalora +author: John Snow Labs +name: banglabert_qa +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`banglabert_qa` is a English model originally trained by sanzanalora. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/banglabert_qa_en_5.5.1_3.0_1731307694600.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/banglabert_qa_en_5.5.1_3.0_1731307694600.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("banglabert_qa","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("banglabert_qa", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|banglabert_qa| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|412.2 MB| + +## References + +https://huggingface.co/sanzanalora/banglabert-qa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-banglabert_qa_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-banglabert_qa_pipeline_en.md new file mode 100644 index 00000000000000..d3a3acf3d80a0e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-banglabert_qa_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English banglabert_qa_pipeline pipeline BertForQuestionAnswering from sanzanalora +author: John Snow Labs +name: banglabert_qa_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`banglabert_qa_pipeline` is a English model originally trained by sanzanalora. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/banglabert_qa_pipeline_en_5.5.1_3.0_1731307717699.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/banglabert_qa_pipeline_en_5.5.1_3.0_1731307717699.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("banglabert_qa_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("banglabert_qa_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|banglabert_qa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|412.2 MB| + +## References + +https://huggingface.co/sanzanalora/banglabert-qa + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_340m_ft_first_1000_pref_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_340m_ft_first_1000_pref_en.md new file mode 100644 index 00000000000000..9fa6307ce1f59f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_340m_ft_first_1000_pref_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_340m_ft_first_1000_pref BertForSequenceClassification from SeppeV +author: John Snow Labs +name: bert_340m_ft_first_1000_pref +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_340m_ft_first_1000_pref` is a English model originally trained by SeppeV. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_340m_ft_first_1000_pref_en_5.5.1_3.0_1731309886322.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_340m_ft_first_1000_pref_en_5.5.1_3.0_1731309886322.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_340m_ft_first_1000_pref","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_340m_ft_first_1000_pref", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_340m_ft_first_1000_pref| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/SeppeV/bert_340M_ft_first_1000_pref \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_340m_ft_first_1000_pref_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_340m_ft_first_1000_pref_pipeline_en.md new file mode 100644 index 00000000000000..d2d9ac3c3a8b08 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_340m_ft_first_1000_pref_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_340m_ft_first_1000_pref_pipeline pipeline BertForSequenceClassification from SeppeV +author: John Snow Labs +name: bert_340m_ft_first_1000_pref_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_340m_ft_first_1000_pref_pipeline` is a English model originally trained by SeppeV. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_340m_ft_first_1000_pref_pipeline_en_5.5.1_3.0_1731309950078.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_340m_ft_first_1000_pref_pipeline_en_5.5.1_3.0_1731309950078.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_340m_ft_first_1000_pref_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_340m_ft_first_1000_pref_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_340m_ft_first_1000_pref_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/SeppeV/bert_340M_ft_first_1000_pref + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_abbrev_cased_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_abbrev_cased_en.md new file mode 100644 index 00000000000000..6c7e0b4259682b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_abbrev_cased_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_abbrev_cased BertForTokenClassification from batterydata +author: John Snow Labs +name: bert_abbrev_cased +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_abbrev_cased` is a English model originally trained by batterydata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_abbrev_cased_en_5.5.1_3.0_1731290509572.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_abbrev_cased_en_5.5.1_3.0_1731290509572.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("bert_abbrev_cased","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("bert_abbrev_cased", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_abbrev_cased| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/batterydata/bert-abbrev-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_abbrev_cased_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_abbrev_cased_pipeline_en.md new file mode 100644 index 00000000000000..7b6f92646d60e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_abbrev_cased_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_abbrev_cased_pipeline pipeline BertForTokenClassification from batterydata +author: John Snow Labs +name: bert_abbrev_cased_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_abbrev_cased_pipeline` is a English model originally trained by batterydata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_abbrev_cased_pipeline_en_5.5.1_3.0_1731290530928.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_abbrev_cased_pipeline_en_5.5.1_3.0_1731290530928.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_abbrev_cased_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_abbrev_cased_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_abbrev_cased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/batterydata/bert-abbrev-cased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_base_arabert_bioner_english_arabic_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_base_arabert_bioner_english_arabic_en.md new file mode 100644 index 00000000000000..4920bbd7f432f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_base_arabert_bioner_english_arabic_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_base_arabert_bioner_english_arabic BertForTokenClassification from StivenLancheros +author: John Snow Labs +name: bert_base_arabert_bioner_english_arabic +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_arabert_bioner_english_arabic` is a English model originally trained by StivenLancheros. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_arabert_bioner_english_arabic_en_5.5.1_3.0_1731286022889.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_arabert_bioner_english_arabic_en_5.5.1_3.0_1731286022889.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("bert_base_arabert_bioner_english_arabic","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("bert_base_arabert_bioner_english_arabic", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_arabert_bioner_english_arabic| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|504.6 MB| + +## References + +https://huggingface.co/StivenLancheros/bert-base-arabert-BioNER-EN-AR \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_base_arabert_bioner_english_arabic_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_base_arabert_bioner_english_arabic_pipeline_en.md new file mode 100644 index 00000000000000..ae57ca59e1ee70 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_base_arabert_bioner_english_arabic_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_base_arabert_bioner_english_arabic_pipeline pipeline BertForTokenClassification from StivenLancheros +author: John Snow Labs +name: bert_base_arabert_bioner_english_arabic_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_arabert_bioner_english_arabic_pipeline` is a English model originally trained by StivenLancheros. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_arabert_bioner_english_arabic_pipeline_en_5.5.1_3.0_1731286049165.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_arabert_bioner_english_arabic_pipeline_en_5.5.1_3.0_1731286049165.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_arabert_bioner_english_arabic_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_arabert_bioner_english_arabic_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_arabert_bioner_english_arabic_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|504.6 MB| + +## References + +https://huggingface.co/StivenLancheros/bert-base-arabert-BioNER-EN-AR + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_base_cased_finetuned_ner_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_base_cased_finetuned_ner_en.md new file mode 100644 index 00000000000000..d7fdba3275af57 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_base_cased_finetuned_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_base_cased_finetuned_ner BertForTokenClassification from yuridrcosta +author: John Snow Labs +name: bert_base_cased_finetuned_ner +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_cased_finetuned_ner` is a English model originally trained by yuridrcosta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_ner_en_5.5.1_3.0_1731298877348.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_ner_en_5.5.1_3.0_1731298877348.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("bert_base_cased_finetuned_ner","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("bert_base_cased_finetuned_ner", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_ner| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/yuridrcosta/bert-base-cased-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_base_cased_finetuned_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_base_cased_finetuned_ner_pipeline_en.md new file mode 100644 index 00000000000000..859cf1160e45d9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_base_cased_finetuned_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_base_cased_finetuned_ner_pipeline pipeline BertForTokenClassification from yuridrcosta +author: John Snow Labs +name: bert_base_cased_finetuned_ner_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_cased_finetuned_ner_pipeline` is a English model originally trained by yuridrcosta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_ner_pipeline_en_5.5.1_3.0_1731298898561.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_ner_pipeline_en_5.5.1_3.0_1731298898561.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_cased_finetuned_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_cased_finetuned_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/yuridrcosta/bert-base-cased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_base_cased_mnli_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_base_cased_mnli_en.md new file mode 100644 index 00000000000000..a4d2055afcc2b6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_base_cased_mnli_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_base_cased_mnli BertForSequenceClassification from WillHeld +author: John Snow Labs +name: bert_base_cased_mnli +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_cased_mnli` is a English model originally trained by WillHeld. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_mnli_en_5.5.1_3.0_1731309647852.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_mnli_en_5.5.1_3.0_1731309647852.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_cased_mnli","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_cased_mnli", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_mnli| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|406.0 MB| + +## References + +https://huggingface.co/WillHeld/bert-base-cased-mnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_base_cased_mnli_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_base_cased_mnli_pipeline_en.md new file mode 100644 index 00000000000000..fcab768f11c4fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_base_cased_mnli_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_base_cased_mnli_pipeline pipeline BertForSequenceClassification from WillHeld +author: John Snow Labs +name: bert_base_cased_mnli_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_cased_mnli_pipeline` is a English model originally trained by WillHeld. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_mnli_pipeline_en_5.5.1_3.0_1731309670318.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_mnli_pipeline_en_5.5.1_3.0_1731309670318.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_cased_mnli_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_cased_mnli_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_mnli_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.0 MB| + +## References + +https://huggingface.co/WillHeld/bert-base-cased-mnli + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_base_chinese_finetuned_ner_v1_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_base_chinese_finetuned_ner_v1_en.md new file mode 100644 index 00000000000000..05b7da45abe151 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_base_chinese_finetuned_ner_v1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_base_chinese_finetuned_ner_v1 BertForTokenClassification from leonadase +author: John Snow Labs +name: bert_base_chinese_finetuned_ner_v1 +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_chinese_finetuned_ner_v1` is a English model originally trained by leonadase. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_chinese_finetuned_ner_v1_en_5.5.1_3.0_1731290477549.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_chinese_finetuned_ner_v1_en_5.5.1_3.0_1731290477549.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("bert_base_chinese_finetuned_ner_v1","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("bert_base_chinese_finetuned_ner_v1", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_chinese_finetuned_ner_v1| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|381.1 MB| + +## References + +https://huggingface.co/leonadase/bert-base-chinese-finetuned-ner-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_base_chinese_finetuned_ner_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_base_chinese_finetuned_ner_v1_pipeline_en.md new file mode 100644 index 00000000000000..8049ea6a29d62a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_base_chinese_finetuned_ner_v1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_base_chinese_finetuned_ner_v1_pipeline pipeline BertForTokenClassification from leonadase +author: John Snow Labs +name: bert_base_chinese_finetuned_ner_v1_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_chinese_finetuned_ner_v1_pipeline` is a English model originally trained by leonadase. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_chinese_finetuned_ner_v1_pipeline_en_5.5.1_3.0_1731290498443.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_chinese_finetuned_ner_v1_pipeline_en_5.5.1_3.0_1731290498443.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_chinese_finetuned_ner_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_chinese_finetuned_ner_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_chinese_finetuned_ner_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|381.1 MB| + +## References + +https://huggingface.co/leonadase/bert-base-chinese-finetuned-ner-v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_base_spanish_wwm_cased_ehealth_kd_es.md b/docs/_posts/ahmedlone127/2024-11-11-bert_base_spanish_wwm_cased_ehealth_kd_es.md new file mode 100644 index 00000000000000..63ce36af42d090 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_base_spanish_wwm_cased_ehealth_kd_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish bert_base_spanish_wwm_cased_ehealth_kd BertForTokenClassification from IIC +author: John Snow Labs +name: bert_base_spanish_wwm_cased_ehealth_kd +date: 2024-11-11 +tags: [es, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_spanish_wwm_cased_ehealth_kd` is a Castilian, Spanish model originally trained by IIC. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_cased_ehealth_kd_es_5.5.1_3.0_1731290730612.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_cased_ehealth_kd_es_5.5.1_3.0_1731290730612.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("bert_base_spanish_wwm_cased_ehealth_kd","es") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("bert_base_spanish_wwm_cased_ehealth_kd", "es") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_spanish_wwm_cased_ehealth_kd| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|es| +|Size:|409.5 MB| + +## References + +https://huggingface.co/IIC/bert-base-spanish-wwm-cased-ehealth_kd \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_base_spanish_wwm_cased_ehealth_kd_pipeline_es.md b/docs/_posts/ahmedlone127/2024-11-11-bert_base_spanish_wwm_cased_ehealth_kd_pipeline_es.md new file mode 100644 index 00000000000000..2ea3f926e3ea7c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_base_spanish_wwm_cased_ehealth_kd_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish bert_base_spanish_wwm_cased_ehealth_kd_pipeline pipeline BertForTokenClassification from IIC +author: John Snow Labs +name: bert_base_spanish_wwm_cased_ehealth_kd_pipeline +date: 2024-11-11 +tags: [es, open_source, pipeline, onnx] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_spanish_wwm_cased_ehealth_kd_pipeline` is a Castilian, Spanish model originally trained by IIC. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_cased_ehealth_kd_pipeline_es_5.5.1_3.0_1731290752512.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_cased_ehealth_kd_pipeline_es_5.5.1_3.0_1731290752512.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_spanish_wwm_cased_ehealth_kd_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_spanish_wwm_cased_ehealth_kd_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_spanish_wwm_cased_ehealth_kd_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|409.5 MB| + +## References + +https://huggingface.co/IIC/bert-base-spanish-wwm-cased-ehealth_kd + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_ai4privacy_english_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_ai4privacy_english_en.md new file mode 100644 index 00000000000000..abda28dd87ce7d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_ai4privacy_english_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_base_uncased_ai4privacy_english BertForTokenClassification from xXiaobuding +author: John Snow Labs +name: bert_base_uncased_ai4privacy_english +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_ai4privacy_english` is a English model originally trained by xXiaobuding. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_ai4privacy_english_en_5.5.1_3.0_1731285288447.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_ai4privacy_english_en_5.5.1_3.0_1731285288447.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("bert_base_uncased_ai4privacy_english","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("bert_base_uncased_ai4privacy_english", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_ai4privacy_english| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|407.5 MB| + +## References + +https://huggingface.co/xXiaobuding/bert-base-uncased_ai4privacy_en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_ai4privacy_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_ai4privacy_english_pipeline_en.md new file mode 100644 index 00000000000000..3f1af225a7e1d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_ai4privacy_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_base_uncased_ai4privacy_english_pipeline pipeline BertForTokenClassification from xXiaobuding +author: John Snow Labs +name: bert_base_uncased_ai4privacy_english_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_ai4privacy_english_pipeline` is a English model originally trained by xXiaobuding. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_ai4privacy_english_pipeline_en_5.5.1_3.0_1731285310338.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_ai4privacy_english_pipeline_en_5.5.1_3.0_1731285310338.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_uncased_ai4privacy_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_uncased_ai4privacy_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_ai4privacy_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.5 MB| + +## References + +https://huggingface.co/xXiaobuding/bert-base-uncased_ai4privacy_en + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_emotion_prikshit7766_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_emotion_prikshit7766_en.md new file mode 100644 index 00000000000000..e53b97aac910c0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_emotion_prikshit7766_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_base_uncased_emotion_prikshit7766 BertForSequenceClassification from Prikshit7766 +author: John Snow Labs +name: bert_base_uncased_emotion_prikshit7766 +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_emotion_prikshit7766` is a English model originally trained by Prikshit7766. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_emotion_prikshit7766_en_5.5.1_3.0_1731310162648.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_emotion_prikshit7766_en_5.5.1_3.0_1731310162648.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_emotion_prikshit7766","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_emotion_prikshit7766", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_emotion_prikshit7766| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Prikshit7766/bert-base-uncased-emotion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_emotion_prikshit7766_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_emotion_prikshit7766_pipeline_en.md new file mode 100644 index 00000000000000..4a154aae33762a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_emotion_prikshit7766_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_base_uncased_emotion_prikshit7766_pipeline pipeline BertForSequenceClassification from Prikshit7766 +author: John Snow Labs +name: bert_base_uncased_emotion_prikshit7766_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_emotion_prikshit7766_pipeline` is a English model originally trained by Prikshit7766. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_emotion_prikshit7766_pipeline_en_5.5.1_3.0_1731310189528.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_emotion_prikshit7766_pipeline_en_5.5.1_3.0_1731310189528.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_uncased_emotion_prikshit7766_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_uncased_emotion_prikshit7766_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_emotion_prikshit7766_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Prikshit7766/bert-base-uncased-emotion + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_figurative_language_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_figurative_language_en.md new file mode 100644 index 00000000000000..13849f955bb2ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_figurative_language_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English bert_base_uncased_figurative_language BertForQuestionAnswering from DunnBC22 +author: John Snow Labs +name: bert_base_uncased_figurative_language +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_figurative_language` is a English model originally trained by DunnBC22. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_figurative_language_en_5.5.1_3.0_1731308200930.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_figurative_language_en_5.5.1_3.0_1731308200930.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("bert_base_uncased_figurative_language","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("bert_base_uncased_figurative_language", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_figurative_language| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/DunnBC22/bert-base-uncased-Figurative_Language \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_figurative_language_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_figurative_language_pipeline_en.md new file mode 100644 index 00000000000000..1776fc9fe52fe7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_figurative_language_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bert_base_uncased_figurative_language_pipeline pipeline BertForQuestionAnswering from DunnBC22 +author: John Snow Labs +name: bert_base_uncased_figurative_language_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_figurative_language_pipeline` is an English model originally trained by DunnBC22.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_figurative_language_pipeline_en_5.5.1_3.0_1731308223519.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_figurative_language_pipeline_en_5.5.1_3.0_1731308223519.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_uncased_figurative_language_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_uncased_figurative_language_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_figurative_language_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/DunnBC22/bert-base-uncased-Figurative_Language + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_finetuned_triviaqa_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_finetuned_triviaqa_en.md new file mode 100644 index 00000000000000..5d53f223d9f537 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_finetuned_triviaqa_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_triviaqa BertForQuestionAnswering from mirbostani +author: John Snow Labs +name: bert_base_uncased_finetuned_triviaqa +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_triviaqa` is an English model originally trained by mirbostani.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_triviaqa_en_5.5.1_3.0_1731308084796.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_triviaqa_en_5.5.1_3.0_1731308084796.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("bert_base_uncased_finetuned_triviaqa","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("bert_base_uncased_finetuned_triviaqa", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_triviaqa| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/mirbostani/bert-base-uncased-finetuned-triviaqa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_finetuned_triviaqa_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_finetuned_triviaqa_pipeline_en.md new file mode 100644 index 00000000000000..df63c8a41e3860 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_finetuned_triviaqa_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_triviaqa_pipeline pipeline BertForQuestionAnswering from mirbostani +author: John Snow Labs +name: bert_base_uncased_finetuned_triviaqa_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_triviaqa_pipeline` is an English model originally trained by mirbostani.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_triviaqa_pipeline_en_5.5.1_3.0_1731308106385.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_triviaqa_pipeline_en_5.5.1_3.0_1731308106385.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_uncased_finetuned_triviaqa_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_uncased_finetuned_triviaqa_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_triviaqa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/mirbostani/bert-base-uncased-finetuned-triviaqa + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_sba_clf_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_sba_clf_en.md new file mode 100644 index 00000000000000..cb245e60cadf96 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_sba_clf_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_base_uncased_sba_clf BertForSequenceClassification from ahmedselhady +author: John Snow Labs +name: bert_base_uncased_sba_clf +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_sba_clf` is an English model originally trained by ahmedselhady. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sba_clf_en_5.5.1_3.0_1731309789010.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sba_clf_en_5.5.1_3.0_1731309789010.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_sba_clf","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_sba_clf", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_sba_clf| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/ahmedselhady/bert-base-uncased-sba-clf \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_sba_clf_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_sba_clf_pipeline_en.md new file mode 100644 index 00000000000000..6091255e81402f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_base_uncased_sba_clf_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_base_uncased_sba_clf_pipeline pipeline BertForSequenceClassification from ahmedselhady +author: John Snow Labs +name: bert_base_uncased_sba_clf_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_sba_clf_pipeline` is an English model originally trained by ahmedselhady. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sba_clf_pipeline_en_5.5.1_3.0_1731309811038.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sba_clf_pipeline_en_5.5.1_3.0_1731309811038.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_uncased_sba_clf_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_uncased_sba_clf_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_sba_clf_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/ahmedselhady/bert-base-uncased-sba-clf + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_classifier_spanish_news_classification_headlines_es.md b/docs/_posts/ahmedlone127/2024-11-11-bert_classifier_spanish_news_classification_headlines_es.md new file mode 100644 index 00000000000000..946e766f6b0406 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_classifier_spanish_news_classification_headlines_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish bert_classifier_spanish_news_classification_headlines BertForSequenceClassification from M47Labs +author: John Snow Labs +name: bert_classifier_spanish_news_classification_headlines +date: 2024-11-11 +tags: [es, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: es +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_classifier_spanish_news_classification_headlines` is a Castilian, Spanish model originally trained by M47Labs.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_classifier_spanish_news_classification_headlines_es_5.5.1_3.0_1731309478600.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_classifier_spanish_news_classification_headlines_es_5.5.1_3.0_1731309478600.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_classifier_spanish_news_classification_headlines","es") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_classifier_spanish_news_classification_headlines", "es") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_classifier_spanish_news_classification_headlines| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|es| +|Size:|411.7 MB| + +## References + +https://huggingface.co/M47Labs/spanish_news_classification_headlines \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_classifier_spanish_news_classification_headlines_pipeline_es.md b/docs/_posts/ahmedlone127/2024-11-11-bert_classifier_spanish_news_classification_headlines_pipeline_es.md new file mode 100644 index 00000000000000..70a54f52790f47 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_classifier_spanish_news_classification_headlines_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish bert_classifier_spanish_news_classification_headlines_pipeline pipeline BertForSequenceClassification from M47Labs +author: John Snow Labs +name: bert_classifier_spanish_news_classification_headlines_pipeline +date: 2024-11-11 +tags: [es, open_source, pipeline, onnx] +task: Text Classification +language: es +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_classifier_spanish_news_classification_headlines_pipeline` is a Castilian, Spanish model originally trained by M47Labs.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_classifier_spanish_news_classification_headlines_pipeline_es_5.5.1_3.0_1731309503064.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_classifier_spanish_news_classification_headlines_pipeline_es_5.5.1_3.0_1731309503064.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_classifier_spanish_news_classification_headlines_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_classifier_spanish_news_classification_headlines_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_classifier_spanish_news_classification_headlines_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|411.8 MB| + +## References + +https://huggingface.co/M47Labs/spanish_news_classification_headlines + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_arcchialogy_ner_hp_tunned_hgf_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_arcchialogy_ner_hp_tunned_hgf_en.md new file mode 100644 index 00000000000000..945120133a1b34 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_arcchialogy_ner_hp_tunned_hgf_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_finetuned_arcchialogy_ner_hp_tunned_hgf BertForTokenClassification from nstrn-mo +author: John Snow Labs +name: bert_finetuned_arcchialogy_ner_hp_tunned_hgf +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuned_arcchialogy_ner_hp_tunned_hgf` is an English model originally trained by nstrn-mo.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_arcchialogy_ner_hp_tunned_hgf_en_5.5.1_3.0_1731286093619.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_arcchialogy_ner_hp_tunned_hgf_en_5.5.1_3.0_1731286093619.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_arcchialogy_ner_hp_tunned_hgf","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_arcchialogy_ner_hp_tunned_hgf", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_arcchialogy_ner_hp_tunned_hgf| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/nstrn-mo/bert-finetuned-arcchialogy-ner-hp-tunned-hgf \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_arcchialogy_ner_hp_tunned_hgf_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_arcchialogy_ner_hp_tunned_hgf_pipeline_en.md new file mode 100644 index 00000000000000..4cd4824a5257ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_arcchialogy_ner_hp_tunned_hgf_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_finetuned_arcchialogy_ner_hp_tunned_hgf_pipeline pipeline BertForTokenClassification from nstrn-mo +author: John Snow Labs +name: bert_finetuned_arcchialogy_ner_hp_tunned_hgf_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuned_arcchialogy_ner_hp_tunned_hgf_pipeline` is an English model originally trained by nstrn-mo.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_arcchialogy_ner_hp_tunned_hgf_pipeline_en_5.5.1_3.0_1731286113957.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_arcchialogy_ner_hp_tunned_hgf_pipeline_en_5.5.1_3.0_1731286113957.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned_arcchialogy_ner_hp_tunned_hgf_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned_arcchialogy_ner_hp_tunned_hgf_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_arcchialogy_ner_hp_tunned_hgf_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/nstrn-mo/bert-finetuned-arcchialogy-ner-hp-tunned-hgf + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner4_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner4_en.md new file mode 100644 index 00000000000000..d489e3c0465fe7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner4_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_finetuned_ner4 BertForTokenClassification from kabear +author: John Snow Labs +name: bert_finetuned_ner4 +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuned_ner4` is an English model originally trained by kabear. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner4_en_5.5.1_3.0_1731290275551.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner4_en_5.5.1_3.0_1731290275551.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner4","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner4", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner4| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/kabear/bert-finetuned-ner4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner4_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner4_pipeline_en.md new file mode 100644 index 00000000000000..354524cbb23379 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner4_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_finetuned_ner4_pipeline pipeline BertForTokenClassification from kabear +author: John Snow Labs +name: bert_finetuned_ner4_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuned_ner4_pipeline` is an English model originally trained by kabear. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner4_pipeline_en_5.5.1_3.0_1731290296627.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner4_pipeline_en_5.5.1_3.0_1731290296627.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned_ner4_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned_ner4_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner4_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/kabear/bert-finetuned-ner4 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_dylanalloy_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_dylanalloy_en.md new file mode 100644 index 00000000000000..79a4b816f47bd9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_dylanalloy_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_finetuned_ner_dylanalloy BertForTokenClassification from dylanalloy +author: John Snow Labs +name: bert_finetuned_ner_dylanalloy +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuned_ner_dylanalloy` is an English model originally trained by dylanalloy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_dylanalloy_en_5.5.1_3.0_1731290622914.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_dylanalloy_en_5.5.1_3.0_1731290622914.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner_dylanalloy","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])  # each stage reads the columns produced by the stages before it
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)  # result carries the "ner" output column
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text") // raw text column of the input DataFrame
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner_dylanalloy", "en")
+    .setInputCols(Array("document","token")) // must match the output columns of the two stages above
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_dylanalloy| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/dylanalloy/bert-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_dylanalloy_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_dylanalloy_pipeline_en.md new file mode 100644 index 00000000000000..06d8a06ecb6c91 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_dylanalloy_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_finetuned_ner_dylanalloy_pipeline pipeline BertForTokenClassification from dylanalloy +author: John Snow Labs +name: bert_finetuned_ner_dylanalloy_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuned_ner_dylanalloy_pipeline` is an English model originally trained by dylanalloy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_dylanalloy_pipeline_en_5.5.1_3.0_1731290643740.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_dylanalloy_pipeline_en_5.5.1_3.0_1731290643740.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned_ner_dylanalloy_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned_ner_dylanalloy_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_dylanalloy_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/dylanalloy/bert-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_joshuaaax_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_joshuaaax_en.md new file mode 100644 index 00000000000000..25be0b2d8d6e46 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_joshuaaax_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_finetuned_ner_joshuaaax BertForTokenClassification from JoshuaAAX +author: John Snow Labs +name: bert_finetuned_ner_joshuaaax +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuned_ner_joshuaaax` is an English model originally trained by JoshuaAAX. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_joshuaaax_en_5.5.1_3.0_1731285567463.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_joshuaaax_en_5.5.1_3.0_1731285567463.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner_joshuaaax","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])  # each stage reads the columns produced by the stages before it
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)  # result carries the "ner" output column
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text") // raw text column of the input DataFrame
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner_joshuaaax", "en")
+    .setInputCols(Array("document","token")) // must match the output columns of the two stages above
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_joshuaaax| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/JoshuaAAX/bert-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_joshuaaax_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_joshuaaax_pipeline_en.md new file mode 100644 index 00000000000000..dfa2e34b5b0404 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_joshuaaax_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_finetuned_ner_joshuaaax_pipeline pipeline BertForTokenClassification from JoshuaAAX +author: John Snow Labs +name: bert_finetuned_ner_joshuaaax_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuned_ner_joshuaaax_pipeline` is an English model originally trained by JoshuaAAX. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_joshuaaax_pipeline_en_5.5.1_3.0_1731285588723.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_joshuaaax_pipeline_en_5.5.1_3.0_1731285588723.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned_ner_joshuaaax_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned_ner_joshuaaax_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_joshuaaax_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/JoshuaAAX/bert-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_nicodeb_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_nicodeb_en.md new file mode 100644 index 00000000000000..d03ee6a386a16b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_nicodeb_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_finetuned_ner_nicodeb BertForTokenClassification from nicodeb +author: John Snow Labs +name: bert_finetuned_ner_nicodeb +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuned_ner_nicodeb` is an English model originally trained by nicodeb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_nicodeb_en_5.5.1_3.0_1731285509207.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_nicodeb_en_5.5.1_3.0_1731285509207.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner_nicodeb","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])  # each stage reads the columns produced by the stages before it
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)  # result carries the "ner" output column
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text") // raw text column of the input DataFrame
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner_nicodeb", "en")
+    .setInputCols(Array("document","token")) // must match the output columns of the two stages above
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_nicodeb| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/nicodeb/bert-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_nicodeb_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_nicodeb_pipeline_en.md new file mode 100644 index 00000000000000..4a314a016fde66 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_nicodeb_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_finetuned_ner_nicodeb_pipeline pipeline BertForTokenClassification from nicodeb +author: John Snow Labs +name: bert_finetuned_ner_nicodeb_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuned_ner_nicodeb_pipeline` is an English model originally trained by nicodeb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_nicodeb_pipeline_en_5.5.1_3.0_1731285532547.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_nicodeb_pipeline_en_5.5.1_3.0_1731285532547.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned_ner_nicodeb_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned_ner_nicodeb_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_nicodeb_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/nicodeb/bert-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_savoxism_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_savoxism_en.md new file mode 100644 index 00000000000000..7fde38c647fb3e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_savoxism_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_finetuned_ner_savoxism BertForTokenClassification from Savoxism +author: John Snow Labs +name: bert_finetuned_ner_savoxism +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuned_ner_savoxism` is an English model originally trained by Savoxism. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_savoxism_en_5.5.1_3.0_1731285227266.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_savoxism_en_5.5.1_3.0_1731285227266.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner_savoxism","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])  # each stage reads the columns produced by the stages before it
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)  # result carries the "ner" output column
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text") // raw text column of the input DataFrame
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner_savoxism", "en")
+    .setInputCols(Array("document","token")) // must match the output columns of the two stages above
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_savoxism| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/Savoxism/bert-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_savoxism_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_savoxism_pipeline_en.md new file mode 100644 index 00000000000000..f14162b22365e2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_savoxism_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_finetuned_ner_savoxism_pipeline pipeline BertForTokenClassification from Savoxism +author: John Snow Labs +name: bert_finetuned_ner_savoxism_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuned_ner_savoxism_pipeline` is an English model originally trained by Savoxism. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_savoxism_pipeline_en_5.5.1_3.0_1731285251578.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_savoxism_pipeline_en_5.5.1_3.0_1731285251578.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned_ner_savoxism_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned_ner_savoxism_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_savoxism_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/Savoxism/bert-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_tornqvistmax_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_tornqvistmax_en.md new file mode 100644 index 00000000000000..9e8a3506c48127 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_tornqvistmax_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_finetuned_ner_tornqvistmax BertForTokenClassification from tornqvistmax +author: John Snow Labs +name: bert_finetuned_ner_tornqvistmax +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuned_ner_tornqvistmax` is an English model originally trained by tornqvistmax. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_tornqvistmax_en_5.5.1_3.0_1731290123499.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_tornqvistmax_en_5.5.1_3.0_1731290123499.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner_tornqvistmax","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])  # each stage reads the columns produced by the stages before it
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)  # result carries the "ner" output column
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text") // raw text column of the input DataFrame
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner_tornqvistmax", "en")
+    .setInputCols(Array("document","token")) // must match the output columns of the two stages above
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_tornqvistmax| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/tornqvistmax/bert-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_tornqvistmax_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_tornqvistmax_pipeline_en.md new file mode 100644 index 00000000000000..4ea57ab9740a0b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_tornqvistmax_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_finetuned_ner_tornqvistmax_pipeline pipeline BertForTokenClassification from tornqvistmax +author: John Snow Labs +name: bert_finetuned_ner_tornqvistmax_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuned_ner_tornqvistmax_pipeline` is an English model originally trained by tornqvistmax. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_tornqvistmax_pipeline_en_5.5.1_3.0_1731290144174.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_tornqvistmax_pipeline_en_5.5.1_3.0_1731290144174.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned_ner_tornqvistmax_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned_ner_tornqvistmax_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_tornqvistmax_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.8 MB| + +## References + +https://huggingface.co/tornqvistmax/bert-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_viktoryes_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_viktoryes_en.md new file mode 100644 index 00000000000000..76e27e111f99aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_viktoryes_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_finetuned_ner_viktoryes BertForTokenClassification from viktoryes +author: John Snow Labs +name: bert_finetuned_ner_viktoryes +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuned_ner_viktoryes` is an English model originally trained by viktoryes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_viktoryes_en_5.5.1_3.0_1731299620506.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_viktoryes_en_5.5.1_3.0_1731299620506.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner_viktoryes","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])  # each stage reads the columns produced by the stages before it
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)  # result carries the "ner" output column
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text") // raw text column of the input DataFrame
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner_viktoryes", "en")
+    .setInputCols(Array("document","token")) // must match the output columns of the two stages above
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_viktoryes| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/viktoryes/bert-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_viktoryes_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_viktoryes_pipeline_en.md new file mode 100644 index 00000000000000..e0e37af8b14502 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_ner_viktoryes_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_finetuned_ner_viktoryes_pipeline pipeline BertForTokenClassification from viktoryes +author: John Snow Labs +name: bert_finetuned_ner_viktoryes_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuned_ner_viktoryes_pipeline` is an English model originally trained by viktoryes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_viktoryes_pipeline_en_5.5.1_3.0_1731299640955.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_viktoryes_pipeline_en_5.5.1_3.0_1731299640955.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned_ner_viktoryes_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned_ner_viktoryes_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_viktoryes_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/viktoryes/bert-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_semantic_augmentation_ner_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_semantic_augmentation_ner_en.md new file mode 100644 index 00000000000000..7577ae96ee1838 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_semantic_augmentation_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_finetuned_semantic_augmentation_ner RoBertaForTokenClassification from lsoni +author: John Snow Labs +name: bert_finetuned_semantic_augmentation_ner +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_semantic_augmentation_ner` is a English model originally trained by lsoni. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_semantic_augmentation_ner_en_5.5.1_3.0_1731314256488.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_semantic_augmentation_ner_en_5.5.1_3.0_1731314256488.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = RoBertaForTokenClassification.pretrained("bert_finetuned_semantic_augmentation_ner","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = RoBertaForTokenClassification.pretrained("bert_finetuned_semantic_augmentation_ner", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_semantic_augmentation_ner| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|439.3 MB| + +## References + +https://huggingface.co/lsoni/bert-finetuned-semantic-augmentation-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_squad_accelerate_3_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_squad_accelerate_3_en.md new file mode 100644 index 00000000000000..d48a730c987b85 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_squad_accelerate_3_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English bert_finetuned_squad_accelerate_3 BertForQuestionAnswering from camilag +author: John Snow Labs +name: bert_finetuned_squad_accelerate_3 +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_squad_accelerate_3` is a English model originally trained by camilag. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_squad_accelerate_3_en_5.5.1_3.0_1731307814641.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_squad_accelerate_3_en_5.5.1_3.0_1731307814641.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("bert_finetuned_squad_accelerate_3","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("bert_finetuned_squad_accelerate_3", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_squad_accelerate_3| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/camilag/bert-finetuned-squad-accelerate-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_squad_accelerate_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_squad_accelerate_3_pipeline_en.md new file mode 100644 index 00000000000000..68c9155a6c626c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_squad_accelerate_3_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bert_finetuned_squad_accelerate_3_pipeline pipeline BertForQuestionAnswering from camilag +author: John Snow Labs +name: bert_finetuned_squad_accelerate_3_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_squad_accelerate_3_pipeline` is a English model originally trained by camilag. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_squad_accelerate_3_pipeline_en_5.5.1_3.0_1731307835513.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_squad_accelerate_3_pipeline_en_5.5.1_3.0_1731307835513.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned_squad_accelerate_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned_squad_accelerate_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_squad_accelerate_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.0 MB| + +## References + +https://huggingface.co/camilag/bert-finetuned-squad-accelerate-3 + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_squad_dokyoungkim_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_squad_dokyoungkim_en.md new file mode 100644 index 00000000000000..d679abf825b127 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_squad_dokyoungkim_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English bert_finetuned_squad_dokyoungkim BertForQuestionAnswering from dokyoungkim +author: John Snow Labs +name: bert_finetuned_squad_dokyoungkim +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_squad_dokyoungkim` is a English model originally trained by dokyoungkim. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_squad_dokyoungkim_en_5.5.1_3.0_1731307566830.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_squad_dokyoungkim_en_5.5.1_3.0_1731307566830.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("bert_finetuned_squad_dokyoungkim","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("bert_finetuned_squad_dokyoungkim", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_squad_dokyoungkim| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/dokyoungkim/bert-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_squad_dokyoungkim_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_squad_dokyoungkim_pipeline_en.md new file mode 100644 index 00000000000000..5b98fabd4bac67 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_squad_dokyoungkim_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bert_finetuned_squad_dokyoungkim_pipeline pipeline BertForQuestionAnswering from dokyoungkim +author: John Snow Labs +name: bert_finetuned_squad_dokyoungkim_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_squad_dokyoungkim_pipeline` is a English model originally trained by dokyoungkim. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_squad_dokyoungkim_pipeline_en_5.5.1_3.0_1731307587294.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_squad_dokyoungkim_pipeline_en_5.5.1_3.0_1731307587294.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned_squad_dokyoungkim_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned_squad_dokyoungkim_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_squad_dokyoungkim_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/dokyoungkim/bert-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_tmvar_corpus_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_tmvar_corpus_en.md new file mode 100644 index 00000000000000..fcfce703b9dd61 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_tmvar_corpus_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_finetuned_tmvar_corpus BertForTokenClassification from Salvatore +author: John Snow Labs +name: bert_finetuned_tmvar_corpus +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_tmvar_corpus` is a English model originally trained by Salvatore. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_tmvar_corpus_en_5.5.1_3.0_1731298905916.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_tmvar_corpus_en_5.5.1_3.0_1731298905916.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_tmvar_corpus","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_tmvar_corpus", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_tmvar_corpus| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|362.8 MB| + +## References + +https://huggingface.co/Salvatore/bert-finetuned-tmvar-corpus \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_tmvar_corpus_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_tmvar_corpus_pipeline_en.md new file mode 100644 index 00000000000000..2fbce5d2dc7d67 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_finetuned_tmvar_corpus_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_finetuned_tmvar_corpus_pipeline pipeline BertForTokenClassification from Salvatore +author: John Snow Labs +name: bert_finetuned_tmvar_corpus_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_tmvar_corpus_pipeline` is a English model originally trained by Salvatore. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_tmvar_corpus_pipeline_en_5.5.1_3.0_1731298925116.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_tmvar_corpus_pipeline_en_5.5.1_3.0_1731298925116.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned_tmvar_corpus_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned_tmvar_corpus_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_tmvar_corpus_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|362.9 MB| + +## References + +https://huggingface.co/Salvatore/bert-finetuned-tmvar-corpus + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_large_finetuned_phishing_junginkim_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_large_finetuned_phishing_junginkim_en.md new file mode 100644 index 00000000000000..6c840df7ee5583 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_large_finetuned_phishing_junginkim_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_large_finetuned_phishing_junginkim BertForSequenceClassification from Junginkim +author: John Snow Labs +name: bert_large_finetuned_phishing_junginkim +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_finetuned_phishing_junginkim` is a English model originally trained by Junginkim. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_finetuned_phishing_junginkim_en_5.5.1_3.0_1731310431818.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_finetuned_phishing_junginkim_en_5.5.1_3.0_1731310431818.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_large_finetuned_phishing_junginkim","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_large_finetuned_phishing_junginkim", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_finetuned_phishing_junginkim| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Junginkim/bert-large-finetuned-phishing \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_large_finetuned_phishing_junginkim_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_large_finetuned_phishing_junginkim_pipeline_en.md new file mode 100644 index 00000000000000..dbfb6eb102677c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_large_finetuned_phishing_junginkim_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_large_finetuned_phishing_junginkim_pipeline pipeline BertForSequenceClassification from Junginkim +author: John Snow Labs +name: bert_large_finetuned_phishing_junginkim_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_finetuned_phishing_junginkim_pipeline` is a English model originally trained by Junginkim. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_finetuned_phishing_junginkim_pipeline_en_5.5.1_3.0_1731310494277.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_finetuned_phishing_junginkim_pipeline_en_5.5.1_3.0_1731310494277.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_large_finetuned_phishing_junginkim_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_large_finetuned_phishing_junginkim_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_finetuned_phishing_junginkim_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Junginkim/bert-large-finetuned-phishing + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_large_uncased_whole_word_masking_finetuned_squad_dev_i_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_large_uncased_whole_word_masking_finetuned_squad_dev_i_en.md new file mode 100644 index 00000000000000..c50d06093bd5f7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_large_uncased_whole_word_masking_finetuned_squad_dev_i_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English bert_large_uncased_whole_word_masking_finetuned_squad_dev_i BertForQuestionAnswering from mdzrg +author: John Snow Labs +name: bert_large_uncased_whole_word_masking_finetuned_squad_dev_i +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_uncased_whole_word_masking_finetuned_squad_dev_i` is a English model originally trained by mdzrg. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_whole_word_masking_finetuned_squad_dev_i_en_5.5.1_3.0_1731307616981.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_whole_word_masking_finetuned_squad_dev_i_en_5.5.1_3.0_1731307616981.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("bert_large_uncased_whole_word_masking_finetuned_squad_dev_i","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("bert_large_uncased_whole_word_masking_finetuned_squad_dev_i", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_whole_word_masking_finetuned_squad_dev_i| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/mdzrg/bert-large-uncased-whole-word-masking-finetuned-squad-dev-I \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_large_uncased_whole_word_masking_finetuned_squad_dev_i_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_large_uncased_whole_word_masking_finetuned_squad_dev_i_pipeline_en.md new file mode 100644 index 00000000000000..b1d311833b4854 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_large_uncased_whole_word_masking_finetuned_squad_dev_i_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bert_large_uncased_whole_word_masking_finetuned_squad_dev_i_pipeline pipeline BertForQuestionAnswering from mdzrg +author: John Snow Labs +name: bert_large_uncased_whole_word_masking_finetuned_squad_dev_i_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_uncased_whole_word_masking_finetuned_squad_dev_i_pipeline` is a English model originally trained by mdzrg. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_whole_word_masking_finetuned_squad_dev_i_pipeline_en_5.5.1_3.0_1731307680614.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_whole_word_masking_finetuned_squad_dev_i_pipeline_en_5.5.1_3.0_1731307680614.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_large_uncased_whole_word_masking_finetuned_squad_dev_i_pipeline", lang="en") +annotations = pipeline.transform(df)  # df: your input Spark DataFrame (not created in this snippet) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_large_uncased_whole_word_masking_finetuned_squad_dev_i_pipeline", lang = "en") +val annotations = pipeline.transform(df) // df: your input DataFrame (not created in this snippet) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_whole_word_masking_finetuned_squad_dev_i_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/mdzrg/bert-large-uncased-whole-word-masking-finetuned-squad-dev-I + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_model_news_aggregator_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_model_news_aggregator_en.md new file mode 100644 index 00000000000000..1ed2337d2ed860 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_model_news_aggregator_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_model_news_aggregator BertForSequenceClassification from Subash2580 +author: John Snow Labs +name: bert_model_news_aggregator +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_model_news_aggregator` is an English model originally trained by Subash2580. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_model_news_aggregator_en_5.5.1_3.0_1731309957668.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_model_news_aggregator_en_5.5.1_3.0_1731309957668.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_model_news_aggregator","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data)  # predictions are written to the "class" column + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_model_news_aggregator", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") // needs import spark.implicits._ +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) // predictions are written to the "class" column + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_model_news_aggregator| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Subash2580/Bert_model_news_aggregator \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_model_news_aggregator_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_model_news_aggregator_pipeline_en.md new file mode 100644 index 00000000000000..3e701d5cd4a867 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_model_news_aggregator_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_model_news_aggregator_pipeline pipeline BertForSequenceClassification from Subash2580 +author: John Snow Labs +name: bert_model_news_aggregator_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_model_news_aggregator_pipeline` is an English model originally trained by Subash2580. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_model_news_aggregator_pipeline_en_5.5.1_3.0_1731309980168.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_model_news_aggregator_pipeline_en_5.5.1_3.0_1731309980168.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_model_news_aggregator_pipeline", lang="en") +annotations = pipeline.transform(df)  # df: your input Spark DataFrame (not created in this snippet) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_model_news_aggregator_pipeline", lang = "en") +val annotations = pipeline.transform(df) // df: your input DataFrame (not created in this snippet) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_model_news_aggregator_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Subash2580/Bert_model_news_aggregator + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_news_class_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_news_class_en.md new file mode 100644 index 00000000000000..6cfc866d5d272d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_news_class_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_news_class BertForSequenceClassification from cssupport +author: John Snow Labs +name: bert_news_class +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_news_class` is an English model originally trained by cssupport. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_news_class_en_5.5.1_3.0_1731310012331.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_news_class_en_5.5.1_3.0_1731310012331.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_news_class","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data)  # predictions are written to the "class" column + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_news_class", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") // needs import spark.implicits._ +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) // predictions are written to the "class" column + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_news_class| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/cssupport/bert-news-class \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_news_class_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_news_class_pipeline_en.md new file mode 100644 index 00000000000000..a5319d61f401dd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_news_class_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_news_class_pipeline pipeline BertForSequenceClassification from cssupport +author: John Snow Labs +name: bert_news_class_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_news_class_pipeline` is an English model originally trained by cssupport. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_news_class_pipeline_en_5.5.1_3.0_1731310049127.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_news_class_pipeline_en_5.5.1_3.0_1731310049127.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_news_class_pipeline", lang="en") +annotations = pipeline.transform(df)  # df: your input Spark DataFrame (not created in this snippet) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_news_class_pipeline", lang = "en") +val annotations = pipeline.transform(df) // df: your input DataFrame (not created in this snippet) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_news_class_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/cssupport/bert-news-class + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_portuguese_squad2_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_portuguese_squad2_en.md new file mode 100644 index 00000000000000..5792d6c2ed183a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_portuguese_squad2_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English bert_portuguese_squad2 BertForQuestionAnswering from lfcc +author: John Snow Labs +name: bert_portuguese_squad2 +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_portuguese_squad2` is an English model originally trained by lfcc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_portuguese_squad2_en_5.5.1_3.0_1731289735794.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_portuguese_squad2_en_5.5.1_3.0_1731289735794.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("bert_portuguese_squad2","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")  # column names must match the assembler's input columns +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data)  # the extracted span is written to the "answer" column + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("bert_portuguese_squad2", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context") // one row, two columns; needs import spark.implicits._ +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) // the extracted span is written to the "answer" column + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_portuguese_squad2| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/lfcc/bert-portuguese-squad2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_portuguese_squad2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_portuguese_squad2_pipeline_en.md new file mode 100644 index 00000000000000..626a4567ed3c3b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_portuguese_squad2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bert_portuguese_squad2_pipeline pipeline BertForQuestionAnswering from lfcc +author: John Snow Labs +name: bert_portuguese_squad2_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_portuguese_squad2_pipeline` is an English model originally trained by lfcc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_portuguese_squad2_pipeline_en_5.5.1_3.0_1731289756826.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_portuguese_squad2_pipeline_en_5.5.1_3.0_1731289756826.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_portuguese_squad2_pipeline", lang="en") +annotations = pipeline.transform(df)  # df: your input Spark DataFrame (not created in this snippet) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_portuguese_squad2_pipeline", lang = "en") +val annotations = pipeline.transform(df) // df: your input DataFrame (not created in this snippet) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_portuguese_squad2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.0 MB| + +## References + +https://huggingface.co/lfcc/bert-portuguese-squad2 + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_sliding_window_epoch_3_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_sliding_window_epoch_3_en.md new file mode 100644 index 00000000000000..cc75237af2e796 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_sliding_window_epoch_3_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English bert_sliding_window_epoch_3 BertForQuestionAnswering from Whalejay +author: John Snow Labs +name: bert_sliding_window_epoch_3 +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_sliding_window_epoch_3` is an English model originally trained by Whalejay. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_sliding_window_epoch_3_en_5.5.1_3.0_1731307867943.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_sliding_window_epoch_3_en_5.5.1_3.0_1731307867943.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("bert_sliding_window_epoch_3","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")  # column names must match the assembler's input columns +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data)  # the extracted span is written to the "answer" column + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("bert_sliding_window_epoch_3", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context") // one row, two columns; needs import spark.implicits._ +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) // the extracted span is written to the "answer" column + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_sliding_window_epoch_3| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Whalejay/bert-sliding-window_epoch_3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_sliding_window_epoch_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bert_sliding_window_epoch_3_pipeline_en.md new file mode 100644 index 00000000000000..2edab514beaa3a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_sliding_window_epoch_3_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bert_sliding_window_epoch_3_pipeline pipeline BertForQuestionAnswering from Whalejay +author: John Snow Labs +name: bert_sliding_window_epoch_3_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_sliding_window_epoch_3_pipeline` is an English model originally trained by Whalejay. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_sliding_window_epoch_3_pipeline_en_5.5.1_3.0_1731307932813.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_sliding_window_epoch_3_pipeline_en_5.5.1_3.0_1731307932813.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_sliding_window_epoch_3_pipeline", lang="en") +annotations = pipeline.transform(df)  # df: your input Spark DataFrame (not created in this snippet) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_sliding_window_epoch_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) // df: your input DataFrame (not created in this snippet) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_sliding_window_epoch_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Whalejay/bert-sliding-window_epoch_3 + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_xomlac_ner_pipeline_zh.md b/docs/_posts/ahmedlone127/2024-11-11-bert_xomlac_ner_pipeline_zh.md new file mode 100644 index 00000000000000..2122b09d6699e5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_xomlac_ner_pipeline_zh.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Chinese bert_xomlac_ner_pipeline pipeline BertForTokenClassification from b3x0m +author: John Snow Labs +name: bert_xomlac_ner_pipeline +date: 2024-11-11 +tags: [zh, open_source, pipeline, onnx] +task: Named Entity Recognition +language: zh +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_xomlac_ner_pipeline` is a Chinese model originally trained by b3x0m. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_xomlac_ner_pipeline_zh_5.5.1_3.0_1731298814714.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_xomlac_ner_pipeline_zh_5.5.1_3.0_1731298814714.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_xomlac_ner_pipeline", lang="zh") +annotations = pipeline.transform(df)  # df: your input Spark DataFrame (not created in this snippet) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_xomlac_ner_pipeline", lang = "zh") +val annotations = pipeline.transform(df) // df: your input DataFrame (not created in this snippet) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_xomlac_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|zh| +|Size:|381.1 MB| + +## References + +https://huggingface.co/b3x0m/bert-xomlac-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bert_xomlac_ner_zh.md b/docs/_posts/ahmedlone127/2024-11-11-bert_xomlac_ner_zh.md new file mode 100644 index 00000000000000..8cfbfeb70a61e3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bert_xomlac_ner_zh.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Chinese bert_xomlac_ner BertForTokenClassification from b3x0m +author: John Snow Labs +name: bert_xomlac_ner +date: 2024-11-11 +tags: [zh, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: zh +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_xomlac_ner` is a Chinese model originally trained by b3x0m. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_xomlac_ner_zh_5.5.1_3.0_1731298794255.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_xomlac_ner_zh_5.5.1_3.0_1731298794255.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("bert_xomlac_ner","zh") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data)  # per-token tags are written to the "ner" column + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("bert_xomlac_ner", "zh") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") // needs import spark.implicits._ +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) // per-token tags are written to the "ner" column + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_xomlac_ner| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|zh| +|Size:|381.1 MB| + +## References + +https://huggingface.co/b3x0m/bert-xomlac-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-beto_finetuned_ner_13_en.md b/docs/_posts/ahmedlone127/2024-11-11-beto_finetuned_ner_13_en.md new file mode 100644 index 00000000000000..9302d0b8eb13ba --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-beto_finetuned_ner_13_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English beto_finetuned_ner_13 BertForTokenClassification from ifis +author: John Snow Labs +name: beto_finetuned_ner_13 +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `beto_finetuned_ner_13` is an English model originally trained by ifis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/beto_finetuned_ner_13_en_5.5.1_3.0_1731291039879.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/beto_finetuned_ner_13_en_5.5.1_3.0_1731291039879.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("beto_finetuned_ner_13","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data)  # per-token tags are written to the "ner" column + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("beto_finetuned_ner_13", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") // needs import spark.implicits._ +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) // per-token tags are written to the "ner" column + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|beto_finetuned_ner_13| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/ifis/BETO-finetuned-ner-13 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-beto_finetuned_ner_13_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-beto_finetuned_ner_13_pipeline_en.md new file mode 100644 index 00000000000000..204268321dad36 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-beto_finetuned_ner_13_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English beto_finetuned_ner_13_pipeline pipeline BertForTokenClassification from ifis +author: John Snow Labs +name: beto_finetuned_ner_13_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `beto_finetuned_ner_13_pipeline` is an English model originally trained by ifis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/beto_finetuned_ner_13_pipeline_en_5.5.1_3.0_1731291060919.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/beto_finetuned_ner_13_pipeline_en_5.5.1_3.0_1731291060919.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("beto_finetuned_ner_13_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("beto_finetuned_ner_13_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|beto_finetuned_ner_13_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/ifis/BETO-finetuned-ner-13 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-beto_finetuned_ner_en.md b/docs/_posts/ahmedlone127/2024-11-11-beto_finetuned_ner_en.md new file mode 100644 index 00000000000000..ac6e503632a509 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-beto_finetuned_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English beto_finetuned_ner BertForTokenClassification from JoshuaAAX +author: John Snow Labs +name: beto_finetuned_ner +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`beto_finetuned_ner` is a English model originally trained by JoshuaAAX. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/beto_finetuned_ner_en_5.5.1_3.0_1731291099811.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/beto_finetuned_ner_en_5.5.1_3.0_1731291099811.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("beto_finetuned_ner","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("beto_finetuned_ner", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|beto_finetuned_ner| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/JoshuaAAX/beto-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-beto_finetuned_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-beto_finetuned_ner_pipeline_en.md new file mode 100644 index 00000000000000..0f310238b10ee7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-beto_finetuned_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English beto_finetuned_ner_pipeline pipeline BertForTokenClassification from JoshuaAAX +author: John Snow Labs +name: beto_finetuned_ner_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`beto_finetuned_ner_pipeline` is a English model originally trained by JoshuaAAX. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/beto_finetuned_ner_pipeline_en_5.5.1_3.0_1731291120382.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/beto_finetuned_ner_pipeline_en_5.5.1_3.0_1731291120382.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("beto_finetuned_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("beto_finetuned_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|beto_finetuned_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/JoshuaAAX/beto-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-beto_sayula_popoluca_en.md b/docs/_posts/ahmedlone127/2024-11-11-beto_sayula_popoluca_en.md new file mode 100644 index 00000000000000..5779fb8f6fdf0b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-beto_sayula_popoluca_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English beto_sayula_popoluca BertForTokenClassification from hugo-albert +author: John Snow Labs +name: beto_sayula_popoluca +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`beto_sayula_popoluca` is a English model originally trained by hugo-albert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/beto_sayula_popoluca_en_5.5.1_3.0_1731290885885.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/beto_sayula_popoluca_en_5.5.1_3.0_1731290885885.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("beto_sayula_popoluca","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("beto_sayula_popoluca", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|beto_sayula_popoluca| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|409.7 MB| + +## References + +https://huggingface.co/hugo-albert/beto-pos \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-beto_sayula_popoluca_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-beto_sayula_popoluca_pipeline_en.md new file mode 100644 index 00000000000000..41a5e1a08577a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-beto_sayula_popoluca_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English beto_sayula_popoluca_pipeline pipeline BertForTokenClassification from hugo-albert +author: John Snow Labs +name: beto_sayula_popoluca_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`beto_sayula_popoluca_pipeline` is a English model originally trained by hugo-albert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/beto_sayula_popoluca_pipeline_en_5.5.1_3.0_1731290909568.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/beto_sayula_popoluca_pipeline_en_5.5.1_3.0_1731290909568.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("beto_sayula_popoluca_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("beto_sayula_popoluca_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|beto_sayula_popoluca_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.7 MB| + +## References + +https://huggingface.co/hugo-albert/beto-pos + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-beto_sentiment_analysis_finetuned_en.md b/docs/_posts/ahmedlone127/2024-11-11-beto_sentiment_analysis_finetuned_en.md new file mode 100644 index 00000000000000..4272f17101d73e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-beto_sentiment_analysis_finetuned_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English beto_sentiment_analysis_finetuned BertForSequenceClassification from stinoco +author: John Snow Labs +name: beto_sentiment_analysis_finetuned +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`beto_sentiment_analysis_finetuned` is a English model originally trained by stinoco. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/beto_sentiment_analysis_finetuned_en_5.5.1_3.0_1731309155278.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/beto_sentiment_analysis_finetuned_en_5.5.1_3.0_1731309155278.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("beto_sentiment_analysis_finetuned","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("beto_sentiment_analysis_finetuned", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|beto_sentiment_analysis_finetuned| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|411.3 MB| + +## References + +https://huggingface.co/stinoco/beto-sentiment-analysis-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-beto_sentiment_analysis_finetuned_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-beto_sentiment_analysis_finetuned_pipeline_en.md new file mode 100644 index 00000000000000..b4e527ace7b1f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-beto_sentiment_analysis_finetuned_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English beto_sentiment_analysis_finetuned_pipeline pipeline BertForSequenceClassification from stinoco +author: John Snow Labs +name: beto_sentiment_analysis_finetuned_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`beto_sentiment_analysis_finetuned_pipeline` is a English model originally trained by stinoco. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/beto_sentiment_analysis_finetuned_pipeline_en_5.5.1_3.0_1731309178136.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/beto_sentiment_analysis_finetuned_pipeline_en_5.5.1_3.0_1731309178136.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("beto_sentiment_analysis_finetuned_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("beto_sentiment_analysis_finetuned_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|beto_sentiment_analysis_finetuned_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|411.4 MB| + +## References + +https://huggingface.co/stinoco/beto-sentiment-analysis-finetuned + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bge_99gpt_v1_en.md b/docs/_posts/ahmedlone127/2024-11-11-bge_99gpt_v1_en.md new file mode 100644 index 00000000000000..6fe0fe3ea4658e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bge_99gpt_v1_en.md @@ -0,0 +1,87 @@ +--- +layout: model +title: English bge_99gpt_v1 BGEEmbeddings from marroyo777 +author: John Snow Labs +name: bge_99gpt_v1 +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, bge] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BGEEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_99gpt_v1` is a English model originally trained by marroyo777. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_99gpt_v1_en_5.5.1_3.0_1731313056005.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_99gpt_v1_en_5.5.1_3.0_1731313056005.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = BGEEmbeddings.pretrained("bge_99gpt_v1","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + + +val embeddings = BGEEmbeddings.pretrained("bge_99gpt_v1","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_99gpt_v1| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[bge]| +|Language:|en| +|Size:|116.0 MB| + +## References + +https://huggingface.co/marroyo777/bge-99GPT-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bge_99gpt_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bge_99gpt_v1_pipeline_en.md new file mode 100644 index 00000000000000..1acf893dba9fe5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bge_99gpt_v1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bge_99gpt_v1_pipeline pipeline BGEEmbeddings from marroyo777 +author: John Snow Labs +name: bge_99gpt_v1_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_99gpt_v1_pipeline` is a English model originally trained by marroyo777. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_99gpt_v1_pipeline_en_5.5.1_3.0_1731313065265.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_99gpt_v1_pipeline_en_5.5.1_3.0_1731313065265.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bge_99gpt_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bge_99gpt_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_99gpt_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|116.0 MB| + +## References + +https://huggingface.co/marroyo777/bge-99GPT-v1 + +## Included Models + +- DocumentAssembler +- BGEEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bge_base_english_v1_5_course_recommender_v2_en.md b/docs/_posts/ahmedlone127/2024-11-11-bge_base_english_v1_5_course_recommender_v2_en.md new file mode 100644 index 00000000000000..af03cfe01992b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bge_base_english_v1_5_course_recommender_v2_en.md @@ -0,0 +1,87 @@ +--- +layout: model +title: English bge_base_english_v1_5_course_recommender_v2 BGEEmbeddings from datasocietyco +author: John Snow Labs +name: bge_base_english_v1_5_course_recommender_v2 +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, bge] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BGEEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_base_english_v1_5_course_recommender_v2` is a English model originally trained by datasocietyco. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_base_english_v1_5_course_recommender_v2_en_5.5.1_3.0_1731313437646.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_base_english_v1_5_course_recommender_v2_en_5.5.1_3.0_1731313437646.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = BGEEmbeddings.pretrained("bge_base_english_v1_5_course_recommender_v2","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + + +val embeddings = BGEEmbeddings.pretrained("bge_base_english_v1_5_course_recommender_v2","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_base_english_v1_5_course_recommender_v2| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[bge]| +|Language:|en| +|Size:|375.2 MB| + +## References + +https://huggingface.co/datasocietyco/bge-base-en-v1.5-course-recommender-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bge_base_english_v1_5_course_recommender_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bge_base_english_v1_5_course_recommender_v2_pipeline_en.md new file mode 100644 index 00000000000000..512787f72c67f3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bge_base_english_v1_5_course_recommender_v2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bge_base_english_v1_5_course_recommender_v2_pipeline pipeline BGEEmbeddings from datasocietyco +author: John Snow Labs +name: bge_base_english_v1_5_course_recommender_v2_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bge_base_english_v1_5_course_recommender_v2_pipeline` is an English model originally trained by datasocietyco. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_base_english_v1_5_course_recommender_v2_pipeline_en_5.5.1_3.0_1731313469876.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_base_english_v1_5_course_recommender_v2_pipeline_en_5.5.1_3.0_1731313469876.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bge_base_english_v1_5_course_recommender_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bge_base_english_v1_5_course_recommender_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_base_english_v1_5_course_recommender_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|375.2 MB| + +## References + +https://huggingface.co/datasocietyco/bge-base-en-v1.5-course-recommender-v2 + +## Included Models + +- DocumentAssembler +- BGEEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bge_base_english_v1_5_finetuned_osllmai_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bge_base_english_v1_5_finetuned_osllmai_v1_pipeline_en.md new file mode 100644 index 00000000000000..c18f2e8cbf2336 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bge_base_english_v1_5_finetuned_osllmai_v1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bge_base_english_v1_5_finetuned_osllmai_v1_pipeline pipeline BGEEmbeddings from osllmai +author: John Snow Labs +name: bge_base_english_v1_5_finetuned_osllmai_v1_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bge_base_english_v1_5_finetuned_osllmai_v1_pipeline` is an English model originally trained by osllmai. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_base_english_v1_5_finetuned_osllmai_v1_pipeline_en_5.5.1_3.0_1731313060461.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_base_english_v1_5_finetuned_osllmai_v1_pipeline_en_5.5.1_3.0_1731313060461.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bge_base_english_v1_5_finetuned_osllmai_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bge_base_english_v1_5_finetuned_osllmai_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_base_english_v1_5_finetuned_osllmai_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|404.3 MB| + +## References + +https://huggingface.co/osllmai/bge-base-en-v1.5-finetuned_osllmai_v1 + +## Included Models + +- DocumentAssembler +- BGEEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bge_base_legal_matryoshka_v1_en.md b/docs/_posts/ahmedlone127/2024-11-11-bge_base_legal_matryoshka_v1_en.md new file mode 100644 index 00000000000000..9647d613397e49 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bge_base_legal_matryoshka_v1_en.md @@ -0,0 +1,87 @@ +--- +layout: model +title: English bge_base_legal_matryoshka_v1 BGEEmbeddings from Tejasw1 +author: John Snow Labs +name: bge_base_legal_matryoshka_v1 +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, bge] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BGEEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_base_legal_matryoshka_v1` is a English model originally trained by Tejasw1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_base_legal_matryoshka_v1_en_5.5.1_3.0_1731312723474.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_base_legal_matryoshka_v1_en_5.5.1_3.0_1731312723474.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = BGEEmbeddings.pretrained("bge_base_legal_matryoshka_v1","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + + +val embeddings = BGEEmbeddings.pretrained("bge_base_legal_matryoshka_v1","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_base_legal_matryoshka_v1| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[bge]| +|Language:|en| +|Size:|383.6 MB| + +## References + +https://huggingface.co/Tejasw1/bge-base-legal-matryoshka-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bge_base_legal_matryoshka_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bge_base_legal_matryoshka_v1_pipeline_en.md new file mode 100644 index 00000000000000..c30db01371301c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bge_base_legal_matryoshka_v1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bge_base_legal_matryoshka_v1_pipeline pipeline BGEEmbeddings from Tejasw1 +author: John Snow Labs +name: bge_base_legal_matryoshka_v1_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_base_legal_matryoshka_v1_pipeline` is a English model originally trained by Tejasw1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_base_legal_matryoshka_v1_pipeline_en_5.5.1_3.0_1731312753524.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_base_legal_matryoshka_v1_pipeline_en_5.5.1_3.0_1731312753524.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bge_base_legal_matryoshka_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bge_base_legal_matryoshka_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_base_legal_matryoshka_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|383.6 MB| + +## References + +https://huggingface.co/Tejasw1/bge-base-legal-matryoshka-v1 + +## Included Models + +- DocumentAssembler +- BGEEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bge_large_zhtw_v1_5_en.md b/docs/_posts/ahmedlone127/2024-11-11-bge_large_zhtw_v1_5_en.md new file mode 100644 index 00000000000000..cd14d7182a0705 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bge_large_zhtw_v1_5_en.md @@ -0,0 +1,87 @@ +--- +layout: model +title: English bge_large_zhtw_v1_5 BGEEmbeddings from cfchu +author: John Snow Labs +name: bge_large_zhtw_v1_5 +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, bge] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BGEEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_large_zhtw_v1_5` is a English model originally trained by cfchu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_large_zhtw_v1_5_en_5.5.1_3.0_1731312857829.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_large_zhtw_v1_5_en_5.5.1_3.0_1731312857829.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = BGEEmbeddings.pretrained("bge_large_zhtw_v1_5","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + + +val embeddings = BGEEmbeddings.pretrained("bge_large_zhtw_v1_5","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_large_zhtw_v1_5| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[bge]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/cfchu/bge-large-zhtw-v1.5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bge_large_zhtw_v1_5_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bge_large_zhtw_v1_5_pipeline_en.md new file mode 100644 index 00000000000000..7f7668a4d0feac --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bge_large_zhtw_v1_5_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bge_large_zhtw_v1_5_pipeline pipeline BGEEmbeddings from cfchu +author: John Snow Labs +name: bge_large_zhtw_v1_5_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_large_zhtw_v1_5_pipeline` is a English model originally trained by cfchu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_large_zhtw_v1_5_pipeline_en_5.5.1_3.0_1731312930680.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_large_zhtw_v1_5_pipeline_en_5.5.1_3.0_1731312930680.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bge_large_zhtw_v1_5_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bge_large_zhtw_v1_5_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_large_zhtw_v1_5_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/cfchu/bge-large-zhtw-v1.5 + +## Included Models + +- DocumentAssembler +- BGEEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bge_micro_smiles_en.md b/docs/_posts/ahmedlone127/2024-11-11-bge_micro_smiles_en.md new file mode 100644 index 00000000000000..7ab539b2d3c30c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bge_micro_smiles_en.md @@ -0,0 +1,87 @@ +--- +layout: model +title: English bge_micro_smiles BGEEmbeddings from fpc +author: John Snow Labs +name: bge_micro_smiles +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, bge] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BGEEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_micro_smiles` is a English model originally trained by fpc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_micro_smiles_en_5.5.1_3.0_1731313171405.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_micro_smiles_en_5.5.1_3.0_1731313171405.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = BGEEmbeddings.pretrained("bge_micro_smiles","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + + +val embeddings = BGEEmbeddings.pretrained("bge_micro_smiles","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_micro_smiles| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[bge]| +|Language:|en| +|Size:|52.2 MB| + +## References + +https://huggingface.co/fpc/bge-micro-smiles \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bge_micro_smiles_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bge_micro_smiles_pipeline_en.md new file mode 100644 index 00000000000000..756aa54cbd12b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bge_micro_smiles_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bge_micro_smiles_pipeline pipeline BGEEmbeddings from fpc +author: John Snow Labs +name: bge_micro_smiles_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_micro_smiles_pipeline` is a English model originally trained by fpc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_micro_smiles_pipeline_en_5.5.1_3.0_1731313179313.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_micro_smiles_pipeline_en_5.5.1_3.0_1731313179313.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bge_micro_smiles_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bge_micro_smiles_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_micro_smiles_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|52.2 MB| + +## References + +https://huggingface.co/fpc/bge-micro-smiles + +## Included Models + +- DocumentAssembler +- BGEEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bge_small_english_v1_5_ft_orc_0930_dates_en.md b/docs/_posts/ahmedlone127/2024-11-11-bge_small_english_v1_5_ft_orc_0930_dates_en.md new file mode 100644 index 00000000000000..3d06606c8eb28e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bge_small_english_v1_5_ft_orc_0930_dates_en.md @@ -0,0 +1,87 @@ +--- +layout: model +title: English bge_small_english_v1_5_ft_orc_0930_dates BGEEmbeddings from magnifi +author: John Snow Labs +name: bge_small_english_v1_5_ft_orc_0930_dates +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, bge] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BGEEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_small_english_v1_5_ft_orc_0930_dates` is a English model originally trained by magnifi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_small_english_v1_5_ft_orc_0930_dates_en_5.5.1_3.0_1731312897567.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_small_english_v1_5_ft_orc_0930_dates_en_5.5.1_3.0_1731312897567.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = BGEEmbeddings.pretrained("bge_small_english_v1_5_ft_orc_0930_dates","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + + +val embeddings = BGEEmbeddings.pretrained("bge_small_english_v1_5_ft_orc_0930_dates","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_small_english_v1_5_ft_orc_0930_dates| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[bge]| +|Language:|en| +|Size:|110.3 MB| + +## References + +https://huggingface.co/magnifi/bge-small-en-v1.5-ft-orc-0930-dates \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bge_small_english_v1_5_ft_orc_0930_dates_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bge_small_english_v1_5_ft_orc_0930_dates_pipeline_en.md new file mode 100644 index 00000000000000..3000196ba709a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bge_small_english_v1_5_ft_orc_0930_dates_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bge_small_english_v1_5_ft_orc_0930_dates_pipeline pipeline BGEEmbeddings from magnifi +author: John Snow Labs +name: bge_small_english_v1_5_ft_orc_0930_dates_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_small_english_v1_5_ft_orc_0930_dates_pipeline` is a English model originally trained by magnifi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_small_english_v1_5_ft_orc_0930_dates_pipeline_en_5.5.1_3.0_1731312908553.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_small_english_v1_5_ft_orc_0930_dates_pipeline_en_5.5.1_3.0_1731312908553.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bge_small_english_v1_5_ft_orc_0930_dates_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bge_small_english_v1_5_ft_orc_0930_dates_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_small_english_v1_5_ft_orc_0930_dates_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|110.3 MB| + +## References + +https://huggingface.co/magnifi/bge-small-en-v1.5-ft-orc-0930-dates + +## Included Models + +- DocumentAssembler +- BGEEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bge_small_english_v1_5_rirag_obliqa_en.md b/docs/_posts/ahmedlone127/2024-11-11-bge_small_english_v1_5_rirag_obliqa_en.md new file mode 100644 index 00000000000000..5ab25ba41147f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bge_small_english_v1_5_rirag_obliqa_en.md @@ -0,0 +1,87 @@ +--- +layout: model +title: English bge_small_english_v1_5_rirag_obliqa BGEEmbeddings from raul-delarosa99 +author: John Snow Labs +name: bge_small_english_v1_5_rirag_obliqa +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, bge] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BGEEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_small_english_v1_5_rirag_obliqa` is a English model originally trained by raul-delarosa99. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_small_english_v1_5_rirag_obliqa_en_5.5.1_3.0_1731313044537.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_small_english_v1_5_rirag_obliqa_en_5.5.1_3.0_1731313044537.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = BGEEmbeddings.pretrained("bge_small_english_v1_5_rirag_obliqa","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + + +val embeddings = BGEEmbeddings.pretrained("bge_small_english_v1_5_rirag_obliqa","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_small_english_v1_5_rirag_obliqa| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[bge]| +|Language:|en| +|Size:|79.7 MB| + +## References + +https://huggingface.co/raul-delarosa99/bge-small-en-v1.5-RIRAG_ObliQA \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bge_tuned_en.md b/docs/_posts/ahmedlone127/2024-11-11-bge_tuned_en.md new file mode 100644 index 00000000000000..555047dcb4072e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bge_tuned_en.md @@ -0,0 +1,87 @@ +--- +layout: model +title: English bge_tuned BGEEmbeddings from minh132 +author: John Snow Labs +name: bge_tuned +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, bge] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BGEEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_tuned` is a English model originally trained by minh132. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_tuned_en_5.5.1_3.0_1731313341106.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_tuned_en_5.5.1_3.0_1731313341106.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = BGEEmbeddings.pretrained("bge_tuned","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + + +val embeddings = BGEEmbeddings.pretrained("bge_tuned","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_tuned| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[bge]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/minh132/bge-tuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bge_tuned_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bge_tuned_pipeline_en.md new file mode 100644 index 00000000000000..6c8b87738cf009 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bge_tuned_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bge_tuned_pipeline pipeline BGEEmbeddings from minh132 +author: John Snow Labs +name: bge_tuned_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_tuned_pipeline` is a English model originally trained by minh132. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_tuned_pipeline_en_5.5.1_3.0_1731313405235.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_tuned_pipeline_en_5.5.1_3.0_1731313405235.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bge_tuned_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bge_tuned_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_tuned_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/minh132/bge-tuned + +## Included Models + +- DocumentAssembler +- BGEEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bio_clinicalbert_medical_en.md b/docs/_posts/ahmedlone127/2024-11-11-bio_clinicalbert_medical_en.md new file mode 100644 index 00000000000000..58fbf151de55d3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bio_clinicalbert_medical_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bio_clinicalbert_medical BertForSequenceClassification from tarasophia +author: John Snow Labs +name: bio_clinicalbert_medical +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bio_clinicalbert_medical` is a English model originally trained by tarasophia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bio_clinicalbert_medical_en_5.5.1_3.0_1731310240544.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bio_clinicalbert_medical_en_5.5.1_3.0_1731310240544.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("bio_clinicalbert_medical","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bio_clinicalbert_medical", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bio_clinicalbert_medical| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|405.6 MB| + +## References + +https://huggingface.co/tarasophia/Bio_ClinicalBERT_medical \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bio_clinicalbert_medical_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-bio_clinicalbert_medical_pipeline_en.md new file mode 100644 index 00000000000000..3867571123e64b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bio_clinicalbert_medical_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bio_clinicalbert_medical_pipeline pipeline BertForSequenceClassification from tarasophia +author: John Snow Labs +name: bio_clinicalbert_medical_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bio_clinicalbert_medical_pipeline` is a English model originally trained by tarasophia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bio_clinicalbert_medical_pipeline_en_5.5.1_3.0_1731310261467.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bio_clinicalbert_medical_pipeline_en_5.5.1_3.0_1731310261467.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bio_clinicalbert_medical_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bio_clinicalbert_medical_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bio_clinicalbert_medical_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|405.6 MB| + +## References + +https://huggingface.co/tarasophia/Bio_ClinicalBERT_medical + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-biobert_finetuned_ner_en.md b/docs/_posts/ahmedlone127/2024-11-11-biobert_finetuned_ner_en.md new file mode 100644 index 00000000000000..62bbed3770bcf2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-biobert_finetuned_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English biobert_finetuned_ner BertForTokenClassification from jialinselenasong +author: John Snow Labs +name: biobert_finetuned_ner +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biobert_finetuned_ner` is a English model originally trained by jialinselenasong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobert_finetuned_ner_en_5.5.1_3.0_1731285895856.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobert_finetuned_ner_en_5.5.1_3.0_1731285895856.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document')  # reads "text", writes "document" + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token')  # reads "document", writes "token" + +tokenClassifier = BertForTokenClassification.pretrained("biobert_finetuned_ner","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner")  # input names must match the output columns of the stages above + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") // reads "text", writes "document" + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") // reads "document", writes "token" + +val tokenClassifier = BertForTokenClassification.pretrained("biobert_finetuned_ner", "en") + .setInputCols(Array("document","token")) // must match the output columns of the stages above + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobert_finetuned_ner| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.1 MB| + +## References + +https://huggingface.co/jialinselenasong/biobert-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-biobert_finetuned_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-biobert_finetuned_ner_pipeline_en.md new file mode 100644 index 00000000000000..ee71f638d2a722 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-biobert_finetuned_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English biobert_finetuned_ner_pipeline pipeline BertForTokenClassification from jialinselenasong +author: John Snow Labs +name: biobert_finetuned_ner_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biobert_finetuned_ner_pipeline` is a English model originally trained by jialinselenasong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobert_finetuned_ner_pipeline_en_5.5.1_3.0_1731285921161.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobert_finetuned_ner_pipeline_en_5.5.1_3.0_1731285921161.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pretrained_pipeline = PretrainedPipeline("biobert_finetuned_ner_pipeline", lang="en")  # pretrained pipeline selected by name and language +annotations = pretrained_pipeline.transform(df)  # `df` is not defined in this snippet; pass your own Spark DataFrame + +``` +```scala + +val pretrainedPipeline = new PretrainedPipeline("biobert_finetuned_ner_pipeline", lang = "en") // pretrained pipeline selected by name and language +val annotations = pretrainedPipeline.transform(df) // `df` is not defined in this snippet; pass your own Spark DataFrame + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobert_finetuned_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.1 MB| + +## References + +https://huggingface.co/jialinselenasong/biobert-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-biolinkbert_outcomes_ner_en.md b/docs/_posts/ahmedlone127/2024-11-11-biolinkbert_outcomes_ner_en.md new file mode 100644 index 00000000000000..f69fec86f90ffc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-biolinkbert_outcomes_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English biolinkbert_outcomes_ner BertForTokenClassification from laiking +author: John Snow Labs +name: biolinkbert_outcomes_ner +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biolinkbert_outcomes_ner` is a English model originally trained by laiking. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biolinkbert_outcomes_ner_en_5.5.1_3.0_1731290874920.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biolinkbert_outcomes_ner_en_5.5.1_3.0_1731290874920.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document')  # reads "text", writes "document" + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token')  # reads "document", writes "token" + +tokenClassifier = BertForTokenClassification.pretrained("biolinkbert_outcomes_ner","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner")  # input names must match the output columns of the stages above + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") // reads "text", writes "document" + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") // reads "document", writes "token" + +val tokenClassifier = BertForTokenClassification.pretrained("biolinkbert_outcomes_ner", "en") + .setInputCols(Array("document","token")) // must match the output columns of the stages above + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biolinkbert_outcomes_ner| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.5 MB| + +## References + +https://huggingface.co/laiking/biolinkbert-outcomes-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-biolinkbert_outcomes_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-biolinkbert_outcomes_ner_pipeline_en.md new file mode 100644 index 00000000000000..017d8a8209a313 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-biolinkbert_outcomes_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English biolinkbert_outcomes_ner_pipeline pipeline BertForTokenClassification from laiking +author: John Snow Labs +name: biolinkbert_outcomes_ner_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biolinkbert_outcomes_ner_pipeline` is a English model originally trained by laiking. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biolinkbert_outcomes_ner_pipeline_en_5.5.1_3.0_1731290896292.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biolinkbert_outcomes_ner_pipeline_en_5.5.1_3.0_1731290896292.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pretrained_pipeline = PretrainedPipeline("biolinkbert_outcomes_ner_pipeline", lang="en")  # pretrained pipeline selected by name and language +annotations = pretrained_pipeline.transform(df)  # `df` is not defined in this snippet; pass your own Spark DataFrame + +``` +```scala + +val pretrainedPipeline = new PretrainedPipeline("biolinkbert_outcomes_ner_pipeline", lang = "en") // pretrained pipeline selected by name and language +val annotations = pretrainedPipeline.transform(df) // `df` is not defined in this snippet; pass your own Spark DataFrame + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biolinkbert_outcomes_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.5 MB| + +## References + +https://huggingface.co/laiking/biolinkbert-outcomes-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-biomed_roberta_all_deep_en.md b/docs/_posts/ahmedlone127/2024-11-11-biomed_roberta_all_deep_en.md new file mode 100644 index 00000000000000..b8ba4c2a86dc3c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-biomed_roberta_all_deep_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English biomed_roberta_all_deep RoBertaForTokenClassification from jialinselenasong +author: John Snow Labs +name: biomed_roberta_all_deep +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biomed_roberta_all_deep` is a English model originally trained by jialinselenasong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biomed_roberta_all_deep_en_5.5.1_3.0_1731311373367.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biomed_roberta_all_deep_en_5.5.1_3.0_1731311373367.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document')  # reads "text", writes "document" + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token')  # reads "document", writes "token" + +tokenClassifier = RoBertaForTokenClassification.pretrained("biomed_roberta_all_deep","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner")  # input names must match the output columns of the stages above + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") // reads "text", writes "document" + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") // reads "document", writes "token" + +val tokenClassifier = RoBertaForTokenClassification.pretrained("biomed_roberta_all_deep", "en") + .setInputCols(Array("document","token")) // must match the output columns of the stages above + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biomed_roberta_all_deep| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|466.3 MB| + +## References + +https://huggingface.co/jialinselenasong/biomed_roberta_all_deep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-biomednlp_pubmedbert_base_uncased_abstract_fulltext_finetuned_pubmedqa_en.md b/docs/_posts/ahmedlone127/2024-11-11-biomednlp_pubmedbert_base_uncased_abstract_fulltext_finetuned_pubmedqa_en.md new file mode 100644 index 00000000000000..80c4b0e0579710 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-biomednlp_pubmedbert_base_uncased_abstract_fulltext_finetuned_pubmedqa_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English biomednlp_pubmedbert_base_uncased_abstract_fulltext_finetuned_pubmedqa BertForSequenceClassification from blizrys +author: John Snow Labs +name: biomednlp_pubmedbert_base_uncased_abstract_fulltext_finetuned_pubmedqa +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biomednlp_pubmedbert_base_uncased_abstract_fulltext_finetuned_pubmedqa` is an English model originally trained by blizrys.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biomednlp_pubmedbert_base_uncased_abstract_fulltext_finetuned_pubmedqa_en_5.5.1_3.0_1731310162199.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biomednlp_pubmedbert_base_uncased_abstract_fulltext_finetuned_pubmedqa_en_5.5.1_3.0_1731310162199.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document')  # reads "text", writes "document" + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token')  # reads "document", writes "token" + +sequenceClassifier = BertForSequenceClassification.pretrained("biomednlp_pubmedbert_base_uncased_abstract_fulltext_finetuned_pubmedqa","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class")  # input names must match the output columns of the stages above + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") // reads "text", writes "document" + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") // reads "document", writes "token" + +val sequenceClassifier = BertForSequenceClassification.pretrained("biomednlp_pubmedbert_base_uncased_abstract_fulltext_finetuned_pubmedqa", "en") + .setInputCols(Array("document","token")) // must match the output columns of the stages above + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biomednlp_pubmedbert_base_uncased_abstract_fulltext_finetuned_pubmedqa| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|410.4 MB| + +## References + +https://huggingface.co/blizrys/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext-finetuned-pubmedqa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-biomednlp_pubmedbert_base_uncased_abstract_fulltext_finetuned_pubmedqa_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-biomednlp_pubmedbert_base_uncased_abstract_fulltext_finetuned_pubmedqa_pipeline_en.md new file mode 100644 index 00000000000000..be9d4f6ad69b31 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-biomednlp_pubmedbert_base_uncased_abstract_fulltext_finetuned_pubmedqa_pipeline_en.md @@ -0,0 +1,72 @@ +--- +layout: model +title: English biomednlp_pubmedbert_base_uncased_abstract_fulltext_finetuned_pubmedqa_pipeline pipeline BertForSequenceClassification from blizrys +author: John Snow Labs +name: biomednlp_pubmedbert_base_uncased_abstract_fulltext_finetuned_pubmedqa_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biomednlp_pubmedbert_base_uncased_abstract_fulltext_finetuned_pubmedqa_pipeline` is an English model originally trained by blizrys.
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biomednlp_pubmedbert_base_uncased_abstract_fulltext_finetuned_pubmedqa_pipeline_en_5.5.1_3.0_1731310188434.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biomednlp_pubmedbert_base_uncased_abstract_fulltext_finetuned_pubmedqa_pipeline_en_5.5.1_3.0_1731310188434.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +pretrained_pipeline = PretrainedPipeline("biomednlp_pubmedbert_base_uncased_abstract_fulltext_finetuned_pubmedqa_pipeline", lang="en")  # pretrained pipeline selected by name and language +annotations = pretrained_pipeline.transform(df)  # `df` is not defined in this snippet; pass your own Spark DataFrame +``` +```scala +val pretrainedPipeline = new PretrainedPipeline("biomednlp_pubmedbert_base_uncased_abstract_fulltext_finetuned_pubmedqa_pipeline", lang = "en") // pretrained pipeline selected by name and language +val annotations = pretrainedPipeline.transform(df) // `df` is not defined in this snippet; pass your own Spark DataFrame +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biomednlp_pubmedbert_base_uncased_abstract_fulltext_finetuned_pubmedqa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|410.4 MB| + +## References + + + +https://huggingface.co/blizrys/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext-finetuned-pubmedqa + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-biomednlp_pubmedbert_proteinstructure_ner_v2_1_en.md b/docs/_posts/ahmedlone127/2024-11-11-biomednlp_pubmedbert_proteinstructure_ner_v2_1_en.md new file mode 100644 index 00000000000000..c1f7c3868cebd0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-biomednlp_pubmedbert_proteinstructure_ner_v2_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English biomednlp_pubmedbert_proteinstructure_ner_v2_1 BertForTokenClassification from PDBEurope +author: John Snow Labs +name: biomednlp_pubmedbert_proteinstructure_ner_v2_1 +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biomednlp_pubmedbert_proteinstructure_ner_v2_1` is an English model originally trained by PDBEurope.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biomednlp_pubmedbert_proteinstructure_ner_v2_1_en_5.5.1_3.0_1731285743740.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biomednlp_pubmedbert_proteinstructure_ner_v2_1_en_5.5.1_3.0_1731285743740.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document')  # reads "text", writes "document" + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token')  # reads "document", writes "token" + +tokenClassifier = BertForTokenClassification.pretrained("biomednlp_pubmedbert_proteinstructure_ner_v2_1","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner")  # input names must match the output columns of the stages above + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") // reads "text", writes "document" + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") // reads "document", writes "token" + +val tokenClassifier = BertForTokenClassification.pretrained("biomednlp_pubmedbert_proteinstructure_ner_v2_1", "en") + .setInputCols(Array("document","token")) // must match the output columns of the stages above + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biomednlp_pubmedbert_proteinstructure_ner_v2_1| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|408.3 MB| + +## References + +https://huggingface.co/PDBEurope/BiomedNLP-PubMedBERT-ProteinStructure-NER-v2.1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-biomednlp_pubmedbert_proteinstructure_ner_v2_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-biomednlp_pubmedbert_proteinstructure_ner_v2_1_pipeline_en.md new file mode 100644 index 00000000000000..f520348db9181e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-biomednlp_pubmedbert_proteinstructure_ner_v2_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English biomednlp_pubmedbert_proteinstructure_ner_v2_1_pipeline pipeline BertForTokenClassification from PDBEurope +author: John Snow Labs +name: biomednlp_pubmedbert_proteinstructure_ner_v2_1_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biomednlp_pubmedbert_proteinstructure_ner_v2_1_pipeline` is an English model originally trained by PDBEurope.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biomednlp_pubmedbert_proteinstructure_ner_v2_1_pipeline_en_5.5.1_3.0_1731285765036.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biomednlp_pubmedbert_proteinstructure_ner_v2_1_pipeline_en_5.5.1_3.0_1731285765036.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pretrained_pipeline = PretrainedPipeline("biomednlp_pubmedbert_proteinstructure_ner_v2_1_pipeline", lang="en")  # pretrained pipeline selected by name and language +annotations = pretrained_pipeline.transform(df)  # `df` is not defined in this snippet; pass your own Spark DataFrame + +``` +```scala + +val pretrainedPipeline = new PretrainedPipeline("biomednlp_pubmedbert_proteinstructure_ner_v2_1_pipeline", lang = "en") // pretrained pipeline selected by name and language +val annotations = pretrainedPipeline.transform(df) // `df` is not defined in this snippet; pass your own Spark DataFrame + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biomednlp_pubmedbert_proteinstructure_ner_v2_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.3 MB| + +## References + +https://huggingface.co/PDBEurope/BiomedNLP-PubMedBERT-ProteinStructure-NER-v2.1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bsc_bio_ehr_spanish_distemist_es.md b/docs/_posts/ahmedlone127/2024-11-11-bsc_bio_ehr_spanish_distemist_es.md new file mode 100644 index 00000000000000..804ae6163d7309 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bsc_bio_ehr_spanish_distemist_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish bsc_bio_ehr_spanish_distemist RoBertaForTokenClassification from BSC-NLP4BIA +author: John Snow Labs +name: bsc_bio_ehr_spanish_distemist +date: 2024-11-11 +tags: [es, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bsc_bio_ehr_spanish_distemist` is a Castilian, Spanish model originally trained by BSC-NLP4BIA. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_distemist_es_5.5.1_3.0_1731311054905.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_distemist_es_5.5.1_3.0_1731311054905.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document')  # reads "text", writes "document" + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token')  # reads "document", writes "token" + +tokenClassifier = RoBertaForTokenClassification.pretrained("bsc_bio_ehr_spanish_distemist","es") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner")  # input names must match the output columns of the stages above + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") // reads "text", writes "document" + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") // reads "document", writes "token" + +val tokenClassifier = RoBertaForTokenClassification.pretrained("bsc_bio_ehr_spanish_distemist", "es") + .setInputCols(Array("document","token")) // must match the output columns of the stages above + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bsc_bio_ehr_spanish_distemist| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|es| +|Size:|441.8 MB| + +## References + +https://huggingface.co/BSC-NLP4BIA/bsc-bio-ehr-es-distemist \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bsc_bio_ehr_spanish_distemist_pipeline_es.md b/docs/_posts/ahmedlone127/2024-11-11-bsc_bio_ehr_spanish_distemist_pipeline_es.md new file mode 100644 index 00000000000000..be867693b94b88 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bsc_bio_ehr_spanish_distemist_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish bsc_bio_ehr_spanish_distemist_pipeline pipeline RoBertaForTokenClassification from BSC-NLP4BIA +author: John Snow Labs +name: bsc_bio_ehr_spanish_distemist_pipeline +date: 2024-11-11 +tags: [es, open_source, pipeline, onnx] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bsc_bio_ehr_spanish_distemist_pipeline` is a Castilian, Spanish model originally trained by BSC-NLP4BIA. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_distemist_pipeline_es_5.5.1_3.0_1731311079632.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_distemist_pipeline_es_5.5.1_3.0_1731311079632.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pretrained_pipeline = PretrainedPipeline("bsc_bio_ehr_spanish_distemist_pipeline", lang="es")  # pretrained pipeline selected by name and language +annotations = pretrained_pipeline.transform(df)  # `df` is not defined in this snippet; pass your own Spark DataFrame + +``` +```scala + +val pretrainedPipeline = new PretrainedPipeline("bsc_bio_ehr_spanish_distemist_pipeline", lang = "es") // pretrained pipeline selected by name and language +val annotations = pretrainedPipeline.transform(df) // `df` is not defined in this snippet; pass your own Spark DataFrame + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bsc_bio_ehr_spanish_distemist_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|441.8 MB| + +## References + +https://huggingface.co/BSC-NLP4BIA/bsc-bio-ehr-es-distemist + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-bsc_bio_ehr_spanish_medprocner_pipeline_es.md b/docs/_posts/ahmedlone127/2024-11-11-bsc_bio_ehr_spanish_medprocner_pipeline_es.md new file mode 100644 index 00000000000000..76b4458fe2aa97 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-bsc_bio_ehr_spanish_medprocner_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish bsc_bio_ehr_spanish_medprocner_pipeline pipeline RoBertaForTokenClassification from BSC-NLP4BIA +author: John Snow Labs +name: bsc_bio_ehr_spanish_medprocner_pipeline +date: 2024-11-11 +tags: [es, open_source, pipeline, onnx] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bsc_bio_ehr_spanish_medprocner_pipeline` is a Castilian, Spanish model originally trained by BSC-NLP4BIA. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_medprocner_pipeline_es_5.5.1_3.0_1731314158801.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_medprocner_pipeline_es_5.5.1_3.0_1731314158801.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bsc_bio_ehr_spanish_medprocner_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bsc_bio_ehr_spanish_medprocner_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bsc_bio_ehr_spanish_medprocner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|441.8 MB| + +## References + +https://huggingface.co/BSC-NLP4BIA/bsc-bio-ehr-es-medprocner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-burmese_bert_nepal_bhasa_version_5_0_en.md b/docs/_posts/ahmedlone127/2024-11-11-burmese_bert_nepal_bhasa_version_5_0_en.md new file mode 100644 index 00000000000000..e1f011258cabf7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-burmese_bert_nepal_bhasa_version_5_0_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_bert_nepal_bhasa_version_5_0 BertForQuestionAnswering from Ashkh0099 +author: John Snow Labs +name: burmese_bert_nepal_bhasa_version_5_0 +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `burmese_bert_nepal_bhasa_version_5_0` is an English model originally trained by Ashkh0099. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_bert_nepal_bhasa_version_5_0_en_5.5.1_3.0_1731289220310.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_bert_nepal_bhasa_version_5_0_en_5.5.1_3.0_1731289220310.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("burmese_bert_nepal_bhasa_version_5_0","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("burmese_bert_nepal_bhasa_version_5_0", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_bert_nepal_bhasa_version_5_0| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Ashkh0099/my-bert-new-version-5.0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-burmese_bert_nepal_bhasa_version_5_0_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-burmese_bert_nepal_bhasa_version_5_0_pipeline_en.md new file mode 100644 index 00000000000000..210acef151dd59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-burmese_bert_nepal_bhasa_version_5_0_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_bert_nepal_bhasa_version_5_0_pipeline pipeline BertForQuestionAnswering from Ashkh0099 +author: John Snow Labs +name: burmese_bert_nepal_bhasa_version_5_0_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `burmese_bert_nepal_bhasa_version_5_0_pipeline` is an English model originally trained by Ashkh0099. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_bert_nepal_bhasa_version_5_0_pipeline_en_5.5.1_3.0_1731289246571.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_bert_nepal_bhasa_version_5_0_pipeline_en_5.5.1_3.0_1731289246571.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_bert_nepal_bhasa_version_5_0_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_bert_nepal_bhasa_version_5_0_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_bert_nepal_bhasa_version_5_0_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Ashkh0099/my-bert-new-version-5.0 + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-cc_uffs_ppc_ft_test_multiqa_en.md b/docs/_posts/ahmedlone127/2024-11-11-cc_uffs_ppc_ft_test_multiqa_en.md new file mode 100644 index 00000000000000..d2c15b2c8f3562 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-cc_uffs_ppc_ft_test_multiqa_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English cc_uffs_ppc_ft_test_multiqa MPNetEmbeddings from winderfeld +author: John Snow Labs +name: cc_uffs_ppc_ft_test_multiqa +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cc_uffs_ppc_ft_test_multiqa` is an English model originally trained by winderfeld. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cc_uffs_ppc_ft_test_multiqa_en_5.5.1_3.0_1731294914275.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cc_uffs_ppc_ft_test_multiqa_en_5.5.1_3.0_1731294914275.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("cc_uffs_ppc_ft_test_multiqa","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("cc_uffs_ppc_ft_test_multiqa","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cc_uffs_ppc_ft_test_multiqa| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/winderfeld/cc-uffs-ppc-ft-test-multiqa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-cc_uffs_ppc_ft_test_multiqa_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-cc_uffs_ppc_ft_test_multiqa_pipeline_en.md new file mode 100644 index 00000000000000..20f7fe6e43321e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-cc_uffs_ppc_ft_test_multiqa_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English cc_uffs_ppc_ft_test_multiqa_pipeline pipeline MPNetEmbeddings from winderfeld +author: John Snow Labs +name: cc_uffs_ppc_ft_test_multiqa_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cc_uffs_ppc_ft_test_multiqa_pipeline` is an English model originally trained by winderfeld. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cc_uffs_ppc_ft_test_multiqa_pipeline_en_5.5.1_3.0_1731294936076.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cc_uffs_ppc_ft_test_multiqa_pipeline_en_5.5.1_3.0_1731294936076.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("cc_uffs_ppc_ft_test_multiqa_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("cc_uffs_ppc_ft_test_multiqa_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cc_uffs_ppc_ft_test_multiqa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/winderfeld/cc-uffs-ppc-ft-test-multiqa + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-celloscope_28000_ner_banglabert_finetuned_en.md b/docs/_posts/ahmedlone127/2024-11-11-celloscope_28000_ner_banglabert_finetuned_en.md new file mode 100644 index 00000000000000..51842560888783 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-celloscope_28000_ner_banglabert_finetuned_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English celloscope_28000_ner_banglabert_finetuned BertForTokenClassification from celloscopeai +author: John Snow Labs +name: celloscope_28000_ner_banglabert_finetuned +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `celloscope_28000_ner_banglabert_finetuned` is an English model originally trained by celloscopeai. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/celloscope_28000_ner_banglabert_finetuned_en_5.5.1_3.0_1731285437232.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/celloscope_28000_ner_banglabert_finetuned_en_5.5.1_3.0_1731285437232.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("celloscope_28000_ner_banglabert_finetuned","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("celloscope_28000_ner_banglabert_finetuned", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|celloscope_28000_ner_banglabert_finetuned| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|412.2 MB| + +## References + +https://huggingface.co/celloscopeai/celloscope-28000-ner-banglabert-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-celloscope_28000_ner_banglabert_finetuned_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-celloscope_28000_ner_banglabert_finetuned_pipeline_en.md new file mode 100644 index 00000000000000..7e1a76e90d7d71 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-celloscope_28000_ner_banglabert_finetuned_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English celloscope_28000_ner_banglabert_finetuned_pipeline pipeline BertForTokenClassification from celloscopeai +author: John Snow Labs +name: celloscope_28000_ner_banglabert_finetuned_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `celloscope_28000_ner_banglabert_finetuned_pipeline` is an English model originally trained by celloscopeai. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/celloscope_28000_ner_banglabert_finetuned_pipeline_en_5.5.1_3.0_1731285459344.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/celloscope_28000_ner_banglabert_finetuned_pipeline_en_5.5.1_3.0_1731285459344.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("celloscope_28000_ner_banglabert_finetuned_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("celloscope_28000_ner_banglabert_finetuned_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|celloscope_28000_ner_banglabert_finetuned_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|412.2 MB| + +## References + +https://huggingface.co/celloscopeai/celloscope-28000-ner-banglabert-finetuned + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-checkpoints_almino_en.md b/docs/_posts/ahmedlone127/2024-11-11-checkpoints_almino_en.md new file mode 100644 index 00000000000000..f8b1fef9dc977f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-checkpoints_almino_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English checkpoints_almino WhisperForCTC from almino +author: John Snow Labs +name: checkpoints_almino +date: 2024-11-11 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `checkpoints_almino` is an English model originally trained by almino. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/checkpoints_almino_en_5.5.1_3.0_1731302964849.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/checkpoints_almino_en_5.5.1_3.0_1731302964849.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("checkpoints_almino","en") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("checkpoints_almino", "en") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|checkpoints_almino| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/almino/checkpoints \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-checkpoints_almino_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-checkpoints_almino_pipeline_en.md new file mode 100644 index 00000000000000..1c0cbd4df4cfab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-checkpoints_almino_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English checkpoints_almino_pipeline pipeline WhisperForCTC from almino +author: John Snow Labs +name: checkpoints_almino_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `checkpoints_almino_pipeline` is an English model originally trained by almino. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/checkpoints_almino_pipeline_en_5.5.1_3.0_1731303059185.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/checkpoints_almino_pipeline_en_5.5.1_3.0_1731303059185.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("checkpoints_almino_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("checkpoints_almino_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|checkpoints_almino_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/almino/checkpoints + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-chinese_roberta_wwm_ext_large_en.md b/docs/_posts/ahmedlone127/2024-11-11-chinese_roberta_wwm_ext_large_en.md new file mode 100644 index 00000000000000..272cf1815a1730 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-chinese_roberta_wwm_ext_large_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English chinese_roberta_wwm_ext_large BertForTokenClassification from agdsga +author: John Snow Labs +name: chinese_roberta_wwm_ext_large +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `chinese_roberta_wwm_ext_large` is an English model originally trained by agdsga. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/chinese_roberta_wwm_ext_large_en_5.5.1_3.0_1731291105667.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/chinese_roberta_wwm_ext_large_en_5.5.1_3.0_1731291105667.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("chinese_roberta_wwm_ext_large","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("chinese_roberta_wwm_ext_large", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|chinese_roberta_wwm_ext_large| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/agdsga/chinese-roberta-wwm-ext-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-chinese_roberta_wwm_ext_large_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-chinese_roberta_wwm_ext_large_pipeline_en.md new file mode 100644 index 00000000000000..e49d1eba0e6fd0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-chinese_roberta_wwm_ext_large_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English chinese_roberta_wwm_ext_large_pipeline pipeline BertForTokenClassification from agdsga +author: John Snow Labs +name: chinese_roberta_wwm_ext_large_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `chinese_roberta_wwm_ext_large_pipeline` is an English model originally trained by agdsga. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/chinese_roberta_wwm_ext_large_pipeline_en_5.5.1_3.0_1731291172017.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/chinese_roberta_wwm_ext_large_pipeline_en_5.5.1_3.0_1731291172017.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("chinese_roberta_wwm_ext_large_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("chinese_roberta_wwm_ext_large_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|chinese_roberta_wwm_ext_large_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/agdsga/chinese-roberta-wwm-ext-large + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-classify_bluesky_1000_v2_en.md b/docs/_posts/ahmedlone127/2024-11-11-classify_bluesky_1000_v2_en.md new file mode 100644 index 00000000000000..52d890d42c048a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-classify_bluesky_1000_v2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English classify_bluesky_1000_v2 AlbertForSequenceClassification from Himanshu99001 +author: John Snow Labs +name: classify_bluesky_1000_v2 +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `classify_bluesky_1000_v2` is an English model originally trained by Himanshu99001. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/classify_bluesky_1000_v2_en_5.5.1_3.0_1731296867467.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/classify_bluesky_1000_v2_en_5.5.1_3.0_1731296867467.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = AlbertForSequenceClassification.pretrained("classify_bluesky_1000_v2","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification.pretrained("classify_bluesky_1000_v2", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|classify_bluesky_1000_v2| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/Himanshu99001/classify-bluesky-1000-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-classify_bluesky_1000_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-classify_bluesky_1000_v2_pipeline_en.md new file mode 100644 index 00000000000000..fcfea659489bba --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-classify_bluesky_1000_v2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English classify_bluesky_1000_v2_pipeline pipeline AlbertForSequenceClassification from Himanshu99001 +author: John Snow Labs +name: classify_bluesky_1000_v2_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `classify_bluesky_1000_v2_pipeline` is an English model originally trained by Himanshu99001. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/classify_bluesky_1000_v2_pipeline_en_5.5.1_3.0_1731296869891.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/classify_bluesky_1000_v2_pipeline_en_5.5.1_3.0_1731296869891.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Sample input so the snippet runs as-is. The bundled DocumentAssembler presumably reads
+# the "text" column (as in the matching model card) - adjust if your data differs.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+# Load the pretrained pipeline and annotate the DataFrame.
+pipeline = PretrainedPipeline("classify_bluesky_1000_v2_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// Sample input so the snippet runs as-is (toDS/toDF need `import spark.implicits._`).
+// The bundled DocumentAssembler presumably reads the "text" column - adjust if your data differs.
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+// Load the pretrained pipeline and annotate the DataFrame.
+val pipeline = new PretrainedPipeline("classify_bluesky_1000_v2_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|classify_bluesky_1000_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/Himanshu99001/classify-bluesky-1000-v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-cleaned_e5_base_unsupervised_en.md b/docs/_posts/ahmedlone127/2024-11-11-cleaned_e5_base_unsupervised_en.md new file mode 100644 index 00000000000000..44f53fb5b1fd00 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-cleaned_e5_base_unsupervised_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English cleaned_e5_base_unsupervised E5Embeddings from rithwik-db +author: John Snow Labs +name: cleaned_e5_base_unsupervised +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, e5] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: E5Embeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained E5Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cleaned_e5_base_unsupervised` is an English model originally trained by rithwik-db. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cleaned_e5_base_unsupervised_en_5.5.1_3.0_1731300058597.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cleaned_e5_base_unsupervised_en_5.5.1_3.0_1731300058597.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Stage 1: wrap the raw "text" column into "document" annotations.
+document_assembler = (
+    DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+)
+
+# Stage 2: embed each document with the pretrained E5 model.
+e5_embeddings = (
+    E5Embeddings.pretrained("cleaned_e5_base_unsupervised","en")
+    .setInputCols(["document"])
+    .setOutputCol("embeddings")
+)
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = Pipeline(stages=[document_assembler, e5_embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Stage 1: wrap the raw "text" column into "document" annotations.
+val docAssembler = new DocumentAssembler().setInputCol("text").setOutputCol("document")
+
+// Stage 2: embed each document with the pretrained E5 model.
+val e5Embeddings = E5Embeddings
+  .pretrained("cleaned_e5_base_unsupervised","en")
+  .setInputCols(Array("document"))
+  .setOutputCol("embeddings")
+
+val data = Seq("I love spark-nlp").toDF("text")
+val pipeline = new Pipeline().setStages(Array(docAssembler, e5Embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cleaned_e5_base_unsupervised| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[E5]| +|Language:|en| +|Size:|386.2 MB| + +## References + +https://huggingface.co/rithwik-db/cleaned-e5-base-unsupervised \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-cleaned_e5_base_unsupervised_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-cleaned_e5_base_unsupervised_pipeline_en.md new file mode 100644 index 00000000000000..5f3facfd1ca268 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-cleaned_e5_base_unsupervised_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English cleaned_e5_base_unsupervised_pipeline pipeline E5Embeddings from rithwik-db +author: John Snow Labs +name: cleaned_e5_base_unsupervised_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained E5Embeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cleaned_e5_base_unsupervised_pipeline` is an English model originally trained by rithwik-db. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cleaned_e5_base_unsupervised_pipeline_en_5.5.1_3.0_1731300087523.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cleaned_e5_base_unsupervised_pipeline_en_5.5.1_3.0_1731300087523.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Sample input so the snippet runs as-is. The bundled DocumentAssembler presumably reads
+# the "text" column (as in the matching model card) - adjust if your data differs.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+# Load the pretrained pipeline and annotate the DataFrame.
+pipeline = PretrainedPipeline("cleaned_e5_base_unsupervised_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// Sample input so the snippet runs as-is (toDS/toDF need `import spark.implicits._`).
+// The bundled DocumentAssembler presumably reads the "text" column - adjust if your data differs.
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+// Load the pretrained pipeline and annotate the DataFrame.
+val pipeline = new PretrainedPipeline("cleaned_e5_base_unsupervised_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cleaned_e5_base_unsupervised_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|386.2 MB| + +## References + +https://huggingface.co/rithwik-db/cleaned-e5-base-unsupervised + +## Included Models + +- DocumentAssembler +- E5Embeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-cleaned_e5_large_unsupervised_en.md b/docs/_posts/ahmedlone127/2024-11-11-cleaned_e5_large_unsupervised_en.md new file mode 100644 index 00000000000000..6901e54313ff7f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-cleaned_e5_large_unsupervised_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English cleaned_e5_large_unsupervised E5Embeddings from rithwik-db +author: John Snow Labs +name: cleaned_e5_large_unsupervised +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, e5] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: E5Embeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained E5Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cleaned_e5_large_unsupervised` is an English model originally trained by rithwik-db. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cleaned_e5_large_unsupervised_en_5.5.1_3.0_1731300839857.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cleaned_e5_large_unsupervised_en_5.5.1_3.0_1731300839857.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Stage 1: wrap the raw "text" column into "document" annotations.
+document_assembler = (
+    DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+)
+
+# Stage 2: embed each document with the pretrained E5 model.
+e5_embeddings = (
+    E5Embeddings.pretrained("cleaned_e5_large_unsupervised","en")
+    .setInputCols(["document"])
+    .setOutputCol("embeddings")
+)
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = Pipeline(stages=[document_assembler, e5_embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Stage 1: wrap the raw "text" column into "document" annotations.
+val docAssembler = new DocumentAssembler().setInputCol("text").setOutputCol("document")
+
+// Stage 2: embed each document with the pretrained E5 model.
+val e5Embeddings = E5Embeddings
+  .pretrained("cleaned_e5_large_unsupervised","en")
+  .setInputCols(Array("document"))
+  .setOutputCol("embeddings")
+
+val data = Seq("I love spark-nlp").toDF("text")
+val pipeline = new Pipeline().setStages(Array(docAssembler, e5Embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cleaned_e5_large_unsupervised| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[E5]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/rithwik-db/cleaned-e5-large-unsupervised \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-cleaned_e5_large_unsupervised_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-cleaned_e5_large_unsupervised_pipeline_en.md new file mode 100644 index 00000000000000..3e2cad6a59a779 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-cleaned_e5_large_unsupervised_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English cleaned_e5_large_unsupervised_pipeline pipeline E5Embeddings from rithwik-db +author: John Snow Labs +name: cleaned_e5_large_unsupervised_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained E5Embeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cleaned_e5_large_unsupervised_pipeline` is an English model originally trained by rithwik-db. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cleaned_e5_large_unsupervised_pipeline_en_5.5.1_3.0_1731300911401.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cleaned_e5_large_unsupervised_pipeline_en_5.5.1_3.0_1731300911401.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Sample input so the snippet runs as-is. The bundled DocumentAssembler presumably reads
+# the "text" column (as in the matching model card) - adjust if your data differs.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+# Load the pretrained pipeline and annotate the DataFrame.
+pipeline = PretrainedPipeline("cleaned_e5_large_unsupervised_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// Sample input so the snippet runs as-is (toDS/toDF need `import spark.implicits._`).
+// The bundled DocumentAssembler presumably reads the "text" column - adjust if your data differs.
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+// Load the pretrained pipeline and annotate the DataFrame.
+val pipeline = new PretrainedPipeline("cleaned_e5_large_unsupervised_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cleaned_e5_large_unsupervised_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/rithwik-db/cleaned-e5-large-unsupervised + +## Included Models + +- DocumentAssembler +- E5Embeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-climateattention_ctw_en.md b/docs/_posts/ahmedlone127/2024-11-11-climateattention_ctw_en.md new file mode 100644 index 00000000000000..63a80ebbcd8fbc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-climateattention_ctw_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English climateattention_ctw RoBertaForTokenClassification from kruthof +author: John Snow Labs +name: climateattention_ctw +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `climateattention_ctw` is an English model originally trained by kruthof. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/climateattention_ctw_en_5.5.1_3.0_1731311348537.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/climateattention_ctw_en_5.5.1_3.0_1731311348537.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session (`spark`) and the usual sparknlp / pyspark.ml imports.
+
+# Wrap the raw "text" column into "document" annotations.
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+# Split every document into tokens.
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# The token classifier consumes the assembler's "document" column and the tokenizer's "token" column.
+tokenClassifier = RoBertaForTokenClassification.pretrained("climateattention_ctw","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Assumes an active SparkSession `spark` with `import spark.implicits._` in scope (needed for toDS/toDF).
+
+// DocumentAssembler works on a single column, so it exposes setInputCol / setOutputCol (singular).
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// Input columns must match the names produced by the two stages above.
+val tokenClassifier = RoBertaForTokenClassification.pretrained("climateattention_ctw", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|climateattention_ctw| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|307.4 MB| + +## References + +https://huggingface.co/kruthof/climateattention-ctw \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-climateattention_ctw_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-climateattention_ctw_pipeline_en.md new file mode 100644 index 00000000000000..9201831e6ece88 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-climateattention_ctw_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English climateattention_ctw_pipeline pipeline RoBertaForTokenClassification from kruthof +author: John Snow Labs +name: climateattention_ctw_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `climateattention_ctw_pipeline` is an English model originally trained by kruthof. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/climateattention_ctw_pipeline_en_5.5.1_3.0_1731311365014.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/climateattention_ctw_pipeline_en_5.5.1_3.0_1731311365014.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Sample input so the snippet runs as-is. The bundled DocumentAssembler presumably reads
+# the "text" column (as in the matching model card) - adjust if your data differs.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+# Load the pretrained pipeline and annotate the DataFrame.
+pipeline = PretrainedPipeline("climateattention_ctw_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// Sample input so the snippet runs as-is (toDS/toDF need `import spark.implicits._`).
+// The bundled DocumentAssembler presumably reads the "text" column - adjust if your data differs.
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+// Load the pretrained pipeline and annotate the DataFrame.
+val pipeline = new PretrainedPipeline("climateattention_ctw_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|climateattention_ctw_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|307.4 MB| + +## References + +https://huggingface.co/kruthof/climateattention-ctw + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-clinical_trial_termination_en.md b/docs/_posts/ahmedlone127/2024-11-11-clinical_trial_termination_en.md new file mode 100644 index 00000000000000..74d7c04df010c2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-clinical_trial_termination_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English clinical_trial_termination BertForSequenceClassification from clem21chan +author: John Snow Labs +name: clinical_trial_termination +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clinical_trial_termination` is an English model originally trained by clem21chan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinical_trial_termination_en_5.5.1_3.0_1731309477045.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinical_trial_termination_en_5.5.1_3.0_1731309477045.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session (`spark`) and the usual sparknlp / pyspark.ml imports.
+
+# Wrap the raw "text" column into "document" annotations.
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+# Split every document into tokens.
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# The classifier consumes the assembler's "document" column and the tokenizer's "token" column.
+sequenceClassifier = BertForSequenceClassification.pretrained("clinical_trial_termination","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Assumes an active SparkSession `spark` with `import spark.implicits._` in scope (needed for toDS/toDF).
+
+// DocumentAssembler works on a single column, so it exposes setInputCol / setOutputCol (singular).
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// Input columns must match the names produced by the two stages above.
+val sequenceClassifier = BertForSequenceClassification.pretrained("clinical_trial_termination", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinical_trial_termination| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|405.5 MB| + +## References + +https://huggingface.co/clem21chan/clinical_trial_termination \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-clinical_trial_termination_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-clinical_trial_termination_pipeline_en.md new file mode 100644 index 00000000000000..aeee5a4a1cac2c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-clinical_trial_termination_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English clinical_trial_termination_pipeline pipeline BertForSequenceClassification from clem21chan +author: John Snow Labs +name: clinical_trial_termination_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clinical_trial_termination_pipeline` is an English model originally trained by clem21chan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinical_trial_termination_pipeline_en_5.5.1_3.0_1731309500897.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinical_trial_termination_pipeline_en_5.5.1_3.0_1731309500897.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Sample input so the snippet runs as-is. The bundled DocumentAssembler presumably reads
+# the "text" column (as in the matching model card) - adjust if your data differs.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+# Load the pretrained pipeline and annotate the DataFrame.
+pipeline = PretrainedPipeline("clinical_trial_termination_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// Sample input so the snippet runs as-is (toDS/toDF need `import spark.implicits._`).
+// The bundled DocumentAssembler presumably reads the "text" column - adjust if your data differs.
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+// Load the pretrained pipeline and annotate the DataFrame.
+val pipeline = new PretrainedPipeline("clinical_trial_termination_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinical_trial_termination_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|405.6 MB| + +## References + +https://huggingface.co/clem21chan/clinical_trial_termination + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-deberta_v3_large_lemon_spell_5k_en.md b/docs/_posts/ahmedlone127/2024-11-11-deberta_v3_large_lemon_spell_5k_en.md new file mode 100644 index 00000000000000..2cdc1db3189282 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-deberta_v3_large_lemon_spell_5k_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large_lemon_spell_5k DeBertaForTokenClassification from manred1997 +author: John Snow Labs +name: deberta_v3_large_lemon_spell_5k +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, deberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `deberta_v3_large_lemon_spell_5k` is an English model originally trained by manred1997. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_lemon_spell_5k_en_5.5.1_3.0_1731306834489.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_lemon_spell_5k_en_5.5.1_3.0_1731306834489.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session (`spark`) and the usual sparknlp / pyspark.ml imports.
+
+# Wrap the raw "text" column into "document" annotations.
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+# Split every document into tokens.
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# The token classifier consumes the assembler's "document" column and the tokenizer's "token" column.
+tokenClassifier = DeBertaForTokenClassification.pretrained("deberta_v3_large_lemon_spell_5k","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Assumes an active SparkSession `spark` with `import spark.implicits._` in scope (needed for toDS/toDF).
+
+// DocumentAssembler works on a single column, so it exposes setInputCol / setOutputCol (singular).
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// Input columns must match the names produced by the two stages above.
+val tokenClassifier = DeBertaForTokenClassification.pretrained("deberta_v3_large_lemon_spell_5k", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_lemon_spell_5k| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/manred1997/deberta-v3-large-lemon-spell_5k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-deberta_v3_large_lemon_spell_5k_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-deberta_v3_large_lemon_spell_5k_pipeline_en.md new file mode 100644 index 00000000000000..e765205041dfb1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-deberta_v3_large_lemon_spell_5k_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_large_lemon_spell_5k_pipeline pipeline DeBertaForTokenClassification from manred1997 +author: John Snow Labs +name: deberta_v3_large_lemon_spell_5k_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `deberta_v3_large_lemon_spell_5k_pipeline` is an English model originally trained by manred1997. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_lemon_spell_5k_pipeline_en_5.5.1_3.0_1731306916759.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_lemon_spell_5k_pipeline_en_5.5.1_3.0_1731306916759.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Sample input so the snippet runs as-is. The bundled DocumentAssembler presumably reads
+# the "text" column (as in the matching model card) - adjust if your data differs.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+# Load the pretrained pipeline and annotate the DataFrame.
+pipeline = PretrainedPipeline("deberta_v3_large_lemon_spell_5k_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// Sample input so the snippet runs as-is (toDS/toDF need `import spark.implicits._`).
+// The bundled DocumentAssembler presumably reads the "text" column - adjust if your data differs.
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+// Load the pretrained pipeline and annotate the DataFrame.
+val pipeline = new PretrainedPipeline("deberta_v3_large_lemon_spell_5k_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_lemon_spell_5k_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/manred1997/deberta-v3-large-lemon-spell_5k + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-deeppavlov_absa_en.md b/docs/_posts/ahmedlone127/2024-11-11-deeppavlov_absa_en.md new file mode 100644 index 00000000000000..7a78ee7b43d0e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-deeppavlov_absa_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deeppavlov_absa XlmRoBertaForTokenClassification from natriistorm +author: John Snow Labs +name: deeppavlov_absa +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `deeppavlov_absa` is an English model originally trained by natriistorm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deeppavlov_absa_en_5.5.1_3.0_1731292999917.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deeppavlov_absa_en_5.5.1_3.0_1731292999917.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session (`spark`) and the usual sparknlp / pyspark.ml imports.
+
+# Wrap the raw "text" column into "document" annotations.
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+# Split every document into tokens.
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# The token classifier consumes the assembler's "document" column and the tokenizer's "token" column.
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("deeppavlov_absa","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Assumes an active SparkSession `spark` with `import spark.implicits._` in scope (needed for toDS/toDF).
+
+// DocumentAssembler works on a single column, so it exposes setInputCol / setOutputCol (singular).
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// Input columns must match the names produced by the two stages above.
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("deeppavlov_absa", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deeppavlov_absa| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|857.1 MB| + +## References + +https://huggingface.co/natriistorm/DeepPavlov-ABSA \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-deeppavlov_absa_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-deeppavlov_absa_pipeline_en.md new file mode 100644 index 00000000000000..6a9194d07a8233 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-deeppavlov_absa_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deeppavlov_absa_pipeline pipeline XlmRoBertaForTokenClassification from natriistorm +author: John Snow Labs +name: deeppavlov_absa_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `deeppavlov_absa_pipeline` is an English model originally trained by natriistorm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deeppavlov_absa_pipeline_en_5.5.1_3.0_1731293111760.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deeppavlov_absa_pipeline_en_5.5.1_3.0_1731293111760.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Sample input so the snippet runs as-is. The bundled DocumentAssembler presumably reads
+# the "text" column (as in the matching model card) - adjust if your data differs.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+# Load the pretrained pipeline and annotate the DataFrame.
+pipeline = PretrainedPipeline("deeppavlov_absa_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// Sample input so the snippet runs as-is (toDS/toDF need `import spark.implicits._`).
+// The bundled DocumentAssembler presumably reads the "text" column - adjust if your data differs.
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+// Load the pretrained pipeline and annotate the DataFrame.
+val pipeline = new PretrainedPipeline("deeppavlov_absa_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deeppavlov_absa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|857.1 MB| + +## References + +https://huggingface.co/natriistorm/DeepPavlov-ABSA + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-distilhubert_korean_zeroth_ko.md b/docs/_posts/ahmedlone127/2024-11-11-distilhubert_korean_zeroth_ko.md new file mode 100644 index 00000000000000..bd77dc0a180729 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-distilhubert_korean_zeroth_ko.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Korean distilhubert_korean_zeroth HubertForCTC from Bingsu +author: John Snow Labs +name: distilhubert_korean_zeroth +date: 2024-11-11 +tags: [ko, open_source, onnx, asr, hubert] +task: Automatic Speech Recognition +language: ko +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: HubertForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained HubertForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilhubert_korean_zeroth` is a Korean model originally trained by Bingsu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilhubert_korean_zeroth_ko_5.5.1_3.0_1731285005655.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilhubert_korean_zeroth_ko_5.5.1_3.0_1731285005655.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = HubertForCTC.pretrained("distilhubert_korean_zeroth","ko") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = HubertForCTC.pretrained("distilhubert_korean_zeroth", "ko") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilhubert_korean_zeroth| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ko| +|Size:|183.5 MB| + +## References + +https://huggingface.co/Bingsu/distilhubert-ko-zeroth \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-distilhubert_korean_zeroth_pipeline_ko.md b/docs/_posts/ahmedlone127/2024-11-11-distilhubert_korean_zeroth_pipeline_ko.md new file mode 100644 index 00000000000000..e0de5f10ccf087 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-distilhubert_korean_zeroth_pipeline_ko.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Korean distilhubert_korean_zeroth_pipeline pipeline HubertForCTC from Bingsu +author: John Snow Labs +name: distilhubert_korean_zeroth_pipeline +date: 2024-11-11 +tags: [ko, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: ko +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained HubertForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilhubert_korean_zeroth_pipeline` is a Korean model originally trained by Bingsu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilhubert_korean_zeroth_pipeline_ko_5.5.1_3.0_1731285014876.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilhubert_korean_zeroth_pipeline_ko_5.5.1_3.0_1731285014876.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilhubert_korean_zeroth_pipeline", lang = "ko") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilhubert_korean_zeroth_pipeline", lang = "ko") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilhubert_korean_zeroth_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|ko| +|Size:|183.5 MB| + +## References + +https://huggingface.co/Bingsu/distilhubert-ko-zeroth + +## Included Models + +- AudioAssembler +- HubertForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-e5_base_en.md b/docs/_posts/ahmedlone127/2024-11-11-e5_base_en.md new file mode 100644 index 00000000000000..d945cfebc6a6ca --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-e5_base_en.md @@ -0,0 +1,67 @@ +--- +layout: model +title: E5 Base Sentence Embeddings +author: John Snow Labs +name: e5_base +date: 2024-11-11 +tags: [en, open_source, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: E5Embeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Text Embeddings by Weakly-Supervised Contrastive Pre-training. Liang Wang, Nan Yang, Xiaolong Huang, Binxing Jiao, Linjun Yang, Daxin Jiang, Rangan Majumder, Furu Wei, arXiv 2022 + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/e5_base_en_5.5.1_3.0_1731300102963.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/e5_base_en_5.5.1_3.0_1731300102963.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +embeddings = E5Embeddings.pretrained("e5_base","en") \ + .setInputCols(["document"]) \ + .setOutputCol("e5_embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) +``` +```scala +val embeddings = E5Embeddings.pretrained("e5_base","en") + .setInputCols(Array("document")) + .setOutputCol("e5_embeddings") +val pipeline = new Pipeline().setStages(Array(document, embeddings)) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|e5_base| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[E5]| +|Language:|en| +|Size:|258.6 MB| \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-e5_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-e5_base_pipeline_en.md new file mode 100644 index 00000000000000..714ee62884d867 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-e5_base_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English e5_base_pipeline pipeline E5Embeddings from intfloat +author: John Snow Labs +name: e5_base_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained E5Embeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `e5_base_pipeline` is an English model originally trained by intfloat. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/e5_base_pipeline_en_5.5.1_3.0_1731300180105.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/e5_base_pipeline_en_5.5.1_3.0_1731300180105.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +pipeline = PretrainedPipeline("e5_base_pipeline", lang = "en") +annotations = pipeline.transform(df) +``` +```scala +val pipeline = new PretrainedPipeline("e5_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|e5_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|258.6 MB| + +## References + +References + +https://huggingface.co/intfloat/e5-base + +## Included Models + +- DocumentAssembler +- E5Embeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-e5_large_en.md b/docs/_posts/ahmedlone127/2024-11-11-e5_large_en.md new file mode 100644 index 00000000000000..d4a5086d2ecfb6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-e5_large_en.md @@ -0,0 +1,75 @@ +--- +layout: model +title: E5 Large Sentence Embeddings +author: John Snow Labs +name: e5_large +date: 2024-11-11 +tags: [en, open_source, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: E5Embeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Text Embeddings by Weakly-Supervised Contrastive Pre-training. Liang Wang, Nan Yang, Xiaolong Huang, Binxing Jiao, Linjun Yang, Daxin Jiang, Rangan Majumder, Furu Wei, arXiv 2022 + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/e5_large_en_5.5.1_3.0_1731300299067.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/e5_large_en_5.5.1_3.0_1731300299067.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +embeddings = E5Embeddings.pretrained("e5_large","en") \ + .setInputCols(["document"]) \ + .setOutputCol("e5_embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) +``` +```scala +val embeddings = E5Embeddings.pretrained("e5_large","en") + .setInputCols(Array("document")) + .setOutputCol("e5_embeddings") +val pipeline = new Pipeline().setStages(Array(document, embeddings)) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|e5_large| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[E5]| +|Language:|en| +|Size:|796.1 MB| + +## References + +References + +References + +https://huggingface.co/intfloat/e5-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-e5_large_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-e5_large_pipeline_en.md new file mode 100644 index 00000000000000..467e23d43eac59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-e5_large_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English e5_large_pipeline pipeline E5Embeddings from intfloat +author: John Snow Labs +name: e5_large_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained E5Embeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `e5_large_pipeline` is an English model originally trained by intfloat. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/e5_large_pipeline_en_5.5.1_3.0_1731300535649.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/e5_large_pipeline_en_5.5.1_3.0_1731300535649.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +pipeline = PretrainedPipeline("e5_large_pipeline", lang = "en") +annotations = pipeline.transform(df) +``` +```scala +val pipeline = new PretrainedPipeline("e5_large_pipeline", lang = "en") +val annotations = pipeline.transform(df) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|e5_large_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|796.1 MB| + +## References + +References + +https://huggingface.co/intfloat/e5-large + +## Included Models + +- DocumentAssembler +- E5Embeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-e5_small_en.md b/docs/_posts/ahmedlone127/2024-11-11-e5_small_en.md new file mode 100644 index 00000000000000..090b3563e10d8c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-e5_small_en.md @@ -0,0 +1,67 @@ +--- +layout: model +title: E5 Small Sentence Embeddings +author: John Snow Labs +name: e5_small +date: 2024-11-11 +tags: [en, open_source, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: E5Embeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Text Embeddings by Weakly-Supervised Contrastive Pre-training. Liang Wang, Nan Yang, Xiaolong Huang, Binxing Jiao, Linjun Yang, Daxin Jiang, Rangan Majumder, Furu Wei, arXiv 2022 + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/e5_small_en_5.5.1_3.0_1731300044693.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/e5_small_en_5.5.1_3.0_1731300044693.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +embeddings = E5Embeddings.pretrained("e5_small","en") \ + .setInputCols(["document"]) \ + .setOutputCol("e5_embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) +``` +```scala +val embeddings = E5Embeddings.pretrained("e5_small","en") + .setInputCols(Array("document")) + .setOutputCol("e5_embeddings") +val pipeline = new Pipeline().setStages(Array(document, embeddings)) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|e5_small| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[E5]| +|Language:|en| +|Size:|79.9 MB| \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-e5_small_lora_ai_generated_detector_en.md b/docs/_posts/ahmedlone127/2024-11-11-e5_small_lora_ai_generated_detector_en.md new file mode 100644 index 00000000000000..a3d9ef09ef1bb5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-e5_small_lora_ai_generated_detector_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English e5_small_lora_ai_generated_detector BertForSequenceClassification from MayZhou +author: John Snow Labs +name: e5_small_lora_ai_generated_detector +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `e5_small_lora_ai_generated_detector` is an English model originally trained by MayZhou. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/e5_small_lora_ai_generated_detector_en_5.5.1_3.0_1731309372403.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/e5_small_lora_ai_generated_detector_en_5.5.1_3.0_1731309372403.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("e5_small_lora_ai_generated_detector","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("e5_small_lora_ai_generated_detector", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|e5_small_lora_ai_generated_detector| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|87.5 MB| + +## References + +https://huggingface.co/MayZhou/e5-small-lora-ai-generated-detector \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-e5_small_lora_ai_generated_detector_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-e5_small_lora_ai_generated_detector_pipeline_en.md new file mode 100644 index 00000000000000..436ceffa5f5f5f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-e5_small_lora_ai_generated_detector_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English e5_small_lora_ai_generated_detector_pipeline pipeline BertForSequenceClassification from MayZhou +author: John Snow Labs +name: e5_small_lora_ai_generated_detector_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `e5_small_lora_ai_generated_detector_pipeline` is an English model originally trained by MayZhou. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/e5_small_lora_ai_generated_detector_pipeline_en_5.5.1_3.0_1731309394398.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/e5_small_lora_ai_generated_detector_pipeline_en_5.5.1_3.0_1731309394398.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("e5_small_lora_ai_generated_detector_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("e5_small_lora_ai_generated_detector_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|e5_small_lora_ai_generated_detector_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|87.5 MB| + +## References + +https://huggingface.co/MayZhou/e5-small-lora-ai-generated-detector + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-e5_small_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-e5_small_pipeline_en.md new file mode 100644 index 00000000000000..598d42aa0fb9d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-e5_small_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English e5_small_pipeline pipeline E5Embeddings from intfloat +author: John Snow Labs +name: e5_small_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained E5Embeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `e5_small_pipeline` is an English model originally trained by intfloat. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/e5_small_pipeline_en_5.5.1_3.0_1731300067905.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/e5_small_pipeline_en_5.5.1_3.0_1731300067905.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +pipeline = PretrainedPipeline("e5_small_pipeline", lang = "en") +annotations = pipeline.transform(df) +``` +```scala +val pipeline = new PretrainedPipeline("e5_small_pipeline", lang = "en") +val annotations = pipeline.transform(df) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|e5_small_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|79.9 MB| + +## References + +References + +https://huggingface.co/intfloat/e5-small + +## Included Models + +- DocumentAssembler +- E5Embeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-embedded_e5_base_50_en.md b/docs/_posts/ahmedlone127/2024-11-11-embedded_e5_base_50_en.md new file mode 100644 index 00000000000000..06bcc9d7228d04 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-embedded_e5_base_50_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English embedded_e5_base_50 E5Embeddings from rithwik-db +author: John Snow Labs +name: embedded_e5_base_50 +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, e5] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: E5Embeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained E5Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `embedded_e5_base_50` is an English model originally trained by rithwik-db. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/embedded_e5_base_50_en_5.5.1_3.0_1731300053310.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/embedded_e5_base_50_en_5.5.1_3.0_1731300053310.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = E5Embeddings.pretrained("embedded_e5_base_50","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = E5Embeddings.pretrained("embedded_e5_base_50","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|embedded_e5_base_50| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[E5]| +|Language:|en| +|Size:|379.2 MB| + +## References + +https://huggingface.co/rithwik-db/embedded-e5-base-50 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-embedded_e5_base_50_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-embedded_e5_base_50_pipeline_en.md new file mode 100644 index 00000000000000..de6a205a9ba28d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-embedded_e5_base_50_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English embedded_e5_base_50_pipeline pipeline E5Embeddings from rithwik-db +author: John Snow Labs +name: embedded_e5_base_50_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained E5Embeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `embedded_e5_base_50_pipeline` is an English model originally trained by rithwik-db. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/embedded_e5_base_50_pipeline_en_5.5.1_3.0_1731300084750.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/embedded_e5_base_50_pipeline_en_5.5.1_3.0_1731300084750.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("embedded_e5_base_50_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("embedded_e5_base_50_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|embedded_e5_base_50_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|379.3 MB| + +## References + +https://huggingface.co/rithwik-db/embedded-e5-base-50 + +## Included Models + +- DocumentAssembler +- E5Embeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-exp_w2v2t_german_hubert_s921_de.md b/docs/_posts/ahmedlone127/2024-11-11-exp_w2v2t_german_hubert_s921_de.md new file mode 100644 index 00000000000000..abaaf8a40c5363 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-exp_w2v2t_german_hubert_s921_de.md @@ -0,0 +1,84 @@ +--- +layout: model +title: German exp_w2v2t_german_hubert_s921 HubertForCTC from jonatasgrosman +author: John Snow Labs +name: exp_w2v2t_german_hubert_s921 +date: 2024-11-11 +tags: [de, open_source, onnx, asr, hubert] +task: Automatic Speech Recognition +language: de +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: HubertForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained HubertForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`exp_w2v2t_german_hubert_s921` is a German model originally trained by jonatasgrosman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/exp_w2v2t_german_hubert_s921_de_5.5.1_3.0_1731286788135.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/exp_w2v2t_german_hubert_s921_de_5.5.1_3.0_1731286788135.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = HubertForCTC.pretrained("exp_w2v2t_german_hubert_s921","de") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = HubertForCTC.pretrained("exp_w2v2t_german_hubert_s921", "de") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|exp_w2v2t_german_hubert_s921| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|de| +|Size:|2.4 GB| + +## References + +https://huggingface.co/jonatasgrosman/exp_w2v2t_de_hubert_s921 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-exp_w2v2t_german_hubert_s921_pipeline_de.md b/docs/_posts/ahmedlone127/2024-11-11-exp_w2v2t_german_hubert_s921_pipeline_de.md new file mode 100644 index 00000000000000..688625ff61f30b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-exp_w2v2t_german_hubert_s921_pipeline_de.md @@ -0,0 +1,69 @@ +--- +layout: model +title: German exp_w2v2t_german_hubert_s921_pipeline pipeline HubertForCTC from jonatasgrosman +author: John Snow Labs +name: exp_w2v2t_german_hubert_s921_pipeline +date: 2024-11-11 +tags: [de, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: de +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained HubertForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`exp_w2v2t_german_hubert_s921_pipeline` is a German model originally trained by jonatasgrosman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/exp_w2v2t_german_hubert_s921_pipeline_de_5.5.1_3.0_1731286902986.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/exp_w2v2t_german_hubert_s921_pipeline_de_5.5.1_3.0_1731286902986.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("exp_w2v2t_german_hubert_s921_pipeline", lang = "de") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("exp_w2v2t_german_hubert_s921_pipeline", lang = "de") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|exp_w2v2t_german_hubert_s921_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|de| +|Size:|2.4 GB| + +## References + +https://huggingface.co/jonatasgrosman/exp_w2v2t_de_hubert_s921 + +## Included Models + +- AudioAssembler +- HubertForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-exp_w2v2t_persian_farsi_hubert_s889_fa.md b/docs/_posts/ahmedlone127/2024-11-11-exp_w2v2t_persian_farsi_hubert_s889_fa.md new file mode 100644 index 00000000000000..f1767a4b5e2d37 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-exp_w2v2t_persian_farsi_hubert_s889_fa.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Persian exp_w2v2t_persian_farsi_hubert_s889 HubertForCTC from jonatasgrosman +author: John Snow Labs +name: exp_w2v2t_persian_farsi_hubert_s889 +date: 2024-11-11 +tags: [fa, open_source, onnx, asr, hubert] +task: Automatic Speech Recognition +language: fa +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: HubertForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained HubertForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`exp_w2v2t_persian_farsi_hubert_s889` is a Persian model originally trained by jonatasgrosman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/exp_w2v2t_persian_farsi_hubert_s889_fa_5.5.1_3.0_1731283735194.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/exp_w2v2t_persian_farsi_hubert_s889_fa_5.5.1_3.0_1731283735194.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = HubertForCTC.pretrained("exp_w2v2t_persian_farsi_hubert_s889","fa") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = HubertForCTC.pretrained("exp_w2v2t_persian_farsi_hubert_s889", "fa") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|exp_w2v2t_persian_farsi_hubert_s889| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|fa| +|Size:|2.4 GB| + +## References + +https://huggingface.co/jonatasgrosman/exp_w2v2t_fa_hubert_s889 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-exp_w2v2t_persian_farsi_hubert_s889_pipeline_fa.md b/docs/_posts/ahmedlone127/2024-11-11-exp_w2v2t_persian_farsi_hubert_s889_pipeline_fa.md new file mode 100644 index 00000000000000..f8a5b94d21856f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-exp_w2v2t_persian_farsi_hubert_s889_pipeline_fa.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Persian exp_w2v2t_persian_farsi_hubert_s889_pipeline pipeline HubertForCTC from jonatasgrosman +author: John Snow Labs +name: exp_w2v2t_persian_farsi_hubert_s889_pipeline +date: 2024-11-11 +tags: [fa, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: fa +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained HubertForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`exp_w2v2t_persian_farsi_hubert_s889_pipeline` is a Persian model originally trained by jonatasgrosman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/exp_w2v2t_persian_farsi_hubert_s889_pipeline_fa_5.5.1_3.0_1731283877387.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/exp_w2v2t_persian_farsi_hubert_s889_pipeline_fa_5.5.1_3.0_1731283877387.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("exp_w2v2t_persian_farsi_hubert_s889_pipeline", lang = "fa") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("exp_w2v2t_persian_farsi_hubert_s889_pipeline", lang = "fa") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|exp_w2v2t_persian_farsi_hubert_s889_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|fa| +|Size:|2.4 GB| + +## References + +https://huggingface.co/jonatasgrosman/exp_w2v2t_fa_hubert_s889 + +## Included Models + +- AudioAssembler +- HubertForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-fab_ramy_v1_en.md b/docs/_posts/ahmedlone127/2024-11-11-fab_ramy_v1_en.md new file mode 100644 index 00000000000000..5232e8bf3394d3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-fab_ramy_v1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English fab_ramy_v1 MPNetEmbeddings from qinxianliu +author: John Snow Labs +name: fab_ramy_v1 +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fab_ramy_v1` is a English model originally trained by qinxianliu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fab_ramy_v1_en_5.5.1_3.0_1731294774877.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fab_ramy_v1_en_5.5.1_3.0_1731294774877.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("fab_ramy_v1","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("fab_ramy_v1","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fab_ramy_v1| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/qinxianliu/FAB-Ramy-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-fab_ramy_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-fab_ramy_v1_pipeline_en.md new file mode 100644 index 00000000000000..ec125add711018 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-fab_ramy_v1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English fab_ramy_v1_pipeline pipeline MPNetEmbeddings from qinxianliu +author: John Snow Labs +name: fab_ramy_v1_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fab_ramy_v1_pipeline` is a English model originally trained by qinxianliu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fab_ramy_v1_pipeline_en_5.5.1_3.0_1731294796154.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fab_ramy_v1_pipeline_en_5.5.1_3.0_1731294796154.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("fab_ramy_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("fab_ramy_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fab_ramy_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/qinxianliu/FAB-Ramy-v1 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-factuality_model_en.md b/docs/_posts/ahmedlone127/2024-11-11-factuality_model_en.md new file mode 100644 index 00000000000000..0d89b27c77db7f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-factuality_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English factuality_model BertForSequenceClassification from gljj +author: John Snow Labs +name: factuality_model +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`factuality_model` is a English model originally trained by gljj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/factuality_model_en_5.5.1_3.0_1731309550709.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/factuality_model_en_5.5.1_3.0_1731309550709.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("factuality_model","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("factuality_model", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|factuality_model| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/gljj/factuality-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-factuality_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-factuality_model_pipeline_en.md new file mode 100644 index 00000000000000..029a7e3f16c80e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-factuality_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English factuality_model_pipeline pipeline BertForSequenceClassification from gljj +author: John Snow Labs +name: factuality_model_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`factuality_model_pipeline` is a English model originally trained by gljj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/factuality_model_pipeline_en_5.5.1_3.0_1731309573179.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/factuality_model_pipeline_en_5.5.1_3.0_1731309573179.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("factuality_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("factuality_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|factuality_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/gljj/factuality-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-fashion_clip_inference_en.md b/docs/_posts/ahmedlone127/2024-11-11-fashion_clip_inference_en.md new file mode 100644 index 00000000000000..b9d41b4fbd0961 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-fashion_clip_inference_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English fashion_clip_inference CLIPForZeroShotClassification from Fluf22 +author: John Snow Labs +name: fashion_clip_inference +date: 2024-11-11 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fashion_clip_inference` is a English model originally trained by Fluf22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fashion_clip_inference_en_5.5.1_3.0_1731287432963.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fashion_clip_inference_en_5.5.1_3.0_1731287432963.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +imageDF = spark.read \ + .format("image") \ + .option("dropInvalid", value = True) \ + .load("src/test/resources/image/") + +candidateLabels = [ + "a photo of a bird", + "a photo of a cat", + "a photo of a dog", + "a photo of a hen", + "a photo of a hippo", + "a photo of a room", + "a photo of a tractor", + "a photo of an ostrich", + "a photo of an ox"] + +imageAssembler = ImageAssembler() \ + .setInputCol("image") \ + .setOutputCol("image_assembler") + +imageClassifier = CLIPForZeroShotClassification.pretrained("fashion_clip_inference","en") \ + .setInputCols(["image_assembler"]) \ + .setOutputCol("label") \ + .setCandidateLabels(candidateLabels) + +pipeline = Pipeline().setStages([imageAssembler, imageClassifier]) +pipelineModel = pipeline.fit(imageDF) +pipelineDF = pipelineModel.transform(imageDF) + + +``` +```scala + + +val imageDF = ResourceHelper.spark.read + .format("image") + .option("dropInvalid", value = true) + .load("src/test/resources/image/") + +val candidateLabels = Array( + "a photo of a bird", + "a photo of a cat", + "a photo of a dog", + "a photo of a hen", + "a photo of a hippo", + "a photo of a room", + "a photo of a tractor", + "a photo of an ostrich", + "a photo of an ox") + +val imageAssembler = new ImageAssembler() + .setInputCol("image") + .setOutputCol("image_assembler") + +val imageClassifier = CLIPForZeroShotClassification.pretrained("fashion_clip_inference","en") + .setInputCols(Array("image_assembler")) + .setOutputCol("label") + .setCandidateLabels(candidateLabels) + +val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier)) +val pipelineModel = pipeline.fit(imageDF) +val pipelineDF = pipelineModel.transform(imageDF) + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fashion_clip_inference| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|397.7 MB| + +## References + +https://huggingface.co/Fluf22/fashion-clip-inference \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-fashion_clip_inference_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-fashion_clip_inference_pipeline_en.md new file mode 100644 index 00000000000000..65e5dac4959c25 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-fashion_clip_inference_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English fashion_clip_inference_pipeline pipeline CLIPForZeroShotClassification from Fluf22 +author: John Snow Labs +name: fashion_clip_inference_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fashion_clip_inference_pipeline` is a English model originally trained by Fluf22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fashion_clip_inference_pipeline_en_5.5.1_3.0_1731287527759.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fashion_clip_inference_pipeline_en_5.5.1_3.0_1731287527759.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("fashion_clip_inference_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("fashion_clip_inference_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fashion_clip_inference_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|397.7 MB| + +## References + +https://huggingface.co/Fluf22/fashion-clip-inference + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_bge_large_en.md b/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_bge_large_en.md new file mode 100644 index 00000000000000..e11b067a8a7785 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_bge_large_en.md @@ -0,0 +1,87 @@ +--- +layout: model +title: English fine_tuned_bge_large BGEEmbeddings from VaggP +author: John Snow Labs +name: fine_tuned_bge_large +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, bge] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BGEEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fine_tuned_bge_large` is a English model originally trained by VaggP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_bge_large_en_5.5.1_3.0_1731313695765.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_bge_large_en_5.5.1_3.0_1731313695765.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = BGEEmbeddings.pretrained("fine_tuned_bge_large","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + + +val embeddings = BGEEmbeddings.pretrained("fine_tuned_bge_large","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_bge_large| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[bge]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/VaggP/fine-tuned-bge-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_bge_large_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_bge_large_pipeline_en.md new file mode 100644 index 00000000000000..757152937ee2a8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_bge_large_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English fine_tuned_bge_large_pipeline pipeline BGEEmbeddings from VaggP +author: John Snow Labs +name: fine_tuned_bge_large_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fine_tuned_bge_large_pipeline` is a English model originally trained by VaggP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_bge_large_pipeline_en_5.5.1_3.0_1731313768340.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_bge_large_pipeline_en_5.5.1_3.0_1731313768340.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("fine_tuned_bge_large_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("fine_tuned_bge_large_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_bge_large_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/VaggP/fine-tuned-bge-large + +## Included Models + +- DocumentAssembler +- BGEEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_datasetqas_idk_mrc_with_indobert_large_p2_with_ittl_with_freeze_lr_1e_05_en.md b/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_datasetqas_idk_mrc_with_indobert_large_p2_with_ittl_with_freeze_lr_1e_05_en.md new file mode 100644 index 00000000000000..373910cc722f33 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_datasetqas_idk_mrc_with_indobert_large_p2_with_ittl_with_freeze_lr_1e_05_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English fine_tuned_datasetqas_idk_mrc_with_indobert_large_p2_with_ittl_with_freeze_lr_1e_05 BertForQuestionAnswering from muhammadravi251001 +author: John Snow Labs +name: fine_tuned_datasetqas_idk_mrc_with_indobert_large_p2_with_ittl_with_freeze_lr_1e_05 +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fine_tuned_datasetqas_idk_mrc_with_indobert_large_p2_with_ittl_with_freeze_lr_1e_05` is a English model originally trained by muhammadravi251001. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_datasetqas_idk_mrc_with_indobert_large_p2_with_ittl_with_freeze_lr_1e_05_en_5.5.1_3.0_1731288819872.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_datasetqas_idk_mrc_with_indobert_large_p2_with_ittl_with_freeze_lr_1e_05_en_5.5.1_3.0_1731288819872.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("fine_tuned_datasetqas_idk_mrc_with_indobert_large_p2_with_ittl_with_freeze_lr_1e_05","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("fine_tuned_datasetqas_idk_mrc_with_indobert_large_p2_with_ittl_with_freeze_lr_1e_05", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_datasetqas_idk_mrc_with_indobert_large_p2_with_ittl_with_freeze_lr_1e_05| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/muhammadravi251001/fine-tuned-DatasetQAS-IDK-MRC-with-indobert-large-p2-with-ITTL-with-freeze-LR-1e-05 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_datasetqas_idk_mrc_with_indobert_large_p2_with_ittl_with_freeze_lr_1e_05_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_datasetqas_idk_mrc_with_indobert_large_p2_with_ittl_with_freeze_lr_1e_05_pipeline_en.md new file mode 100644 index 00000000000000..e12fac29b141f7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_datasetqas_idk_mrc_with_indobert_large_p2_with_ittl_with_freeze_lr_1e_05_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English fine_tuned_datasetqas_idk_mrc_with_indobert_large_p2_with_ittl_with_freeze_lr_1e_05_pipeline pipeline BertForQuestionAnswering from muhammadravi251001 +author: John Snow Labs +name: fine_tuned_datasetqas_idk_mrc_with_indobert_large_p2_with_ittl_with_freeze_lr_1e_05_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fine_tuned_datasetqas_idk_mrc_with_indobert_large_p2_with_ittl_with_freeze_lr_1e_05_pipeline` is a English model originally trained by muhammadravi251001. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_datasetqas_idk_mrc_with_indobert_large_p2_with_ittl_with_freeze_lr_1e_05_pipeline_en_5.5.1_3.0_1731288888810.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_datasetqas_idk_mrc_with_indobert_large_p2_with_ittl_with_freeze_lr_1e_05_pipeline_en_5.5.1_3.0_1731288888810.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("fine_tuned_datasetqas_idk_mrc_with_indobert_large_p2_with_ittl_with_freeze_lr_1e_05_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("fine_tuned_datasetqas_idk_mrc_with_indobert_large_p2_with_ittl_with_freeze_lr_1e_05_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_datasetqas_idk_mrc_with_indobert_large_p2_with_ittl_with_freeze_lr_1e_05_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/muhammadravi251001/fine-tuned-DatasetQAS-IDK-MRC-with-indobert-large-p2-with-ITTL-with-freeze-LR-1e-05 + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_metaphor_detection_en.md b/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_metaphor_detection_en.md new file mode 100644 index 00000000000000..4ee03c04d84b8f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_metaphor_detection_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English fine_tuned_metaphor_detection BertForSequenceClassification from Sasidhar1826 +author: John Snow Labs +name: fine_tuned_metaphor_detection +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `fine_tuned_metaphor_detection` is an English model originally trained by Sasidhar1826. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_metaphor_detection_en_5.5.1_3.0_1731309326875.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_metaphor_detection_en_5.5.1_3.0_1731309326875.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_metaphor_detection","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_metaphor_detection", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_metaphor_detection| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Sasidhar1826/fine-tuned-metaphor-detection \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_metaphor_detection_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_metaphor_detection_pipeline_en.md new file mode 100644 index 00000000000000..17786ff6b61d0d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_metaphor_detection_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English fine_tuned_metaphor_detection_pipeline pipeline BertForSequenceClassification from Sasidhar1826 +author: John Snow Labs +name: fine_tuned_metaphor_detection_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fine_tuned_metaphor_detection_pipeline` is a English model originally trained by Sasidhar1826. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_metaphor_detection_pipeline_en_5.5.1_3.0_1731309349158.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_metaphor_detection_pipeline_en_5.5.1_3.0_1731309349158.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("fine_tuned_metaphor_detection_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("fine_tuned_metaphor_detection_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_metaphor_detection_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Sasidhar1826/fine-tuned-metaphor-detection + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_model_resume_en.md b/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_model_resume_en.md new file mode 100644 index 00000000000000..d17011b2be0e65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_model_resume_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English fine_tuned_model_resume MPNetEmbeddings from chamalbistec +author: John Snow Labs +name: fine_tuned_model_resume +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fine_tuned_model_resume` is a English model originally trained by chamalbistec. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_model_resume_en_5.5.1_3.0_1731294871924.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_model_resume_en_5.5.1_3.0_1731294871924.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("fine_tuned_model_resume","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("fine_tuned_model_resume","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_model_resume| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/chamalbistec/fine-tuned-model-resume \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_model_resume_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_model_resume_pipeline_en.md new file mode 100644 index 00000000000000..bb0960d9de45d9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_model_resume_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English fine_tuned_model_resume_pipeline pipeline MPNetEmbeddings from chamalbistec +author: John Snow Labs +name: fine_tuned_model_resume_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fine_tuned_model_resume_pipeline` is a English model originally trained by chamalbistec. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_model_resume_pipeline_en_5.5.1_3.0_1731294897314.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_model_resume_pipeline_en_5.5.1_3.0_1731294897314.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("fine_tuned_model_resume_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("fine_tuned_model_resume_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_model_resume_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/chamalbistec/fine-tuned-model-resume + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_mpnet_model_en.md b/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_mpnet_model_en.md new file mode 100644 index 00000000000000..72e94d2f098f26 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_mpnet_model_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English fine_tuned_mpnet_model MPNetEmbeddings from adityasajja6 +author: John Snow Labs +name: fine_tuned_mpnet_model +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fine_tuned_mpnet_model` is a English model originally trained by adityasajja6. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_mpnet_model_en_5.5.1_3.0_1731294702635.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_mpnet_model_en_5.5.1_3.0_1731294702635.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("fine_tuned_mpnet_model","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("fine_tuned_mpnet_model","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_mpnet_model| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/adityasajja6/fine_tuned_mpnet_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_mpnet_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_mpnet_model_pipeline_en.md new file mode 100644 index 00000000000000..386ac09a149993 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-fine_tuned_mpnet_model_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English fine_tuned_mpnet_model_pipeline pipeline MPNetEmbeddings from adityasajja6 +author: John Snow Labs +name: fine_tuned_mpnet_model_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fine_tuned_mpnet_model_pipeline` is a English model originally trained by adityasajja6. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_mpnet_model_pipeline_en_5.5.1_3.0_1731294728871.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_mpnet_model_pipeline_en_5.5.1_3.0_1731294728871.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("fine_tuned_mpnet_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("fine_tuned_mpnet_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_mpnet_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/adityasajja6/fine_tuned_mpnet_model + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-finetuned_baai_bge_base_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-finetuned_baai_bge_base_english_pipeline_en.md new file mode 100644 index 00000000000000..0bbb8d7b5faf31 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-finetuned_baai_bge_base_english_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English finetuned_baai_bge_base_english_pipeline pipeline BGEEmbeddings from ivanleomk +author: John Snow Labs +name: finetuned_baai_bge_base_english_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_baai_bge_base_english_pipeline` is a English model originally trained by ivanleomk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_baai_bge_base_english_pipeline_en_5.5.1_3.0_1731312762847.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_baai_bge_base_english_pipeline_en_5.5.1_3.0_1731312762847.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuned_baai_bge_base_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuned_baai_bge_base_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_baai_bge_base_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|376.1 MB| + +## References + +https://huggingface.co/ivanleomk/finetuned-BAAI-bge-base-en + +## Included Models + +- DocumentAssembler +- BGEEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-finetuned_bge_base_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-finetuned_bge_base_english_pipeline_en.md new file mode 100644 index 00000000000000..05ea6358173bfa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-finetuned_bge_base_english_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English finetuned_bge_base_english_pipeline pipeline BGEEmbeddings from ivanleomk +author: John Snow Labs +name: finetuned_bge_base_english_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_bge_base_english_pipeline` is a English model originally trained by ivanleomk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_bge_base_english_pipeline_en_5.5.1_3.0_1731312760292.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_bge_base_english_pipeline_en_5.5.1_3.0_1731312760292.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuned_bge_base_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuned_bge_base_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_bge_base_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|376.1 MB| + +## References + +https://huggingface.co/ivanleomk/finetuned-bge-base-en + +## Included Models + +- DocumentAssembler +- BGEEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-finetuned_embedding_v3_en.md b/docs/_posts/ahmedlone127/2024-11-11-finetuned_embedding_v3_en.md new file mode 100644 index 00000000000000..db764750f949ff --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-finetuned_embedding_v3_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English finetuned_embedding_v3 MPNetEmbeddings from KayaAI +author: John Snow Labs +name: finetuned_embedding_v3 +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_embedding_v3` is a English model originally trained by KayaAI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_embedding_v3_en_5.5.1_3.0_1731294877887.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_embedding_v3_en_5.5.1_3.0_1731294877887.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("finetuned_embedding_v3","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("finetuned_embedding_v3","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_embedding_v3| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/KayaAI/finetuned_embedding_v3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-finetuned_embedding_v3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-finetuned_embedding_v3_pipeline_en.md new file mode 100644 index 00000000000000..f2c4c95430ccde --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-finetuned_embedding_v3_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English finetuned_embedding_v3_pipeline pipeline MPNetEmbeddings from KayaAI +author: John Snow Labs +name: finetuned_embedding_v3_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_embedding_v3_pipeline` is a English model originally trained by KayaAI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_embedding_v3_pipeline_en_5.5.1_3.0_1731294900663.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_embedding_v3_pipeline_en_5.5.1_3.0_1731294900663.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuned_embedding_v3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuned_embedding_v3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_embedding_v3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/KayaAI/finetuned_embedding_v3 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-finetuned_sentence_similarity_en.md b/docs/_posts/ahmedlone127/2024-11-11-finetuned_sentence_similarity_en.md new file mode 100644 index 00000000000000..52d484ace9a5e3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-finetuned_sentence_similarity_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finetuned_sentence_similarity MPNetForSequenceClassification from SynthAIzer +author: John Snow Labs +name: finetuned_sentence_similarity +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_sentence_similarity` is a English model originally trained by SynthAIzer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_sentence_similarity_en_5.5.1_3.0_1731301475665.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_sentence_similarity_en_5.5.1_3.0_1731301475665.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = MPNetForSequenceClassification.pretrained("finetuned_sentence_similarity","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = MPNetForSequenceClassification.pretrained("finetuned_sentence_similarity", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_sentence_similarity| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.2 MB| + +## References + +https://huggingface.co/SynthAIzer/finetuned-sentence-similarity \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-finetuned_sentence_similarity_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-finetuned_sentence_similarity_pipeline_en.md new file mode 100644 index 00000000000000..9c2bffa5fa63ef --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-finetuned_sentence_similarity_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finetuned_sentence_similarity_pipeline pipeline MPNetForSequenceClassification from SynthAIzer +author: John Snow Labs +name: finetuned_sentence_similarity_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_sentence_similarity_pipeline` is a English model originally trained by SynthAIzer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_sentence_similarity_pipeline_en_5.5.1_3.0_1731301497210.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_sentence_similarity_pipeline_en_5.5.1_3.0_1731301497210.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuned_sentence_similarity_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuned_sentence_similarity_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_sentence_similarity_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.2 MB| + +## References + +https://huggingface.co/SynthAIzer/finetuned-sentence-similarity + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-flash_italian_ns_classifier_fpt_en.md b/docs/_posts/ahmedlone127/2024-11-11-flash_italian_ns_classifier_fpt_en.md new file mode 100644 index 00000000000000..b22987d16eea1e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-flash_italian_ns_classifier_fpt_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English flash_italian_ns_classifier_fpt BertForSequenceClassification from mrinaldi +author: John Snow Labs +name: flash_italian_ns_classifier_fpt +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`flash_italian_ns_classifier_fpt` is a English model originally trained by mrinaldi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/flash_italian_ns_classifier_fpt_en_5.5.1_3.0_1731310131977.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/flash_italian_ns_classifier_fpt_en_5.5.1_3.0_1731310131977.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("flash_italian_ns_classifier_fpt","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("flash_italian_ns_classifier_fpt", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|flash_italian_ns_classifier_fpt| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|414.8 MB| + +## References + +https://huggingface.co/mrinaldi/flash-it-ns-classifier-fpt \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-flash_italian_ns_classifier_fpt_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-flash_italian_ns_classifier_fpt_pipeline_en.md new file mode 100644 index 00000000000000..c0e1f68af987d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-flash_italian_ns_classifier_fpt_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English flash_italian_ns_classifier_fpt_pipeline pipeline BertForSequenceClassification from mrinaldi +author: John Snow Labs +name: flash_italian_ns_classifier_fpt_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `flash_italian_ns_classifier_fpt_pipeline` is an English model originally trained by mrinaldi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/flash_italian_ns_classifier_fpt_pipeline_en_5.5.1_3.0_1731310156304.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/flash_italian_ns_classifier_fpt_pipeline_en_5.5.1_3.0_1731310156304.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("flash_italian_ns_classifier_fpt_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("flash_italian_ns_classifier_fpt_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|flash_italian_ns_classifier_fpt_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|414.8 MB| + +## References + +https://huggingface.co/mrinaldi/flash-it-ns-classifier-fpt + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-fund_embedder_en.md b/docs/_posts/ahmedlone127/2024-11-11-fund_embedder_en.md new file mode 100644 index 00000000000000..4abe5667d2f891 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-fund_embedder_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English fund_embedder MPNetEmbeddings from tifin-india +author: John Snow Labs +name: fund_embedder +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `fund_embedder` is an English model originally trained by tifin-india. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fund_embedder_en_5.5.1_3.0_1731295095363.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fund_embedder_en_5.5.1_3.0_1731295095363.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("fund_embedder","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("fund_embedder","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fund_embedder| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|387.1 MB| + +## References + +https://huggingface.co/tifin-india/fund-embedder \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-fund_embedder_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-fund_embedder_pipeline_en.md new file mode 100644 index 00000000000000..2126840c73fb50 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-fund_embedder_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English fund_embedder_pipeline pipeline MPNetEmbeddings from tifin-india +author: John Snow Labs +name: fund_embedder_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `fund_embedder_pipeline` is an English model originally trained by tifin-india. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fund_embedder_pipeline_en_5.5.1_3.0_1731295128618.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fund_embedder_pipeline_en_5.5.1_3.0_1731295128618.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("fund_embedder_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("fund_embedder_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fund_embedder_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|387.1 MB| + +## References + +https://huggingface.co/tifin-india/fund-embedder + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-hardware_ner_prod_en.md b/docs/_posts/ahmedlone127/2024-11-11-hardware_ner_prod_en.md new file mode 100644 index 00000000000000..87812d0d24795d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-hardware_ner_prod_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English hardware_ner_prod BertForTokenClassification from hadiaskari98 +author: John Snow Labs +name: hardware_ner_prod +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hardware_ner_prod` is an English model originally trained by hadiaskari98. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hardware_ner_prod_en_5.5.1_3.0_1731299413030.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hardware_ner_prod_en_5.5.1_3.0_1731299413030.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("hardware_ner_prod","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("hardware_ner_prod", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hardware_ner_prod| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/hadiaskari98/Hardware_NER_prod \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-hardware_ner_prod_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-hardware_ner_prod_pipeline_en.md new file mode 100644 index 00000000000000..a1f134e615be42 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-hardware_ner_prod_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English hardware_ner_prod_pipeline pipeline BertForTokenClassification from hadiaskari98 +author: John Snow Labs +name: hardware_ner_prod_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hardware_ner_prod_pipeline` is an English model originally trained by hadiaskari98. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hardware_ner_prod_pipeline_en_5.5.1_3.0_1731299477477.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hardware_ner_prod_pipeline_en_5.5.1_3.0_1731299477477.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hardware_ner_prod_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hardware_ner_prod_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hardware_ner_prod_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/hadiaskari98/Hardware_NER_prod + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-hebert_finetuned_precedents_he.md b/docs/_posts/ahmedlone127/2024-11-11-hebert_finetuned_precedents_he.md new file mode 100644 index 00000000000000..5a824261538baa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-hebert_finetuned_precedents_he.md @@ -0,0 +1,86 @@ +--- +layout: model +title: Hebrew hebert_finetuned_precedents BertForQuestionAnswering from shay681 +author: John Snow Labs +name: hebert_finetuned_precedents +date: 2024-11-11 +tags: [he, open_source, onnx, question_answering, bert] +task: Question Answering +language: he +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hebert_finetuned_precedents` is a Hebrew model originally trained by shay681. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hebert_finetuned_precedents_he_5.5.1_3.0_1731307828918.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hebert_finetuned_precedents_he_5.5.1_3.0_1731307828918.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("hebert_finetuned_precedents","he") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("hebert_finetuned_precedents", "he") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hebert_finetuned_precedents| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|he| +|Size:|408.1 MB| + +## References + +https://huggingface.co/shay681/HeBERT_finetuned_Precedents \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-hebert_finetuned_precedents_pipeline_he.md b/docs/_posts/ahmedlone127/2024-11-11-hebert_finetuned_precedents_pipeline_he.md new file mode 100644 index 00000000000000..c4699032f5ebb6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-hebert_finetuned_precedents_pipeline_he.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Hebrew hebert_finetuned_precedents_pipeline pipeline BertForQuestionAnswering from shay681 +author: John Snow Labs +name: hebert_finetuned_precedents_pipeline +date: 2024-11-11 +tags: [he, open_source, pipeline, onnx] +task: Question Answering +language: he +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hebert_finetuned_precedents_pipeline` is a Hebrew model originally trained by shay681. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hebert_finetuned_precedents_pipeline_he_5.5.1_3.0_1731307850161.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hebert_finetuned_precedents_pipeline_he_5.5.1_3.0_1731307850161.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hebert_finetuned_precedents_pipeline", lang = "he") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hebert_finetuned_precedents_pipeline", lang = "he") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hebert_finetuned_precedents_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|he| +|Size:|408.1 MB| + +## References + +https://huggingface.co/shay681/HeBERT_finetuned_Precedents + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-hs_arabic_translate_syn_4class_for_tool_en.md b/docs/_posts/ahmedlone127/2024-11-11-hs_arabic_translate_syn_4class_for_tool_en.md new file mode 100644 index 00000000000000..83ca6f6b20b0a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-hs_arabic_translate_syn_4class_for_tool_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English hs_arabic_translate_syn_4class_for_tool BertForSequenceClassification from SoDehghan +author: John Snow Labs +name: hs_arabic_translate_syn_4class_for_tool +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hs_arabic_translate_syn_4class_for_tool` is an English model originally trained by SoDehghan.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hs_arabic_translate_syn_4class_for_tool_en_5.5.1_3.0_1731309364917.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hs_arabic_translate_syn_4class_for_tool_en_5.5.1_3.0_1731309364917.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("hs_arabic_translate_syn_4class_for_tool","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("hs_arabic_translate_syn_4class_for_tool", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hs_arabic_translate_syn_4class_for_tool| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|507.5 MB| + +## References + +https://huggingface.co/SoDehghan/hs-ar-translate-syn-4class-for-tool \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-hs_arabic_translate_syn_4class_for_tool_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-hs_arabic_translate_syn_4class_for_tool_pipeline_en.md new file mode 100644 index 00000000000000..0e5028da215122 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-hs_arabic_translate_syn_4class_for_tool_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English hs_arabic_translate_syn_4class_for_tool_pipeline pipeline BertForSequenceClassification from SoDehghan +author: John Snow Labs +name: hs_arabic_translate_syn_4class_for_tool_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hs_arabic_translate_syn_4class_for_tool_pipeline` is an English model originally trained by SoDehghan.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hs_arabic_translate_syn_4class_for_tool_pipeline_en_5.5.1_3.0_1731309392366.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hs_arabic_translate_syn_4class_for_tool_pipeline_en_5.5.1_3.0_1731309392366.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hs_arabic_translate_syn_4class_for_tool_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hs_arabic_translate_syn_4class_for_tool_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hs_arabic_translate_syn_4class_for_tool_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|507.5 MB| + +## References + +https://huggingface.co/SoDehghan/hs-ar-translate-syn-4class-for-tool + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-hubert_base_japanese_asr_ja.md b/docs/_posts/ahmedlone127/2024-11-11-hubert_base_japanese_asr_ja.md new file mode 100644 index 00000000000000..9664aa5c122e38 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-hubert_base_japanese_asr_ja.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Japanese hubert_base_japanese_asr HubertForCTC from TKU410410103 +author: John Snow Labs +name: hubert_base_japanese_asr +date: 2024-11-11 +tags: [ja, open_source, onnx, asr, hubert] +task: Automatic Speech Recognition +language: ja +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: HubertForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained HubertForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hubert_base_japanese_asr` is a Japanese model originally trained by TKU410410103. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hubert_base_japanese_asr_ja_5.5.1_3.0_1731284393140.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hubert_base_japanese_asr_ja_5.5.1_3.0_1731284393140.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = HubertForCTC.pretrained("hubert_base_japanese_asr","ja") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = HubertForCTC.pretrained("hubert_base_japanese_asr", "ja") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hubert_base_japanese_asr| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ja| +|Size:|697.4 MB| + +## References + +https://huggingface.co/TKU410410103/hubert-base-japanese-asr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-hubert_base_japanese_asr_pipeline_ja.md b/docs/_posts/ahmedlone127/2024-11-11-hubert_base_japanese_asr_pipeline_ja.md new file mode 100644 index 00000000000000..a3c5ebae6a180f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-hubert_base_japanese_asr_pipeline_ja.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Japanese hubert_base_japanese_asr_pipeline pipeline HubertForCTC from TKU410410103 +author: John Snow Labs +name: hubert_base_japanese_asr_pipeline +date: 2024-11-11 +tags: [ja, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: ja +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained HubertForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hubert_base_japanese_asr_pipeline` is a Japanese model originally trained by TKU410410103. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hubert_base_japanese_asr_pipeline_ja_5.5.1_3.0_1731284434411.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hubert_base_japanese_asr_pipeline_ja_5.5.1_3.0_1731284434411.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hubert_base_japanese_asr_pipeline", lang = "ja") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hubert_base_japanese_asr_pipeline", lang = "ja") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hubert_base_japanese_asr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|ja| +|Size:|697.4 MB| + +## References + +https://huggingface.co/TKU410410103/hubert-base-japanese-asr + +## Included Models + +- AudioAssembler +- HubertForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-hubert_large_arabic_egyptian_ar.md b/docs/_posts/ahmedlone127/2024-11-11-hubert_large_arabic_egyptian_ar.md new file mode 100644 index 00000000000000..7efe6d4c7e1243 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-hubert_large_arabic_egyptian_ar.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Arabic hubert_large_arabic_egyptian HubertForCTC from omarxadel +author: John Snow Labs +name: hubert_large_arabic_egyptian +date: 2024-11-11 +tags: [ar, open_source, onnx, asr, hubert] +task: Automatic Speech Recognition +language: ar +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: HubertForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained HubertForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hubert_large_arabic_egyptian` is an Arabic model originally trained by omarxadel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hubert_large_arabic_egyptian_ar_5.5.1_3.0_1731283621734.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hubert_large_arabic_egyptian_ar_5.5.1_3.0_1731283621734.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = HubertForCTC.pretrained("hubert_large_arabic_egyptian","ar") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = HubertForCTC.pretrained("hubert_large_arabic_egyptian", "ar") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hubert_large_arabic_egyptian| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ar| +|Size:|2.4 GB| + +## References + +https://huggingface.co/omarxadel/hubert-large-arabic-egyptian \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-hubert_large_arabic_egyptian_pipeline_ar.md b/docs/_posts/ahmedlone127/2024-11-11-hubert_large_arabic_egyptian_pipeline_ar.md new file mode 100644 index 00000000000000..be4886ef62da41 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-hubert_large_arabic_egyptian_pipeline_ar.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Arabic hubert_large_arabic_egyptian_pipeline pipeline HubertForCTC from omarxadel +author: John Snow Labs +name: hubert_large_arabic_egyptian_pipeline +date: 2024-11-11 +tags: [ar, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: ar +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained HubertForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hubert_large_arabic_egyptian_pipeline` is a Arabic model originally trained by omarxadel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hubert_large_arabic_egyptian_pipeline_ar_5.5.1_3.0_1731283742320.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hubert_large_arabic_egyptian_pipeline_ar_5.5.1_3.0_1731283742320.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hubert_large_arabic_egyptian_pipeline", lang = "ar") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hubert_large_arabic_egyptian_pipeline", lang = "ar") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hubert_large_arabic_egyptian_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|ar| +|Size:|2.4 GB| + +## References + +https://huggingface.co/omarxadel/hubert-large-arabic-egyptian + +## Included Models + +- AudioAssembler +- HubertForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-hubert_large_japanese_asr_ja.md b/docs/_posts/ahmedlone127/2024-11-11-hubert_large_japanese_asr_ja.md new file mode 100644 index 00000000000000..fc3dc746222497 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-hubert_large_japanese_asr_ja.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Japanese hubert_large_japanese_asr HubertForCTC from TKU410410103 +author: John Snow Labs +name: hubert_large_japanese_asr +date: 2024-11-11 +tags: [ja, open_source, onnx, asr, hubert] +task: Automatic Speech Recognition +language: ja +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: HubertForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained HubertForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hubert_large_japanese_asr` is a Japanese model originally trained by TKU410410103. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hubert_large_japanese_asr_ja_5.5.1_3.0_1731283708086.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hubert_large_japanese_asr_ja_5.5.1_3.0_1731283708086.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = HubertForCTC.pretrained("hubert_large_japanese_asr","ja") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = HubertForCTC.pretrained("hubert_large_japanese_asr", "ja") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hubert_large_japanese_asr| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ja| +|Size:|2.4 GB| + +## References + +https://huggingface.co/TKU410410103/hubert-large-japanese-asr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-hubert_large_japanese_asr_pipeline_ja.md b/docs/_posts/ahmedlone127/2024-11-11-hubert_large_japanese_asr_pipeline_ja.md new file mode 100644 index 00000000000000..6259a5600a7c6a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-hubert_large_japanese_asr_pipeline_ja.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Japanese hubert_large_japanese_asr_pipeline pipeline HubertForCTC from TKU410410103 +author: John Snow Labs +name: hubert_large_japanese_asr_pipeline +date: 2024-11-11 +tags: [ja, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: ja +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained HubertForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hubert_large_japanese_asr_pipeline` is a Japanese model originally trained by TKU410410103. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hubert_large_japanese_asr_pipeline_ja_5.5.1_3.0_1731283833877.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hubert_large_japanese_asr_pipeline_ja_5.5.1_3.0_1731283833877.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hubert_large_japanese_asr_pipeline", lang = "ja") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hubert_large_japanese_asr_pipeline", lang = "ja") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hubert_large_japanese_asr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|ja| +|Size:|2.4 GB| + +## References + +https://huggingface.co/TKU410410103/hubert-large-japanese-asr + +## Included Models + +- AudioAssembler +- HubertForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-hubert_ukrainian_pipeline_uk.md b/docs/_posts/ahmedlone127/2024-11-11-hubert_ukrainian_pipeline_uk.md new file mode 100644 index 00000000000000..b0f3f201db86bc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-hubert_ukrainian_pipeline_uk.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Ukrainian hubert_ukrainian_pipeline pipeline HubertForCTC from Yehor +author: John Snow Labs +name: hubert_ukrainian_pipeline +date: 2024-11-11 +tags: [uk, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: uk +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained HubertForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hubert_ukrainian_pipeline` is a Ukrainian model originally trained by Yehor. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hubert_ukrainian_pipeline_uk_5.5.1_3.0_1731284523933.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hubert_ukrainian_pipeline_uk_5.5.1_3.0_1731284523933.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hubert_ukrainian_pipeline", lang = "uk") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hubert_ukrainian_pipeline", lang = "uk") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hubert_ukrainian_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|uk| +|Size:|708.6 MB| + +## References + +https://huggingface.co/Yehor/hubert-uk + +## Included Models + +- AudioAssembler +- HubertForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-hubert_ukrainian_uk.md b/docs/_posts/ahmedlone127/2024-11-11-hubert_ukrainian_uk.md new file mode 100644 index 00000000000000..6e6498989a4c9b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-hubert_ukrainian_uk.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Ukrainian hubert_ukrainian HubertForCTC from Yehor +author: John Snow Labs +name: hubert_ukrainian +date: 2024-11-11 +tags: [uk, open_source, onnx, asr, hubert] +task: Automatic Speech Recognition +language: uk +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: HubertForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained HubertForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hubert_ukrainian` is a Ukrainian model originally trained by Yehor. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hubert_ukrainian_uk_5.5.1_3.0_1731284486012.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hubert_ukrainian_uk_5.5.1_3.0_1731284486012.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = HubertForCTC.pretrained("hubert_ukrainian","uk") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = HubertForCTC.pretrained("hubert_ukrainian", "uk") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hubert_ukrainian| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|uk| +|Size:|708.6 MB| + +## References + +https://huggingface.co/Yehor/hubert-uk \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-hw1_2_question_answering_bert_base_chinese_finetuned_en.md b/docs/_posts/ahmedlone127/2024-11-11-hw1_2_question_answering_bert_base_chinese_finetuned_en.md new file mode 100644 index 00000000000000..78f9ba44376001 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-hw1_2_question_answering_bert_base_chinese_finetuned_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English hw1_2_question_answering_bert_base_chinese_finetuned BertForQuestionAnswering from b10401015 +author: John Snow Labs +name: hw1_2_question_answering_bert_base_chinese_finetuned +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hw1_2_question_answering_bert_base_chinese_finetuned` is a English model originally trained by b10401015. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hw1_2_question_answering_bert_base_chinese_finetuned_en_5.5.1_3.0_1731289448071.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hw1_2_question_answering_bert_base_chinese_finetuned_en_5.5.1_3.0_1731289448071.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("hw1_2_question_answering_bert_base_chinese_finetuned","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("hw1_2_question_answering_bert_base_chinese_finetuned", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hw1_2_question_answering_bert_base_chinese_finetuned| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|381.1 MB| + +## References + +https://huggingface.co/b10401015/hw1-2-question_answering-bert-base-chinese-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-hw1_2_question_answering_bert_base_chinese_finetuned_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-hw1_2_question_answering_bert_base_chinese_finetuned_pipeline_en.md new file mode 100644 index 00000000000000..27192cee5c112e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-hw1_2_question_answering_bert_base_chinese_finetuned_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English hw1_2_question_answering_bert_base_chinese_finetuned_pipeline pipeline BertForQuestionAnswering from b10401015 +author: John Snow Labs +name: hw1_2_question_answering_bert_base_chinese_finetuned_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hw1_2_question_answering_bert_base_chinese_finetuned_pipeline` is a English model originally trained by b10401015. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hw1_2_question_answering_bert_base_chinese_finetuned_pipeline_en_5.5.1_3.0_1731289468089.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hw1_2_question_answering_bert_base_chinese_finetuned_pipeline_en_5.5.1_3.0_1731289468089.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hw1_2_question_answering_bert_base_chinese_finetuned_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hw1_2_question_answering_bert_base_chinese_finetuned_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hw1_2_question_answering_bert_base_chinese_finetuned_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|381.1 MB| + +## References + +https://huggingface.co/b10401015/hw1-2-question_answering-bert-base-chinese-finetuned + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-indobert_squad_indonesian_en.md b/docs/_posts/ahmedlone127/2024-11-11-indobert_squad_indonesian_en.md new file mode 100644 index 00000000000000..83f7c5bb4fd845 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-indobert_squad_indonesian_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English indobert_squad_indonesian BertForQuestionAnswering from malaputri +author: John Snow Labs +name: indobert_squad_indonesian +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`indobert_squad_indonesian` is a English model originally trained by malaputri. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indobert_squad_indonesian_en_5.5.1_3.0_1731288942482.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indobert_squad_indonesian_en_5.5.1_3.0_1731288942482.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("indobert_squad_indonesian","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("indobert_squad_indonesian", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indobert_squad_indonesian| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|464.2 MB| + +## References + +https://huggingface.co/malaputri/indobert-squad-id \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-indobert_squad_indonesian_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-indobert_squad_indonesian_pipeline_en.md new file mode 100644 index 00000000000000..0e68cf8f231ab5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-indobert_squad_indonesian_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English indobert_squad_indonesian_pipeline pipeline BertForQuestionAnswering from malaputri +author: John Snow Labs +name: indobert_squad_indonesian_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`indobert_squad_indonesian_pipeline` is a English model originally trained by malaputri. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indobert_squad_indonesian_pipeline_en_5.5.1_3.0_1731288966242.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indobert_squad_indonesian_pipeline_en_5.5.1_3.0_1731288966242.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("indobert_squad_indonesian_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("indobert_squad_indonesian_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indobert_squad_indonesian_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|464.2 MB| + +## References + +https://huggingface.co/malaputri/indobert-squad-id + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-indonesian_roberta_base_nerp_tagger_en.md b/docs/_posts/ahmedlone127/2024-11-11-indonesian_roberta_base_nerp_tagger_en.md new file mode 100644 index 00000000000000..20ab05f1228396 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-indonesian_roberta_base_nerp_tagger_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English indonesian_roberta_base_nerp_tagger RoBertaForTokenClassification from w11wo +author: John Snow Labs +name: indonesian_roberta_base_nerp_tagger +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`indonesian_roberta_base_nerp_tagger` is a English model originally trained by w11wo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indonesian_roberta_base_nerp_tagger_en_5.5.1_3.0_1731311367416.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indonesian_roberta_base_nerp_tagger_en_5.5.1_3.0_1731311367416.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = RoBertaForTokenClassification.pretrained("indonesian_roberta_base_nerp_tagger","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = RoBertaForTokenClassification.pretrained("indonesian_roberta_base_nerp_tagger", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indonesian_roberta_base_nerp_tagger| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|465.5 MB| + +## References + +https://huggingface.co/w11wo/indonesian-roberta-base-nerp-tagger \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-indonesian_roberta_base_nerp_tagger_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-indonesian_roberta_base_nerp_tagger_pipeline_en.md new file mode 100644 index 00000000000000..ed62781eb46655 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-indonesian_roberta_base_nerp_tagger_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English indonesian_roberta_base_nerp_tagger_pipeline pipeline RoBertaForTokenClassification from w11wo +author: John Snow Labs +name: indonesian_roberta_base_nerp_tagger_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`indonesian_roberta_base_nerp_tagger_pipeline` is a English model originally trained by w11wo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indonesian_roberta_base_nerp_tagger_pipeline_en_5.5.1_3.0_1731311391967.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indonesian_roberta_base_nerp_tagger_pipeline_en_5.5.1_3.0_1731311391967.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("indonesian_roberta_base_nerp_tagger_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("indonesian_roberta_base_nerp_tagger_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indonesian_roberta_base_nerp_tagger_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|465.5 MB| + +## References + +https://huggingface.co/w11wo/indonesian-roberta-base-nerp-tagger + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-kaggle_detect_generated_text_en.md b/docs/_posts/ahmedlone127/2024-11-11-kaggle_detect_generated_text_en.md new file mode 100644 index 00000000000000..eac97868008a6c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-kaggle_detect_generated_text_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English kaggle_detect_generated_text BertForSequenceClassification from fagner +author: John Snow Labs +name: kaggle_detect_generated_text +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kaggle_detect_generated_text` is a English model originally trained by fagner. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kaggle_detect_generated_text_en_5.5.1_3.0_1731309407438.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kaggle_detect_generated_text_en_5.5.1_3.0_1731309407438.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("kaggle_detect_generated_text","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("kaggle_detect_generated_text", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kaggle_detect_generated_text| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/fagner/kaggle-detect-generated-text \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-kaggle_detect_generated_text_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-kaggle_detect_generated_text_pipeline_en.md new file mode 100644 index 00000000000000..bc1c2e832b4942 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-kaggle_detect_generated_text_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English kaggle_detect_generated_text_pipeline pipeline BertForSequenceClassification from fagner +author: John Snow Labs +name: kaggle_detect_generated_text_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kaggle_detect_generated_text_pipeline` is a English model originally trained by fagner. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kaggle_detect_generated_text_pipeline_en_5.5.1_3.0_1731309428785.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kaggle_detect_generated_text_pipeline_en_5.5.1_3.0_1731309428785.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("kaggle_detect_generated_text_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("kaggle_detect_generated_text_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kaggle_detect_generated_text_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/fagner/kaggle-detect-generated-text + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-keybert_bulgarian_bg.md b/docs/_posts/ahmedlone127/2024-11-11-keybert_bulgarian_bg.md new file mode 100644 index 00000000000000..b2cfa011512d1f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-keybert_bulgarian_bg.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Bulgarian keybert_bulgarian BertForTokenClassification from auhide +author: John Snow Labs +name: keybert_bulgarian +date: 2024-11-11 +tags: [bg, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: bg +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`keybert_bulgarian` is a Bulgarian model originally trained by auhide. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/keybert_bulgarian_bg_5.5.1_3.0_1731299301822.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/keybert_bulgarian_bg_5.5.1_3.0_1731299301822.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("keybert_bulgarian","bg") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("keybert_bulgarian", "bg") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|keybert_bulgarian| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|bg| +|Size:|665.0 MB| + +## References + +https://huggingface.co/auhide/keybert-bg \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-keybert_bulgarian_pipeline_bg.md b/docs/_posts/ahmedlone127/2024-11-11-keybert_bulgarian_pipeline_bg.md new file mode 100644 index 00000000000000..9949e29c23a12b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-keybert_bulgarian_pipeline_bg.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Bulgarian keybert_bulgarian_pipeline pipeline BertForTokenClassification from auhide +author: John Snow Labs +name: keybert_bulgarian_pipeline +date: 2024-11-11 +tags: [bg, open_source, pipeline, onnx] +task: Named Entity Recognition +language: bg +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`keybert_bulgarian_pipeline` is a Bulgarian model originally trained by auhide. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/keybert_bulgarian_pipeline_bg_5.5.1_3.0_1731299335640.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/keybert_bulgarian_pipeline_bg_5.5.1_3.0_1731299335640.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("keybert_bulgarian_pipeline", lang = "bg") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("keybert_bulgarian_pipeline", lang = "bg") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|keybert_bulgarian_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|bg| +|Size:|665.1 MB| + +## References + +https://huggingface.co/auhide/keybert-bg + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-kor_naver_ner_name_v2_en.md b/docs/_posts/ahmedlone127/2024-11-11-kor_naver_ner_name_v2_en.md new file mode 100644 index 00000000000000..40a7461b0e818f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-kor_naver_ner_name_v2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English kor_naver_ner_name_v2 BertForTokenClassification from joon09 +author: John Snow Labs +name: kor_naver_ner_name_v2 +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kor_naver_ner_name_v2` is a English model originally trained by joon09. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kor_naver_ner_name_v2_en_5.5.1_3.0_1731291030118.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kor_naver_ner_name_v2_en_5.5.1_3.0_1731291030118.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("kor_naver_ner_name_v2","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("kor_naver_ner_name_v2", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kor_naver_ner_name_v2| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|441.2 MB| + +## References + +https://huggingface.co/joon09/kor-naver-ner-name-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-kor_naver_ner_name_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-kor_naver_ner_name_v2_pipeline_en.md new file mode 100644 index 00000000000000..7d1d6374b024e8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-kor_naver_ner_name_v2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English kor_naver_ner_name_v2_pipeline pipeline BertForTokenClassification from joon09 +author: John Snow Labs +name: kor_naver_ner_name_v2_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kor_naver_ner_name_v2_pipeline` is a English model originally trained by joon09. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kor_naver_ner_name_v2_pipeline_en_5.5.1_3.0_1731291052670.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kor_naver_ner_name_v2_pipeline_en_5.5.1_3.0_1731291052670.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("kor_naver_ner_name_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("kor_naver_ner_name_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kor_naver_ner_name_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|441.3 MB| + +## References + +https://huggingface.co/joon09/kor-naver-ner-name-v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-legal_gqa_7_bert_augmented_all_1000_en.md b/docs/_posts/ahmedlone127/2024-11-11-legal_gqa_7_bert_augmented_all_1000_en.md new file mode 100644 index 00000000000000..93178bb39d4a64 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-legal_gqa_7_bert_augmented_all_1000_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English legal_gqa_7_bert_augmented_all_1000 BertForQuestionAnswering from farid1088 +author: John Snow Labs +name: legal_gqa_7_bert_augmented_all_1000 +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`legal_gqa_7_bert_augmented_all_1000` is a English model originally trained by farid1088. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legal_gqa_7_bert_augmented_all_1000_en_5.5.1_3.0_1731289218328.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legal_gqa_7_bert_augmented_all_1000_en_5.5.1_3.0_1731289218328.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("legal_gqa_7_bert_augmented_all_1000","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("legal_gqa_7_bert_augmented_all_1000", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legal_gqa_7_bert_augmented_all_1000| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/farid1088/Legal_GQA_7_BERT_augmented_all_1000 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-legal_gqa_7_bert_augmented_all_1000_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-legal_gqa_7_bert_augmented_all_1000_pipeline_en.md new file mode 100644 index 00000000000000..9c0b67bc6ada0d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-legal_gqa_7_bert_augmented_all_1000_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English legal_gqa_7_bert_augmented_all_1000_pipeline pipeline BertForQuestionAnswering from farid1088 +author: John Snow Labs +name: legal_gqa_7_bert_augmented_all_1000_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`legal_gqa_7_bert_augmented_all_1000_pipeline` is a English model originally trained by farid1088. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legal_gqa_7_bert_augmented_all_1000_pipeline_en_5.5.1_3.0_1731289252285.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legal_gqa_7_bert_augmented_all_1000_pipeline_en_5.5.1_3.0_1731289252285.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("legal_gqa_7_bert_augmented_all_1000_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("legal_gqa_7_bert_augmented_all_1000_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legal_gqa_7_bert_augmented_all_1000_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/farid1088/Legal_GQA_7_BERT_augmented_all_1000 + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-lettuce_sayula_popoluca_dutch_mono_en.md b/docs/_posts/ahmedlone127/2024-11-11-lettuce_sayula_popoluca_dutch_mono_en.md new file mode 100644 index 00000000000000..ff6f51c90581f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-lettuce_sayula_popoluca_dutch_mono_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English lettuce_sayula_popoluca_dutch_mono RoBertaForTokenClassification from pranaydeeps +author: John Snow Labs +name: lettuce_sayula_popoluca_dutch_mono +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lettuce_sayula_popoluca_dutch_mono` is a English model originally trained by pranaydeeps. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lettuce_sayula_popoluca_dutch_mono_en_5.5.1_3.0_1731314245313.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lettuce_sayula_popoluca_dutch_mono_en_5.5.1_3.0_1731314245313.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = RoBertaForTokenClassification.pretrained("lettuce_sayula_popoluca_dutch_mono","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = RoBertaForTokenClassification.pretrained("lettuce_sayula_popoluca_dutch_mono", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lettuce_sayula_popoluca_dutch_mono| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|436.2 MB| + +## References + +https://huggingface.co/pranaydeeps/lettuce_pos_nl_mono \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-lettuce_sayula_popoluca_dutch_mono_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-lettuce_sayula_popoluca_dutch_mono_pipeline_en.md new file mode 100644 index 00000000000000..30c2a8037a1fce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-lettuce_sayula_popoluca_dutch_mono_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English lettuce_sayula_popoluca_dutch_mono_pipeline pipeline RoBertaForTokenClassification from pranaydeeps +author: John Snow Labs +name: lettuce_sayula_popoluca_dutch_mono_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lettuce_sayula_popoluca_dutch_mono_pipeline` is a English model originally trained by pranaydeeps. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lettuce_sayula_popoluca_dutch_mono_pipeline_en_5.5.1_3.0_1731314271572.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lettuce_sayula_popoluca_dutch_mono_pipeline_en_5.5.1_3.0_1731314271572.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("lettuce_sayula_popoluca_dutch_mono_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("lettuce_sayula_popoluca_dutch_mono_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lettuce_sayula_popoluca_dutch_mono_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|436.2 MB| + +## References + +https://huggingface.co/pranaydeeps/lettuce_pos_nl_mono + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-linshoufanfork_whisper_small_nan_twi_pinyin_en.md b/docs/_posts/ahmedlone127/2024-11-11-linshoufanfork_whisper_small_nan_twi_pinyin_en.md new file mode 100644 index 00000000000000..c49affe661f3d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-linshoufanfork_whisper_small_nan_twi_pinyin_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English linshoufanfork_whisper_small_nan_twi_pinyin WhisperForCTC from linshoufan +author: John Snow Labs +name: linshoufanfork_whisper_small_nan_twi_pinyin +date: 2024-11-11 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`linshoufanfork_whisper_small_nan_twi_pinyin` is a English model originally trained by linshoufan. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/linshoufanfork_whisper_small_nan_twi_pinyin_en_5.5.1_3.0_1731302380184.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/linshoufanfork_whisper_small_nan_twi_pinyin_en_5.5.1_3.0_1731302380184.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("linshoufanfork_whisper_small_nan_twi_pinyin","en") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("linshoufanfork_whisper_small_nan_twi_pinyin", "en") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|linshoufanfork_whisper_small_nan_twi_pinyin| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/linshoufan/linshoufanfork-whisper-small-nan-tw-pinyin \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-linshoufanfork_whisper_small_nan_twi_pinyin_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-linshoufanfork_whisper_small_nan_twi_pinyin_pipeline_en.md new file mode 100644 index 00000000000000..b2662dfecf5220 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-linshoufanfork_whisper_small_nan_twi_pinyin_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English linshoufanfork_whisper_small_nan_twi_pinyin_pipeline pipeline WhisperForCTC from linshoufan +author: John Snow Labs +name: linshoufanfork_whisper_small_nan_twi_pinyin_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`linshoufanfork_whisper_small_nan_twi_pinyin_pipeline` is a English model originally trained by linshoufan. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/linshoufanfork_whisper_small_nan_twi_pinyin_pipeline_en_5.5.1_3.0_1731302466551.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/linshoufanfork_whisper_small_nan_twi_pinyin_pipeline_en_5.5.1_3.0_1731302466551.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("linshoufanfork_whisper_small_nan_twi_pinyin_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("linshoufanfork_whisper_small_nan_twi_pinyin_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|linshoufanfork_whisper_small_nan_twi_pinyin_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/linshoufan/linshoufanfork-whisper-small-nan-tw-pinyin + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-malaysian_whisper_tiny_ms.md b/docs/_posts/ahmedlone127/2024-11-11-malaysian_whisper_tiny_ms.md new file mode 100644 index 00000000000000..d5635f50e1d8cf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-malaysian_whisper_tiny_ms.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Malay (macrolanguage) malaysian_whisper_tiny WhisperForCTC from mesolitica +author: John Snow Labs +name: malaysian_whisper_tiny +date: 2024-11-11 +tags: [ms, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: ms +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`malaysian_whisper_tiny` is a Malay (macrolanguage) model originally trained by mesolitica. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/malaysian_whisper_tiny_ms_5.5.1_3.0_1731305964206.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/malaysian_whisper_tiny_ms_5.5.1_3.0_1731305964206.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("malaysian_whisper_tiny","ms") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("malaysian_whisper_tiny", "ms") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|malaysian_whisper_tiny| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ms| +|Size:|191.1 MB| + +## References + +https://huggingface.co/mesolitica/malaysian-whisper-tiny \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-malaysian_whisper_tiny_pipeline_ms.md b/docs/_posts/ahmedlone127/2024-11-11-malaysian_whisper_tiny_pipeline_ms.md new file mode 100644 index 00000000000000..68a49271f19872 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-malaysian_whisper_tiny_pipeline_ms.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Malay (macrolanguage) malaysian_whisper_tiny_pipeline pipeline WhisperForCTC from mesolitica +author: John Snow Labs +name: malaysian_whisper_tiny_pipeline +date: 2024-11-11 +tags: [ms, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: ms +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`malaysian_whisper_tiny_pipeline` is a Malay (macrolanguage) model originally trained by mesolitica. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/malaysian_whisper_tiny_pipeline_ms_5.5.1_3.0_1731306024102.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/malaysian_whisper_tiny_pipeline_ms_5.5.1_3.0_1731306024102.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("malaysian_whisper_tiny_pipeline", lang = "ms") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("malaysian_whisper_tiny_pipeline", lang = "ms") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|malaysian_whisper_tiny_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|ms| +|Size:|191.1 MB| + +## References + +https://huggingface.co/mesolitica/malaysian-whisper-tiny + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-mbert_finetuned_mlqa_dev_spanish_chinese_hindi_en.md b/docs/_posts/ahmedlone127/2024-11-11-mbert_finetuned_mlqa_dev_spanish_chinese_hindi_en.md new file mode 100644 index 00000000000000..a24fea30c337a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-mbert_finetuned_mlqa_dev_spanish_chinese_hindi_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English mbert_finetuned_mlqa_dev_spanish_chinese_hindi BertForQuestionAnswering from roshnir +author: John Snow Labs +name: mbert_finetuned_mlqa_dev_spanish_chinese_hindi +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_finetuned_mlqa_dev_spanish_chinese_hindi` is an English model originally trained by roshnir.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_finetuned_mlqa_dev_spanish_chinese_hindi_en_5.5.1_3.0_1731308064944.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_finetuned_mlqa_dev_spanish_chinese_hindi_en_5.5.1_3.0_1731308064944.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("mbert_finetuned_mlqa_dev_spanish_chinese_hindi","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("mbert_finetuned_mlqa_dev_spanish_chinese_hindi", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_finetuned_mlqa_dev_spanish_chinese_hindi| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|625.5 MB| + +## References + +https://huggingface.co/roshnir/mBert-finetuned-mlqa-dev-es-zh-hi \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-mbert_finetuned_mlqa_dev_spanish_chinese_hindi_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-mbert_finetuned_mlqa_dev_spanish_chinese_hindi_pipeline_en.md new file mode 100644 index 00000000000000..092368edd847aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-mbert_finetuned_mlqa_dev_spanish_chinese_hindi_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English mbert_finetuned_mlqa_dev_spanish_chinese_hindi_pipeline pipeline BertForQuestionAnswering from roshnir +author: John Snow Labs +name: mbert_finetuned_mlqa_dev_spanish_chinese_hindi_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_finetuned_mlqa_dev_spanish_chinese_hindi_pipeline` is an English model originally trained by roshnir.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_finetuned_mlqa_dev_spanish_chinese_hindi_pipeline_en_5.5.1_3.0_1731308096792.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_finetuned_mlqa_dev_spanish_chinese_hindi_pipeline_en_5.5.1_3.0_1731308096792.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mbert_finetuned_mlqa_dev_spanish_chinese_hindi_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mbert_finetuned_mlqa_dev_spanish_chinese_hindi_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_finetuned_mlqa_dev_spanish_chinese_hindi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|625.5 MB| + +## References + +https://huggingface.co/roshnir/mBert-finetuned-mlqa-dev-es-zh-hi + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-mbert_urdu_en.md b/docs/_posts/ahmedlone127/2024-11-11-mbert_urdu_en.md new file mode 100644 index 00000000000000..daca24f181aa89 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-mbert_urdu_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mbert_urdu BertForTokenClassification from anwesham +author: John Snow Labs +name: mbert_urdu +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mbert_urdu` is a English model originally trained by anwesham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_urdu_en_5.5.1_3.0_1731285721176.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_urdu_en_5.5.1_3.0_1731285721176.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("mbert_urdu","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("mbert_urdu", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_urdu| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|665.1 MB| + +## References + +https://huggingface.co/anwesham/mbert_ur \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-mbert_urdu_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-mbert_urdu_pipeline_en.md new file mode 100644 index 00000000000000..eea33b8a7387db --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-mbert_urdu_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mbert_urdu_pipeline pipeline BertForTokenClassification from anwesham +author: John Snow Labs +name: mbert_urdu_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mbert_urdu_pipeline` is a English model originally trained by anwesham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_urdu_pipeline_en_5.5.1_3.0_1731285756767.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_urdu_pipeline_en_5.5.1_3.0_1731285756767.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mbert_urdu_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mbert_urdu_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_urdu_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|665.1 MB| + +## References + +https://huggingface.co/anwesham/mbert_ur + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-med_drugs_extraction_b_en.md b/docs/_posts/ahmedlone127/2024-11-11-med_drugs_extraction_b_en.md new file mode 100644 index 00000000000000..a410af14a15ad0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-med_drugs_extraction_b_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English med_drugs_extraction_b BertForQuestionAnswering from iliyararupzhanov +author: John Snow Labs +name: med_drugs_extraction_b +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`med_drugs_extraction_b` is a English model originally trained by iliyararupzhanov. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/med_drugs_extraction_b_en_5.5.1_3.0_1731289511976.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/med_drugs_extraction_b_en_5.5.1_3.0_1731289511976.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("med_drugs_extraction_b","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("med_drugs_extraction_b", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|med_drugs_extraction_b| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|664.3 MB| + +## References + +https://huggingface.co/iliyararupzhanov/med-drugs-extraction-b \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-med_drugs_extraction_b_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-med_drugs_extraction_b_pipeline_en.md new file mode 100644 index 00000000000000..b213293f6bfc73 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-med_drugs_extraction_b_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English med_drugs_extraction_b_pipeline pipeline BertForQuestionAnswering from iliyararupzhanov +author: John Snow Labs +name: med_drugs_extraction_b_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`med_drugs_extraction_b_pipeline` is a English model originally trained by iliyararupzhanov. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/med_drugs_extraction_b_pipeline_en_5.5.1_3.0_1731289549276.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/med_drugs_extraction_b_pipeline_en_5.5.1_3.0_1731289549276.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("med_drugs_extraction_b_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("med_drugs_extraction_b_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|med_drugs_extraction_b_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|664.3 MB| + +## References + +https://huggingface.co/iliyararupzhanov/med-drugs-extraction-b + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-men_tshirt_en.md b/docs/_posts/ahmedlone127/2024-11-11-men_tshirt_en.md new file mode 100644 index 00000000000000..a6ed40de416d2a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-men_tshirt_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English men_tshirt CLIPForZeroShotClassification from HarshN-0722 +author: John Snow Labs +name: men_tshirt +date: 2024-11-11 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`men_tshirt` is a English model originally trained by HarshN-0722. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/men_tshirt_en_5.5.1_3.0_1731287421873.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/men_tshirt_en_5.5.1_3.0_1731287421873.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +imageDF = spark.read \ + .format("image") \ + .option("dropInvalid", value = True) \ + .load("src/test/resources/image/") + +candidateLabels = [ + "a photo of a bird", + "a photo of a cat", + "a photo of a dog", + "a photo of a hen", + "a photo of a hippo", + "a photo of a room", + "a photo of a tractor", + "a photo of an ostrich", + "a photo of an ox"] + +imageAssembler = ImageAssembler() \ + .setInputCol("image") \ + .setOutputCol("image_assembler") + +imageClassifier = CLIPForZeroShotClassification.pretrained("men_tshirt","en") \ + .setInputCols(["image_assembler"]) \ + .setOutputCol("label") \ + .setCandidateLabels(candidateLabels) + +pipeline = Pipeline().setStages([imageAssembler, imageClassifier]) +pipelineModel = pipeline.fit(imageDF) +pipelineDF = pipelineModel.transform(imageDF) + + +``` +```scala + + +val imageDF = ResourceHelper.spark.read + .format("image") + .option("dropInvalid", value = true) + .load("src/test/resources/image/") + +val candidateLabels = Array( + "a photo of a bird", + "a photo of a cat", + "a photo of a dog", + "a photo of a hen", + "a photo of a hippo", + "a photo of a room", + "a photo of a tractor", + "a photo of an ostrich", + "a photo of an ox") + +val imageAssembler = new ImageAssembler() + .setInputCol("image") + .setOutputCol("image_assembler") + +val imageClassifier = CLIPForZeroShotClassification.pretrained("men_tshirt","en") + .setInputCols(Array("image_assembler")) + .setOutputCol("label") + .setCandidateLabels(candidateLabels) + +val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier)) +val pipelineModel = pipeline.fit(imageDF) +val pipelineDF = pipelineModel.transform(imageDF) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|men_tshirt| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|567.3 MB| + +## References + +https://huggingface.co/HarshN-0722/men-tshirt \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-men_tshirt_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-men_tshirt_pipeline_en.md new file mode 100644 index 00000000000000..8962b560cf8538 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-men_tshirt_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English men_tshirt_pipeline pipeline CLIPForZeroShotClassification from HarshN-0722 +author: John Snow Labs +name: men_tshirt_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`men_tshirt_pipeline` is a English model originally trained by HarshN-0722. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/men_tshirt_pipeline_en_5.5.1_3.0_1731287450972.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/men_tshirt_pipeline_en_5.5.1_3.0_1731287450972.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("men_tshirt_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("men_tshirt_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|men_tshirt_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|567.3 MB| + +## References + +https://huggingface.co/HarshN-0722/men-tshirt + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-mi_chatbotv3_en.md b/docs/_posts/ahmedlone127/2024-11-11-mi_chatbotv3_en.md new file mode 100644 index 00000000000000..18d56dc54176f5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-mi_chatbotv3_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English mi_chatbotv3 BertForQuestionAnswering from DanielAvelar09 +author: John Snow Labs +name: mi_chatbotv3 +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mi_chatbotv3` is a English model originally trained by DanielAvelar09. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mi_chatbotv3_en_5.5.1_3.0_1731307787887.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mi_chatbotv3_en_5.5.1_3.0_1731307787887.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("mi_chatbotv3","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("mi_chatbotv3", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mi_chatbotv3| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/DanielAvelar09/mi_chatbotV3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-mi_chatbotv3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-mi_chatbotv3_pipeline_en.md new file mode 100644 index 00000000000000..26d886362fb586 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-mi_chatbotv3_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English mi_chatbotv3_pipeline pipeline BertForQuestionAnswering from DanielAvelar09 +author: John Snow Labs +name: mi_chatbotv3_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mi_chatbotv3_pipeline` is a English model originally trained by DanielAvelar09. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mi_chatbotv3_pipeline_en_5.5.1_3.0_1731307810155.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mi_chatbotv3_pipeline_en_5.5.1_3.0_1731307810155.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mi_chatbotv3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mi_chatbotv3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mi_chatbotv3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/DanielAvelar09/mi_chatbotV3 + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-mobilebert_uncased_squad_v2_finetuned_en.md b/docs/_posts/ahmedlone127/2024-11-11-mobilebert_uncased_squad_v2_finetuned_en.md new file mode 100644 index 00000000000000..4d6769c4aab15c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-mobilebert_uncased_squad_v2_finetuned_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English mobilebert_uncased_squad_v2_finetuned BertForQuestionAnswering from badokorach +author: John Snow Labs +name: mobilebert_uncased_squad_v2_finetuned +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mobilebert_uncased_squad_v2_finetuned` is a English model originally trained by badokorach. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mobilebert_uncased_squad_v2_finetuned_en_5.5.1_3.0_1731289026811.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mobilebert_uncased_squad_v2_finetuned_en_5.5.1_3.0_1731289026811.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("mobilebert_uncased_squad_v2_finetuned","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("mobilebert_uncased_squad_v2_finetuned", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mobilebert_uncased_squad_v2_finetuned| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|92.5 MB| + +## References + +https://huggingface.co/badokorach/mobilebert-uncased-squad-v2-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-mobilebert_uncased_squad_v2_finetuned_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-mobilebert_uncased_squad_v2_finetuned_pipeline_en.md new file mode 100644 index 00000000000000..8508a5a42095a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-mobilebert_uncased_squad_v2_finetuned_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English mobilebert_uncased_squad_v2_finetuned_pipeline pipeline BertForQuestionAnswering from badokorach +author: John Snow Labs +name: mobilebert_uncased_squad_v2_finetuned_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mobilebert_uncased_squad_v2_finetuned_pipeline` is an English model originally trained by badokorach. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mobilebert_uncased_squad_v2_finetuned_pipeline_en_5.5.1_3.0_1731289031710.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mobilebert_uncased_squad_v2_finetuned_pipeline_en_5.5.1_3.0_1731289031710.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mobilebert_uncased_squad_v2_finetuned_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mobilebert_uncased_squad_v2_finetuned_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mobilebert_uncased_squad_v2_finetuned_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|92.5 MB| + +## References + +https://huggingface.co/badokorach/mobilebert-uncased-squad-v2-finetuned + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-mountain_ner_model_en.md b/docs/_posts/ahmedlone127/2024-11-11-mountain_ner_model_en.md new file mode 100644 index 00000000000000..29d92875ae4cb6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-mountain_ner_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mountain_ner_model BertForTokenClassification from Shah1st +author: John Snow Labs +name: mountain_ner_model +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mountain_ner_model` is an English model originally trained by Shah1st. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mountain_ner_model_en_5.5.1_3.0_1731298624923.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mountain_ner_model_en_5.5.1_3.0_1731298624923.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("mountain_ner_model","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("mountain_ner_model", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mountain_ner_model| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/Shah1st/mountain-ner-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-mountain_ner_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-mountain_ner_model_pipeline_en.md new file mode 100644 index 00000000000000..41894220bbe460 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-mountain_ner_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mountain_ner_model_pipeline pipeline BertForTokenClassification from Shah1st +author: John Snow Labs +name: mountain_ner_model_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mountain_ner_model_pipeline` is an English model originally trained by Shah1st. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mountain_ner_model_pipeline_en_5.5.1_3.0_1731298688183.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mountain_ner_model_pipeline_en_5.5.1_3.0_1731298688183.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mountain_ner_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mountain_ner_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mountain_ner_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/Shah1st/mountain-ner-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-mountain_recognition_ner_en.md b/docs/_posts/ahmedlone127/2024-11-11-mountain_recognition_ner_en.md new file mode 100644 index 00000000000000..acfcc435fb47a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-mountain_recognition_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mountain_recognition_ner BertForTokenClassification from dieumerci +author: John Snow Labs +name: mountain_recognition_ner +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mountain_recognition_ner` is an English model originally trained by dieumerci. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mountain_recognition_ner_en_5.5.1_3.0_1731290821656.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mountain_recognition_ner_en_5.5.1_3.0_1731290821656.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("mountain_recognition_ner","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("mountain_recognition_ner", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mountain_recognition_ner| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/dieumerci/mountain-recognition-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-mountain_recognition_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-mountain_recognition_ner_pipeline_en.md new file mode 100644 index 00000000000000..3ead37a9c09f49 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-mountain_recognition_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mountain_recognition_ner_pipeline pipeline BertForTokenClassification from dieumerci +author: John Snow Labs +name: mountain_recognition_ner_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mountain_recognition_ner_pipeline` is an English model originally trained by dieumerci. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mountain_recognition_ner_pipeline_en_5.5.1_3.0_1731290884462.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mountain_recognition_ner_pipeline_en_5.5.1_3.0_1731290884462.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mountain_recognition_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mountain_recognition_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mountain_recognition_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/dieumerci/mountain-recognition-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-mpnet_base_all_pittsburgh_squad_en.md b/docs/_posts/ahmedlone127/2024-11-11-mpnet_base_all_pittsburgh_squad_en.md new file mode 100644 index 00000000000000..4e012e211c5f49 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-mpnet_base_all_pittsburgh_squad_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English mpnet_base_all_pittsburgh_squad MPNetEmbeddings from lizchu414 +author: John Snow Labs +name: mpnet_base_all_pittsburgh_squad +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mpnet_base_all_pittsburgh_squad` is an English model originally trained by lizchu414. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_base_all_pittsburgh_squad_en_5.5.1_3.0_1731295097151.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_base_all_pittsburgh_squad_en_5.5.1_3.0_1731295097151.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("mpnet_base_all_pittsburgh_squad","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("mpnet_base_all_pittsburgh_squad","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_base_all_pittsburgh_squad| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/lizchu414/mpnet-base-all-pittsburgh-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-mpnet_base_all_pittsburgh_squad_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-mpnet_base_all_pittsburgh_squad_pipeline_en.md new file mode 100644 index 00000000000000..98d7deca55b9c1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-mpnet_base_all_pittsburgh_squad_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English mpnet_base_all_pittsburgh_squad_pipeline pipeline MPNetEmbeddings from lizchu414 +author: John Snow Labs +name: mpnet_base_all_pittsburgh_squad_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mpnet_base_all_pittsburgh_squad_pipeline` is an English model originally trained by lizchu414. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_base_all_pittsburgh_squad_pipeline_en_5.5.1_3.0_1731295123391.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_base_all_pittsburgh_squad_pipeline_en_5.5.1_3.0_1731295123391.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mpnet_base_all_pittsburgh_squad_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mpnet_base_all_pittsburgh_squad_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_base_all_pittsburgh_squad_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/lizchu414/mpnet-base-all-pittsburgh-squad + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-msu_wiki_ner_pipeline_ru.md b/docs/_posts/ahmedlone127/2024-11-11-msu_wiki_ner_pipeline_ru.md new file mode 100644 index 00000000000000..052a34fd1292cc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-msu_wiki_ner_pipeline_ru.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Russian msu_wiki_ner_pipeline pipeline BertForTokenClassification from nesemenpolkov +author: John Snow Labs +name: msu_wiki_ner_pipeline +date: 2024-11-11 +tags: [ru, open_source, pipeline, onnx] +task: Named Entity Recognition +language: ru +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`msu_wiki_ner_pipeline` is a Russian model originally trained by nesemenpolkov. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/msu_wiki_ner_pipeline_ru_5.5.1_3.0_1731298735156.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/msu_wiki_ner_pipeline_ru_5.5.1_3.0_1731298735156.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("msu_wiki_ner_pipeline", lang = "ru") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("msu_wiki_ner_pipeline", lang = "ru") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|msu_wiki_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|ru| +|Size:|665.1 MB| + +## References + +https://huggingface.co/nesemenpolkov/msu-wiki-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-msu_wiki_ner_ru.md b/docs/_posts/ahmedlone127/2024-11-11-msu_wiki_ner_ru.md new file mode 100644 index 00000000000000..f9978264c43363 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-msu_wiki_ner_ru.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Russian msu_wiki_ner BertForTokenClassification from nesemenpolkov +author: John Snow Labs +name: msu_wiki_ner +date: 2024-11-11 +tags: [ru, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: ru +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`msu_wiki_ner` is a Russian model originally trained by nesemenpolkov. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/msu_wiki_ner_ru_5.5.1_3.0_1731298695259.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/msu_wiki_ner_ru_5.5.1_3.0_1731298695259.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("msu_wiki_ner","ru") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("msu_wiki_ner", "ru") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|msu_wiki_ner| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|ru| +|Size:|665.1 MB| + +## References + +https://huggingface.co/nesemenpolkov/msu-wiki-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-nasa_smd_ibm_v0_1_uat_labeler_en.md b/docs/_posts/ahmedlone127/2024-11-11-nasa_smd_ibm_v0_1_uat_labeler_en.md new file mode 100644 index 00000000000000..bb7386e46fce77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-nasa_smd_ibm_v0_1_uat_labeler_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nasa_smd_ibm_v0_1_uat_labeler RoBertaForTokenClassification from adsabs +author: John Snow Labs +name: nasa_smd_ibm_v0_1_uat_labeler +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nasa_smd_ibm_v0_1_uat_labeler` is an English model originally trained by adsabs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nasa_smd_ibm_v0_1_uat_labeler_en_5.5.1_3.0_1731310993704.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nasa_smd_ibm_v0_1_uat_labeler_en_5.5.1_3.0_1731310993704.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = RoBertaForTokenClassification.pretrained("nasa_smd_ibm_v0_1_uat_labeler","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = RoBertaForTokenClassification.pretrained("nasa_smd_ibm_v0_1_uat_labeler", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nasa_smd_ibm_v0_1_uat_labeler| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|472.8 MB| + +## References + +https://huggingface.co/adsabs/nasa-smd-ibm-v0.1_UAT_Labeler \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-nasa_smd_ibm_v0_1_uat_labeler_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-nasa_smd_ibm_v0_1_uat_labeler_pipeline_en.md new file mode 100644 index 00000000000000..1d21f37f04d8ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-nasa_smd_ibm_v0_1_uat_labeler_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English nasa_smd_ibm_v0_1_uat_labeler_pipeline pipeline RoBertaForTokenClassification from adsabs +author: John Snow Labs +name: nasa_smd_ibm_v0_1_uat_labeler_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nasa_smd_ibm_v0_1_uat_labeler_pipeline` is an English model originally trained by adsabs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nasa_smd_ibm_v0_1_uat_labeler_pipeline_en_5.5.1_3.0_1731311018084.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nasa_smd_ibm_v0_1_uat_labeler_pipeline_en_5.5.1_3.0_1731311018084.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nasa_smd_ibm_v0_1_uat_labeler_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nasa_smd_ibm_v0_1_uat_labeler_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nasa_smd_ibm_v0_1_uat_labeler_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|472.8 MB| + +## References + +https://huggingface.co/adsabs/nasa-smd-ibm-v0.1_UAT_Labeler + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-nees_bert_base_portuguese_cased_finetuned_ner_en.md b/docs/_posts/ahmedlone127/2024-11-11-nees_bert_base_portuguese_cased_finetuned_ner_en.md new file mode 100644 index 00000000000000..23812f06211044 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-nees_bert_base_portuguese_cased_finetuned_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nees_bert_base_portuguese_cased_finetuned_ner BertForTokenClassification from yuridrcosta +author: John Snow Labs +name: nees_bert_base_portuguese_cased_finetuned_ner +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nees_bert_base_portuguese_cased_finetuned_ner` is an English model originally trained by yuridrcosta. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nees_bert_base_portuguese_cased_finetuned_ner_en_5.5.1_3.0_1731290402339.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nees_bert_base_portuguese_cased_finetuned_ner_en_5.5.1_3.0_1731290402339.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("nees_bert_base_portuguese_cased_finetuned_ner","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("nees_bert_base_portuguese_cased_finetuned_ner", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nees_bert_base_portuguese_cased_finetuned_ner| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|406.0 MB| + +## References + +https://huggingface.co/yuridrcosta/nees-bert-base-portuguese-cased-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-nees_bert_base_portuguese_cased_finetuned_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-nees_bert_base_portuguese_cased_finetuned_ner_pipeline_en.md new file mode 100644 index 00000000000000..62bd04fcc78147 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-nees_bert_base_portuguese_cased_finetuned_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English nees_bert_base_portuguese_cased_finetuned_ner_pipeline pipeline BertForTokenClassification from yuridrcosta +author: John Snow Labs +name: nees_bert_base_portuguese_cased_finetuned_ner_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nees_bert_base_portuguese_cased_finetuned_ner_pipeline` is a English model originally trained by yuridrcosta. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nees_bert_base_portuguese_cased_finetuned_ner_pipeline_en_5.5.1_3.0_1731290424071.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nees_bert_base_portuguese_cased_finetuned_ner_pipeline_en_5.5.1_3.0_1731290424071.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nees_bert_base_portuguese_cased_finetuned_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nees_bert_base_portuguese_cased_finetuned_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nees_bert_base_portuguese_cased_finetuned_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.0 MB| + +## References + +https://huggingface.co/yuridrcosta/nees-bert-base-portuguese-cased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-nepal_bhasa_biored_model_en.md b/docs/_posts/ahmedlone127/2024-11-11-nepal_bhasa_biored_model_en.md new file mode 100644 index 00000000000000..7b17b75e6b2a00 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-nepal_bhasa_biored_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nepal_bhasa_biored_model BertForTokenClassification from c-x-he +author: John Snow Labs +name: nepal_bhasa_biored_model +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nepal_bhasa_biored_model` is a English model originally trained by c-x-he. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nepal_bhasa_biored_model_en_5.5.1_3.0_1731299083922.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nepal_bhasa_biored_model_en_5.5.1_3.0_1731299083922.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("nepal_bhasa_biored_model","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("nepal_bhasa_biored_model", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nepal_bhasa_biored_model| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/c-x-he/New_BioRED_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-nepal_bhasa_biored_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-nepal_bhasa_biored_model_pipeline_en.md new file mode 100644 index 00000000000000..1e9e12dce5f0ff --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-nepal_bhasa_biored_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English nepal_bhasa_biored_model_pipeline pipeline BertForTokenClassification from c-x-he +author: John Snow Labs +name: nepal_bhasa_biored_model_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nepal_bhasa_biored_model_pipeline` is a English model originally trained by c-x-he. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nepal_bhasa_biored_model_pipeline_en_5.5.1_3.0_1731299109074.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nepal_bhasa_biored_model_pipeline_en_5.5.1_3.0_1731299109074.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nepal_bhasa_biored_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nepal_bhasa_biored_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nepal_bhasa_biored_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/c-x-he/New_BioRED_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-ner_finetuning_beto_en.md b/docs/_posts/ahmedlone127/2024-11-11-ner_finetuning_beto_en.md new file mode 100644 index 00000000000000..ca901d075e86d5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-ner_finetuning_beto_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ner_finetuning_beto BertForTokenClassification from raulgdp +author: John Snow Labs +name: ner_finetuning_beto +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_finetuning_beto` is a English model originally trained by raulgdp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_finetuning_beto_en_5.5.1_3.0_1731290423097.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_finetuning_beto_en_5.5.1_3.0_1731290423097.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("ner_finetuning_beto","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("ner_finetuning_beto", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_finetuning_beto| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/raulgdp/NER-finetuning-BETO \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-ner_finetuning_beto_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-ner_finetuning_beto_pipeline_en.md new file mode 100644 index 00000000000000..aca24b66ba6ff9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-ner_finetuning_beto_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ner_finetuning_beto_pipeline pipeline BertForTokenClassification from raulgdp +author: John Snow Labs +name: ner_finetuning_beto_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_finetuning_beto_pipeline` is a English model originally trained by raulgdp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_finetuning_beto_pipeline_en_5.5.1_3.0_1731290448484.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_finetuning_beto_pipeline_en_5.5.1_3.0_1731290448484.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ner_finetuning_beto_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ner_finetuning_beto_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_finetuning_beto_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/raulgdp/NER-finetuning-BETO + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-ner_finetuning_beto_pro_en.md b/docs/_posts/ahmedlone127/2024-11-11-ner_finetuning_beto_pro_en.md new file mode 100644 index 00000000000000..f227572b453a38 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-ner_finetuning_beto_pro_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ner_finetuning_beto_pro BertForTokenClassification from raulgdp +author: John Snow Labs +name: ner_finetuning_beto_pro +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_finetuning_beto_pro` is a English model originally trained by raulgdp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_finetuning_beto_pro_en_5.5.1_3.0_1731290926725.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_finetuning_beto_pro_en_5.5.1_3.0_1731290926725.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("ner_finetuning_beto_pro","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("ner_finetuning_beto_pro", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_finetuning_beto_pro| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/raulgdp/NER-finetuning-BETO-PRO \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-ner_finetuning_beto_pro_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-ner_finetuning_beto_pro_pipeline_en.md new file mode 100644 index 00000000000000..86b6c7d149b34b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-ner_finetuning_beto_pro_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ner_finetuning_beto_pro_pipeline pipeline BertForTokenClassification from raulgdp +author: John Snow Labs +name: ner_finetuning_beto_pro_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_finetuning_beto_pro_pipeline` is a English model originally trained by raulgdp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_finetuning_beto_pro_pipeline_en_5.5.1_3.0_1731290947906.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_finetuning_beto_pro_pipeline_en_5.5.1_3.0_1731290947906.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ner_finetuning_beto_pro_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ner_finetuning_beto_pro_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_finetuning_beto_pro_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/raulgdp/NER-finetuning-BETO-PRO + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-ner_model_en.md b/docs/_posts/ahmedlone127/2024-11-11-ner_model_en.md new file mode 100644 index 00000000000000..184b8ed0fb9ff0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-ner_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ner_model BertForTokenClassification from MichaelSargious +author: John Snow Labs +name: ner_model +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_model` is a English model originally trained by MichaelSargious. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_model_en_5.5.1_3.0_1731290717439.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_model_en_5.5.1_3.0_1731290717439.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("ner_model","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("ner_model", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_model| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|665.1 MB| + +## References + +https://huggingface.co/MichaelSargious/ner_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-ner_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-ner_model_pipeline_en.md new file mode 100644 index 00000000000000..a38e0bd1042ce6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-ner_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ner_model_pipeline pipeline BertForTokenClassification from MichaelSargious +author: John Snow Labs +name: ner_model_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_model_pipeline` is a English model originally trained by MichaelSargious. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_model_pipeline_en_5.5.1_3.0_1731290752686.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_model_pipeline_en_5.5.1_3.0_1731290752686.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ner_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ner_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|665.1 MB| + +## References + +https://huggingface.co/MichaelSargious/ner_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-ner_tokenclassification_persian_en.md b/docs/_posts/ahmedlone127/2024-11-11-ner_tokenclassification_persian_en.md new file mode 100644 index 00000000000000..de9e6d9f0963be --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-ner_tokenclassification_persian_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ner_tokenclassification_persian BertForTokenClassification from AidAFadaeian +author: John Snow Labs +name: ner_tokenclassification_persian +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_tokenclassification_persian` is a English model originally trained by AidAFadaeian. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_tokenclassification_persian_en_5.5.1_3.0_1731298967003.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_tokenclassification_persian_en_5.5.1_3.0_1731298967003.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("ner_tokenclassification_persian","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("ner_tokenclassification_persian", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_tokenclassification_persian| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|606.6 MB| + +## References + +https://huggingface.co/AidAFadaeian/NER_tokenclassification_persian \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-ner_tokenclassification_persian_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-ner_tokenclassification_persian_pipeline_en.md new file mode 100644 index 00000000000000..6eaca66cadd93a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-ner_tokenclassification_persian_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ner_tokenclassification_persian_pipeline pipeline BertForTokenClassification from AidAFadaeian +author: John Snow Labs +name: ner_tokenclassification_persian_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_tokenclassification_persian_pipeline` is a English model originally trained by AidAFadaeian. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_tokenclassification_persian_pipeline_en_5.5.1_3.0_1731298999068.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_tokenclassification_persian_pipeline_en_5.5.1_3.0_1731298999068.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ner_tokenclassification_persian_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ner_tokenclassification_persian_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_tokenclassification_persian_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|606.6 MB| + +## References + +https://huggingface.co/AidAFadaeian/NER_tokenclassification_persian + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-ner_xlmr_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-11-11-ner_xlmr_pipeline_xx.md new file mode 100644 index 00000000000000..b8f08c1fba4434 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-ner_xlmr_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual ner_xlmr_pipeline pipeline XlmRoBertaForTokenClassification from programmersilvanus +author: John Snow Labs +name: ner_xlmr_pipeline +date: 2024-11-11 +tags: [xx, open_source, pipeline, onnx] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_xlmr_pipeline` is a Multilingual model originally trained by programmersilvanus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_xlmr_pipeline_xx_5.5.1_3.0_1731293483443.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_xlmr_pipeline_xx_5.5.1_3.0_1731293483443.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ner_xlmr_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ner_xlmr_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_xlmr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|832.6 MB| + +## References + +https://huggingface.co/programmersilvanus/ner-xlmr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-ner_xlmr_xx.md b/docs/_posts/ahmedlone127/2024-11-11-ner_xlmr_xx.md new file mode 100644 index 00000000000000..2f8c9718aa9e4e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-ner_xlmr_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual ner_xlmr XlmRoBertaForTokenClassification from programmersilvanus +author: John Snow Labs +name: ner_xlmr +date: 2024-11-11 +tags: [xx, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_xlmr` is a Multilingual model originally trained by programmersilvanus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_xlmr_xx_5.5.1_3.0_1731293392020.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_xlmr_xx_5.5.1_3.0_1731293392020.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = XlmRoBertaForTokenClassification.pretrained("ner_xlmr","xx") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("ner_xlmr", "xx") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_xlmr| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|xx| +|Size:|832.6 MB| + +## References + +https://huggingface.co/programmersilvanus/ner-xlmr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-nerugm_base_3_id.md b/docs/_posts/ahmedlone127/2024-11-11-nerugm_base_3_id.md new file mode 100644 index 00000000000000..24b935db19a2b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-nerugm_base_3_id.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Indonesian nerugm_base_3 BertForTokenClassification from apwic +author: John Snow Labs +name: nerugm_base_3 +date: 2024-11-11 +tags: [id, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: id +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nerugm_base_3` is an Indonesian model originally trained by apwic. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nerugm_base_3_id_5.5.1_3.0_1731285227978.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nerugm_base_3_id_5.5.1_3.0_1731285227978.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("nerugm_base_3","id") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("nerugm_base_3", "id") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nerugm_base_3| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|id| +|Size:|411.8 MB| + +## References + +https://huggingface.co/apwic/nerugm-base-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-nerugm_base_3_pipeline_id.md b/docs/_posts/ahmedlone127/2024-11-11-nerugm_base_3_pipeline_id.md new file mode 100644 index 00000000000000..be21a734240864 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-nerugm_base_3_pipeline_id.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Indonesian nerugm_base_3_pipeline pipeline BertForTokenClassification from apwic +author: John Snow Labs +name: nerugm_base_3_pipeline +date: 2024-11-11 +tags: [id, open_source, pipeline, onnx] +task: Named Entity Recognition +language: id +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nerugm_base_3_pipeline` is an Indonesian model originally trained by apwic. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nerugm_base_3_pipeline_id_5.5.1_3.0_1731285257868.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nerugm_base_3_pipeline_id_5.5.1_3.0_1731285257868.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nerugm_base_3_pipeline", lang = "id") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nerugm_base_3_pipeline", lang = "id") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nerugm_base_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|id| +|Size:|411.8 MB| + +## References + +https://huggingface.co/apwic/nerugm-base-3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-nlp_tutorial_ner_en.md b/docs/_posts/ahmedlone127/2024-11-11-nlp_tutorial_ner_en.md new file mode 100644 index 00000000000000..273a281d2141fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-nlp_tutorial_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nlp_tutorial_ner BertForTokenClassification from Conan-Lao +author: John Snow Labs +name: nlp_tutorial_ner +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nlp_tutorial_ner` is an English model originally trained by Conan-Lao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nlp_tutorial_ner_en_5.5.1_3.0_1731290341170.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nlp_tutorial_ner_en_5.5.1_3.0_1731290341170.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("nlp_tutorial_ner","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("nlp_tutorial_ner", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nlp_tutorial_ner| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/Conan-Lao/nlp_tutorial_ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-nlp_tutorial_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-nlp_tutorial_ner_pipeline_en.md new file mode 100644 index 00000000000000..f5fdf808a7e70c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-nlp_tutorial_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English nlp_tutorial_ner_pipeline pipeline BertForTokenClassification from Conan-Lao +author: John Snow Labs +name: nlp_tutorial_ner_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nlp_tutorial_ner_pipeline` is an English model originally trained by Conan-Lao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nlp_tutorial_ner_pipeline_en_5.5.1_3.0_1731290362432.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nlp_tutorial_ner_pipeline_en_5.5.1_3.0_1731290362432.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nlp_tutorial_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nlp_tutorial_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nlp_tutorial_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/Conan-Lao/nlp_tutorial_ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-norwegian_bokml_bert_base_qa_squad_norwegian_bokml_v2_temp_en.md b/docs/_posts/ahmedlone127/2024-11-11-norwegian_bokml_bert_base_qa_squad_norwegian_bokml_v2_temp_en.md new file mode 100644 index 00000000000000..3a0a51b8d3fd9d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-norwegian_bokml_bert_base_qa_squad_norwegian_bokml_v2_temp_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English norwegian_bokml_bert_base_qa_squad_norwegian_bokml_v2_temp BertForQuestionAnswering from eanderson +author: John Snow Labs +name: norwegian_bokml_bert_base_qa_squad_norwegian_bokml_v2_temp +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `norwegian_bokml_bert_base_qa_squad_norwegian_bokml_v2_temp` is an English model originally trained by eanderson.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norwegian_bokml_bert_base_qa_squad_norwegian_bokml_v2_temp_en_5.5.1_3.0_1731289481925.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norwegian_bokml_bert_base_qa_squad_norwegian_bokml_v2_temp_en_5.5.1_3.0_1731289481925.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("norwegian_bokml_bert_base_qa_squad_norwegian_bokml_v2_temp","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("norwegian_bokml_bert_base_qa_squad_norwegian_bokml_v2_temp", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norwegian_bokml_bert_base_qa_squad_norwegian_bokml_v2_temp| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|666.2 MB| + +## References + +https://huggingface.co/eanderson/nb-bert-base-qa-squad-nb_v2_temp \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-norwegian_bokml_bert_base_qa_squad_norwegian_bokml_v2_temp_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-norwegian_bokml_bert_base_qa_squad_norwegian_bokml_v2_temp_pipeline_en.md new file mode 100644 index 00000000000000..5c6816c1f92b65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-norwegian_bokml_bert_base_qa_squad_norwegian_bokml_v2_temp_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English norwegian_bokml_bert_base_qa_squad_norwegian_bokml_v2_temp_pipeline pipeline BertForQuestionAnswering from eanderson +author: John Snow Labs +name: norwegian_bokml_bert_base_qa_squad_norwegian_bokml_v2_temp_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `norwegian_bokml_bert_base_qa_squad_norwegian_bokml_v2_temp_pipeline` is an English model originally trained by eanderson.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norwegian_bokml_bert_base_qa_squad_norwegian_bokml_v2_temp_pipeline_en_5.5.1_3.0_1731289516281.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norwegian_bokml_bert_base_qa_squad_norwegian_bokml_v2_temp_pipeline_en_5.5.1_3.0_1731289516281.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("norwegian_bokml_bert_base_qa_squad_norwegian_bokml_v2_temp_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("norwegian_bokml_bert_base_qa_squad_norwegian_bokml_v2_temp_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norwegian_bokml_bert_base_qa_squad_norwegian_bokml_v2_temp_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|666.2 MB| + +## References + +https://huggingface.co/eanderson/nb-bert-base-qa-squad-nb_v2_temp + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-nusabert_base_posp_en.md b/docs/_posts/ahmedlone127/2024-11-11-nusabert_base_posp_en.md new file mode 100644 index 00000000000000..ffab257ec4f50b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-nusabert_base_posp_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nusabert_base_posp BertForTokenClassification from LazarusNLP +author: John Snow Labs +name: nusabert_base_posp +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nusabert_base_posp` is an English model originally trained by LazarusNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nusabert_base_posp_en_5.5.1_3.0_1731285880660.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nusabert_base_posp_en_5.5.1_3.0_1731285880660.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("nusabert_base_posp","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("nusabert_base_posp", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nusabert_base_posp| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|412.5 MB| + +## References + +https://huggingface.co/LazarusNLP/NusaBERT-base-POSP \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-nusabert_base_posp_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-nusabert_base_posp_pipeline_en.md new file mode 100644 index 00000000000000..138b88f6fe0d26 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-nusabert_base_posp_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English nusabert_base_posp_pipeline pipeline BertForTokenClassification from LazarusNLP +author: John Snow Labs +name: nusabert_base_posp_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nusabert_base_posp_pipeline` is an English model originally trained by LazarusNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nusabert_base_posp_pipeline_en_5.5.1_3.0_1731285902489.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nusabert_base_posp_pipeline_en_5.5.1_3.0_1731285902489.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nusabert_base_posp_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nusabert_base_posp_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nusabert_base_posp_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|412.5 MB| + +## References + +https://huggingface.co/LazarusNLP/NusaBERT-base-POSP + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-persian_text_ner_bert_v1_fa.md b/docs/_posts/ahmedlone127/2024-11-11-persian_text_ner_bert_v1_fa.md new file mode 100644 index 00000000000000..32d1574f8c3718 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-persian_text_ner_bert_v1_fa.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Persian persian_text_ner_bert_v1 BertForTokenClassification from SeyedAli +author: John Snow Labs +name: persian_text_ner_bert_v1 +date: 2024-11-11 +tags: [fa, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: fa +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`persian_text_ner_bert_v1` is a Persian model originally trained by SeyedAli. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/persian_text_ner_bert_v1_fa_5.5.1_3.0_1731299048875.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/persian_text_ner_bert_v1_fa_5.5.1_3.0_1731299048875.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("persian_text_ner_bert_v1","fa") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("persian_text_ner_bert_v1", "fa") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|persian_text_ner_bert_v1| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|fa| +|Size:|606.6 MB| + +## References + +https://huggingface.co/SeyedAli/Persian-Text-NER-Bert-V1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-persian_text_ner_bert_v1_pipeline_fa.md b/docs/_posts/ahmedlone127/2024-11-11-persian_text_ner_bert_v1_pipeline_fa.md new file mode 100644 index 00000000000000..06ba601147d270 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-persian_text_ner_bert_v1_pipeline_fa.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Persian persian_text_ner_bert_v1_pipeline pipeline BertForTokenClassification from SeyedAli +author: John Snow Labs +name: persian_text_ner_bert_v1_pipeline +date: 2024-11-11 +tags: [fa, open_source, pipeline, onnx] +task: Named Entity Recognition +language: fa +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`persian_text_ner_bert_v1_pipeline` is a Persian model originally trained by SeyedAli. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/persian_text_ner_bert_v1_pipeline_fa_5.5.1_3.0_1731299082120.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/persian_text_ner_bert_v1_pipeline_fa_5.5.1_3.0_1731299082120.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("persian_text_ner_bert_v1_pipeline", lang = "fa") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("persian_text_ner_bert_v1_pipeline", lang = "fa") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|persian_text_ner_bert_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|fa| +|Size:|606.6 MB| + +## References + +https://huggingface.co/SeyedAli/Persian-Text-NER-Bert-V1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-personalinfoclassifier_en.md b/docs/_posts/ahmedlone127/2024-11-11-personalinfoclassifier_en.md new file mode 100644 index 00000000000000..e739f4b8c1dc41 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-personalinfoclassifier_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English personalinfoclassifier BertForSequenceClassification from MrAB01 +author: John Snow Labs +name: personalinfoclassifier +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `personalinfoclassifier` is an English model originally trained by MrAB01. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/personalinfoclassifier_en_5.5.1_3.0_1731309188691.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/personalinfoclassifier_en_5.5.1_3.0_1731309188691.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("personalinfoclassifier","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("personalinfoclassifier", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|personalinfoclassifier| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|412.0 MB| + +## References + +https://huggingface.co/MrAB01/PersonalInfoClassifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-personalinfoclassifier_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-personalinfoclassifier_pipeline_en.md new file mode 100644 index 00000000000000..099794319e615b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-personalinfoclassifier_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English personalinfoclassifier_pipeline pipeline BertForSequenceClassification from MrAB01 +author: John Snow Labs +name: personalinfoclassifier_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `personalinfoclassifier_pipeline` is an English model originally trained by MrAB01. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/personalinfoclassifier_pipeline_en_5.5.1_3.0_1731309210318.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/personalinfoclassifier_pipeline_en_5.5.1_3.0_1731309210318.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("personalinfoclassifier_pipeline", lang = "en") # pretrained pipeline selected by name and language +annotations = pipeline.transform(df) # df is not defined in this snippet; presumably the caller's input DataFrame + +``` +```scala + +val pipeline = new PretrainedPipeline("personalinfoclassifier_pipeline", lang = "en") // pretrained pipeline selected by name and language +val annotations = pipeline.transform(df) // df is not defined in this snippet; presumably the caller's input DataFrame + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|personalinfoclassifier_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|412.0 MB| + +## References + +https://huggingface.co/MrAB01/PersonalInfoClassifier + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-politeness_model_en.md b/docs/_posts/ahmedlone127/2024-11-11-politeness_model_en.md new file mode 100644 index 00000000000000..54b7e18676d712 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-politeness_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English politeness_model BertForSequenceClassification from gljj +author: John Snow Labs +name: politeness_model +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `politeness_model` is an English model originally trained by gljj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/politeness_model_en_5.5.1_3.0_1731310062293.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/politeness_model_en_5.5.1_3.0_1731310062293.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("politeness_model","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") # input names must match the upstream output columns + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") // DocumentAssembler takes one input and one output column + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("politeness_model", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") // input names must match the upstream output columns + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|politeness_model| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/gljj/politeness-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-politeness_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-politeness_model_pipeline_en.md new file mode 100644 index 00000000000000..ae45b23ba3f142 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-politeness_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English politeness_model_pipeline pipeline BertForSequenceClassification from gljj +author: John Snow Labs +name: politeness_model_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `politeness_model_pipeline` is an English model originally trained by gljj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/politeness_model_pipeline_en_5.5.1_3.0_1731310084610.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/politeness_model_pipeline_en_5.5.1_3.0_1731310084610.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("politeness_model_pipeline", lang = "en") # pretrained pipeline selected by name and language +annotations = pipeline.transform(df) # df is not defined in this snippet; presumably the caller's input DataFrame + +``` +```scala + +val pipeline = new PretrainedPipeline("politeness_model_pipeline", lang = "en") // pretrained pipeline selected by name and language +val annotations = pipeline.transform(df) // df is not defined in this snippet; presumably the caller's input DataFrame + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|politeness_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/gljj/politeness-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-popbert_de.md b/docs/_posts/ahmedlone127/2024-11-11-popbert_de.md new file mode 100644 index 00000000000000..67f9fa00c1f2f6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-popbert_de.md @@ -0,0 +1,94 @@ +--- +layout: model +title: German popbert BertForSequenceClassification from luerhard +author: John Snow Labs +name: popbert +date: 2024-11-11 +tags: [de, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: de +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`popbert` is a German model originally trained by luerhard. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/popbert_de_5.5.1_3.0_1731309940376.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/popbert_de_5.5.1_3.0_1731309940376.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("popbert","de") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") # input names must match the upstream output columns + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") // DocumentAssembler takes one input and one output column + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("popbert", "de") + .setInputCols(Array("document","token")) + .setOutputCol("class") // input names must match the upstream output columns + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|popbert| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|de| +|Size:|1.3 GB| + +## References + +https://huggingface.co/luerhard/PopBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-popbert_pipeline_de.md b/docs/_posts/ahmedlone127/2024-11-11-popbert_pipeline_de.md new file mode 100644 index 00000000000000..7e5575a55024d5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-popbert_pipeline_de.md @@ -0,0 +1,72 @@ +--- +layout: model +title: German popbert_pipeline pipeline BertForSequenceClassification from luerhard +author: John Snow Labs +name: popbert_pipeline +date: 2024-11-11 +tags: [de, open_source, pipeline, onnx] +task: Text Classification +language: de +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`popbert_pipeline` is a German model originally trained by luerhard. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/popbert_pipeline_de_5.5.1_3.0_1731310008414.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/popbert_pipeline_de_5.5.1_3.0_1731310008414.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +pipeline = PretrainedPipeline("popbert_pipeline", lang = "de") # pretrained pipeline selected by name and language +annotations = pipeline.transform(df) # df is not defined in this snippet; presumably the caller's input DataFrame +``` +```scala +val pipeline = new PretrainedPipeline("popbert_pipeline", lang = "de") // pretrained pipeline selected by name and language +val annotations = pipeline.transform(df) // df is not defined in this snippet; presumably the caller's input DataFrame +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|popbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|de| +|Size:|1.3 GB| + +## References + + + +https://huggingface.co/luerhard/PopBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-pretrain_finetuned_v2_en.md b/docs/_posts/ahmedlone127/2024-11-11-pretrain_finetuned_v2_en.md new file mode 100644 index 00000000000000..3db9d5e6492766 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-pretrain_finetuned_v2_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English pretrain_finetuned_v2 BertForQuestionAnswering from marinaibr +author: John Snow Labs +name: pretrain_finetuned_v2 +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pretrain_finetuned_v2` is an English model originally trained by marinaibr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pretrain_finetuned_v2_en_5.5.1_3.0_1731289827138.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pretrain_finetuned_v2_en_5.5.1_3.0_1731289827138.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) # one output column per input column + +spanClassifier = BertForQuestionAnswering.pretrained("pretrain_finetuned_v2","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") # raw columns read by the assembler +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) // one output column per input column + +val spanClassifier = BertForQuestionAnswering.pretrained("pretrain_finetuned_v2", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context") // one (question, context) row; raw columns read by the assembler +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pretrain_finetuned_v2| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|407.8 MB| + +## References + +https://huggingface.co/marinaibr/pretrain-finetuned-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-pretrain_finetuned_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-pretrain_finetuned_v2_pipeline_en.md new file mode 100644 index 00000000000000..ea7a8596f7dd96 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-pretrain_finetuned_v2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English pretrain_finetuned_v2_pipeline pipeline BertForQuestionAnswering from marinaibr +author: John Snow Labs +name: pretrain_finetuned_v2_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pretrain_finetuned_v2_pipeline` is an English model originally trained by marinaibr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pretrain_finetuned_v2_pipeline_en_5.5.1_3.0_1731289848049.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pretrain_finetuned_v2_pipeline_en_5.5.1_3.0_1731289848049.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("pretrain_finetuned_v2_pipeline", lang = "en") # pretrained pipeline selected by name and language +annotations = pipeline.transform(df) # df is not defined in this snippet; presumably the caller's input DataFrame + +``` +```scala + +val pipeline = new PretrainedPipeline("pretrain_finetuned_v2_pipeline", lang = "en") // pretrained pipeline selected by name and language +val annotations = pipeline.transform(df) // df is not defined in this snippet; presumably the caller's input DataFrame + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pretrain_finetuned_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.9 MB| + +## References + +https://huggingface.co/marinaibr/pretrain-finetuned-v2 + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-procedure_tool_matching_10_epochs_en.md b/docs/_posts/ahmedlone127/2024-11-11-procedure_tool_matching_10_epochs_en.md new file mode 100644 index 00000000000000..ae7a586b6d8401 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-procedure_tool_matching_10_epochs_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English procedure_tool_matching_10_epochs MPNetEmbeddings from brilan +author: John Snow Labs +name: procedure_tool_matching_10_epochs +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `procedure_tool_matching_10_epochs` is an English model originally trained by brilan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/procedure_tool_matching_10_epochs_en_5.5.1_3.0_1731294702325.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/procedure_tool_matching_10_epochs_en_5.5.1_3.0_1731294702325.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") # reads the "text" column, writes "document" + +embeddings = MPNetEmbeddings.pretrained("procedure_tool_matching_10_epochs","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") # reads "document", writes "embeddings" + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") # one example row in a single "text" column +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") // reads the "text" column, writes "document" + +val embeddings = MPNetEmbeddings.pretrained("procedure_tool_matching_10_epochs","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") // reads "document", writes "embeddings" + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") // one example row in a single "text" column +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|procedure_tool_matching_10_epochs| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/brilan/procedure-tool-matching_10_epochs \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-procedure_tool_matching_10_epochs_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-procedure_tool_matching_10_epochs_pipeline_en.md new file mode 100644 index 00000000000000..cd0aabc0210a34 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-procedure_tool_matching_10_epochs_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English procedure_tool_matching_10_epochs_pipeline pipeline MPNetEmbeddings from brilan +author: John Snow Labs +name: procedure_tool_matching_10_epochs_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `procedure_tool_matching_10_epochs_pipeline` is an English model originally trained by brilan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/procedure_tool_matching_10_epochs_pipeline_en_5.5.1_3.0_1731294728804.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/procedure_tool_matching_10_epochs_pipeline_en_5.5.1_3.0_1731294728804.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("procedure_tool_matching_10_epochs_pipeline", lang = "en") # pretrained pipeline selected by name and language +annotations = pipeline.transform(df) # df is not defined in this snippet; presumably the caller's input DataFrame + +``` +```scala + +val pipeline = new PretrainedPipeline("procedure_tool_matching_10_epochs_pipeline", lang = "en") // pretrained pipeline selected by name and language +val annotations = pipeline.transform(df) // df is not defined in this snippet; presumably the caller's input DataFrame + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|procedure_tool_matching_10_epochs_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/brilan/procedure-tool-matching_10_epochs + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-product_model_en.md b/docs/_posts/ahmedlone127/2024-11-11-product_model_en.md new file mode 100644 index 00000000000000..01ff8c7b0b9f05 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-product_model_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English product_model MPNetEmbeddings from alpcansoydas +author: John Snow Labs +name: product_model +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `product_model` is an English model originally trained by alpcansoydas. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/product_model_en_5.5.1_3.0_1731295066547.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/product_model_en_5.5.1_3.0_1731295066547.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") # reads the "text" column, writes "document" + +embeddings = MPNetEmbeddings.pretrained("product_model","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") # reads "document", writes "embeddings" + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") # one example row in a single "text" column +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") // reads the "text" column, writes "document" + +val embeddings = MPNetEmbeddings.pretrained("product_model","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") // reads "document", writes "embeddings" + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") // one example row in a single "text" column +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|product_model| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/alpcansoydas/product-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-product_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-product_model_pipeline_en.md new file mode 100644 index 00000000000000..5509af1db40ee6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-product_model_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English product_model_pipeline pipeline MPNetEmbeddings from alpcansoydas +author: John Snow Labs +name: product_model_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `product_model_pipeline` is an English model originally trained by alpcansoydas. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/product_model_pipeline_en_5.5.1_3.0_1731295088962.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/product_model_pipeline_en_5.5.1_3.0_1731295088962.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("product_model_pipeline", lang = "en") # pretrained pipeline selected by name and language +annotations = pipeline.transform(df) # df is not defined in this snippet; presumably the caller's input DataFrame + +``` +```scala + +val pipeline = new PretrainedPipeline("product_model_pipeline", lang = "en") // pretrained pipeline selected by name and language +val annotations = pipeline.transform(df) // df is not defined in this snippet; presumably the caller's input DataFrame + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|product_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/alpcansoydas/product-model + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-product_recognize_en.md b/docs/_posts/ahmedlone127/2024-11-11-product_recognize_en.md new file mode 100644 index 00000000000000..c911f6eae3a7a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-product_recognize_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English product_recognize BertForTokenClassification from HentDios +author: John Snow Labs +name: product_recognize +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `product_recognize` is an English model originally trained by HentDios. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/product_recognize_en_5.5.1_3.0_1731285314999.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/product_recognize_en_5.5.1_3.0_1731285314999.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("product_recognize","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") # input names must match the upstream output columns + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") // DocumentAssembler takes one input and one output column + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("product_recognize", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") // input names must match the upstream output columns + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|product_recognize| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/HentDios/product-recognize \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-product_recognize_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-product_recognize_pipeline_en.md new file mode 100644 index 00000000000000..b3c35a440d49d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-product_recognize_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English product_recognize_pipeline pipeline BertForTokenClassification from HentDios +author: John Snow Labs +name: product_recognize_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `product_recognize_pipeline` is an English model originally trained by HentDios. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/product_recognize_pipeline_en_5.5.1_3.0_1731285338977.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/product_recognize_pipeline_en_5.5.1_3.0_1731285338977.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("product_recognize_pipeline", lang = "en") # pretrained pipeline selected by name and language +annotations = pipeline.transform(df) # df is not defined in this snippet; presumably the caller's input DataFrame + +``` +```scala + +val pipeline = new PretrainedPipeline("product_recognize_pipeline", lang = "en") // pretrained pipeline selected by name and language +val annotations = pipeline.transform(df) // df is not defined in this snippet; presumably the caller's input DataFrame + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|product_recognize_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/HentDios/product-recognize + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-propaganda_ner_arabic_ar.md b/docs/_posts/ahmedlone127/2024-11-11-propaganda_ner_arabic_ar.md new file mode 100644 index 00000000000000..5c205e9c4b4111 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-propaganda_ner_arabic_ar.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Arabic propaganda_ner_arabic BertForTokenClassification from ashrafulparan +author: John Snow Labs +name: propaganda_ner_arabic +date: 2024-11-11 +tags: [ar, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: ar +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `propaganda_ner_arabic` is an Arabic model originally trained by ashrafulparan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/propaganda_ner_arabic_ar_5.5.1_3.0_1731290522918.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/propaganda_ner_arabic_ar_5.5.1_3.0_1731290522918.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("propaganda_ner_arabic","ar") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("propaganda_ner_arabic", "ar") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|propaganda_ner_arabic| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|ar| +|Size:|406.7 MB| + +## References + +https://huggingface.co/ashrafulparan/Propaganda-NER-Arabic \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-propaganda_ner_arabic_pipeline_ar.md b/docs/_posts/ahmedlone127/2024-11-11-propaganda_ner_arabic_pipeline_ar.md new file mode 100644 index 00000000000000..f4e3da52799e79 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-propaganda_ner_arabic_pipeline_ar.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Arabic propaganda_ner_arabic_pipeline pipeline BertForTokenClassification from ashrafulparan +author: John Snow Labs +name: propaganda_ner_arabic_pipeline +date: 2024-11-11 +tags: [ar, open_source, pipeline, onnx] +task: Named Entity Recognition +language: ar +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`propaganda_ner_arabic_pipeline` is a Arabic model originally trained by ashrafulparan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/propaganda_ner_arabic_pipeline_ar_5.5.1_3.0_1731290544801.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/propaganda_ner_arabic_pipeline_ar_5.5.1_3.0_1731290544801.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("propaganda_ner_arabic_pipeline", lang = "ar") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("propaganda_ner_arabic_pipeline", lang = "ar") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|propaganda_ner_arabic_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|ar| +|Size:|406.8 MB| + +## References + +https://huggingface.co/ashrafulparan/Propaganda-NER-Arabic + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-pubmedbert_finetuned_ner_en.md b/docs/_posts/ahmedlone127/2024-11-11-pubmedbert_finetuned_ner_en.md new file mode 100644 index 00000000000000..7f8355299d1d55 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-pubmedbert_finetuned_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English pubmedbert_finetuned_ner BertForTokenClassification from jialinselenasong +author: John Snow Labs +name: pubmedbert_finetuned_ner +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pubmedbert_finetuned_ner` is a English model originally trained by jialinselenasong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pubmedbert_finetuned_ner_en_5.5.1_3.0_1731290761905.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pubmedbert_finetuned_ner_en_5.5.1_3.0_1731290761905.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("pubmedbert_finetuned_ner","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("pubmedbert_finetuned_ner", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pubmedbert_finetuned_ner| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/jialinselenasong/pubmedbert-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-pubmedbert_finetuned_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-pubmedbert_finetuned_ner_pipeline_en.md new file mode 100644 index 00000000000000..9b2030be9980a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-pubmedbert_finetuned_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English pubmedbert_finetuned_ner_pipeline pipeline BertForTokenClassification from jialinselenasong +author: John Snow Labs +name: pubmedbert_finetuned_ner_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pubmedbert_finetuned_ner_pipeline` is a English model originally trained by jialinselenasong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pubmedbert_finetuned_ner_pipeline_en_5.5.1_3.0_1731290786027.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pubmedbert_finetuned_ner_pipeline_en_5.5.1_3.0_1731290786027.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("pubmedbert_finetuned_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("pubmedbert_finetuned_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pubmedbert_finetuned_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/jialinselenasong/pubmedbert-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-qa_model_balchid_en.md b/docs/_posts/ahmedlone127/2024-11-11-qa_model_balchid_en.md new file mode 100644 index 00000000000000..33ca00f12234a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-qa_model_balchid_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_model_balchid BertForQuestionAnswering from balchid +author: John Snow Labs +name: qa_model_balchid +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_model_balchid` is a English model originally trained by balchid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_model_balchid_en_5.5.1_3.0_1731289028794.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_model_balchid_en_5.5.1_3.0_1731289028794.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = BertForQuestionAnswering.pretrained("qa_model_balchid","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("qa_model_balchid", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_model_balchid| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/balchid/qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-qa_model_balchid_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-qa_model_balchid_pipeline_en.md new file mode 100644 index 00000000000000..5159bb9652d99d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-qa_model_balchid_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_model_balchid_pipeline pipeline BertForQuestionAnswering from balchid +author: John Snow Labs +name: qa_model_balchid_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_model_balchid_pipeline` is a English model originally trained by balchid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_model_balchid_pipeline_en_5.5.1_3.0_1731289051251.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_model_balchid_pipeline_en_5.5.1_3.0_1731289051251.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_model_balchid_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_model_balchid_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_model_balchid_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/balchid/qa_model + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-quote_model_bertm_v1_en.md b/docs/_posts/ahmedlone127/2024-11-11-quote_model_bertm_v1_en.md new file mode 100644 index 00000000000000..a7d99077559c50 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-quote_model_bertm_v1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English quote_model_bertm_v1 BertForTokenClassification from Iceland +author: John Snow Labs +name: quote_model_bertm_v1 +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`quote_model_bertm_v1` is a English model originally trained by Iceland. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/quote_model_bertm_v1_en_5.5.1_3.0_1731298749377.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/quote_model_bertm_v1_en_5.5.1_3.0_1731298749377.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("quote_model_bertm_v1","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("quote_model_bertm_v1", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|quote_model_bertm_v1| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|665.1 MB| + +## References + +https://huggingface.co/Iceland/quote-model-BERTm-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-quote_model_bertm_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-quote_model_bertm_v1_pipeline_en.md new file mode 100644 index 00000000000000..eeff29fc89a209 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-quote_model_bertm_v1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English quote_model_bertm_v1_pipeline pipeline BertForTokenClassification from Iceland +author: John Snow Labs +name: quote_model_bertm_v1_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`quote_model_bertm_v1_pipeline` is a English model originally trained by Iceland. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/quote_model_bertm_v1_pipeline_en_5.5.1_3.0_1731298784216.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/quote_model_bertm_v1_pipeline_en_5.5.1_3.0_1731298784216.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("quote_model_bertm_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("quote_model_bertm_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|quote_model_bertm_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|665.1 MB| + +## References + +https://huggingface.co/Iceland/quote-model-BERTm-v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-results_en.md b/docs/_posts/ahmedlone127/2024-11-11-results_en.md new file mode 100644 index 00000000000000..a13d5742fcd173 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-results_en.md @@ -0,0 +1,96 @@ +--- +layout: model +title: English results DistilBertForQuestionAnswering from Souvik123 +author: John Snow Labs +name: results +date: 2024-11-11 +tags: [distilbert, en, open_source, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`results` is a English model originally trained by Souvik123. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/results_en_5.5.1_3.0_1731301209851.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/results_en_5.5.1_3.0_1731301209851.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + + +spanClassifier = DistilBertForQuestionAnswering.pretrained("results","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([document_assembler, spanClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering + .pretrained("results", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(document_assembler, spanClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|results| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|409.2 MB| + +## References + +References + +References + +References + +https://huggingface.co/Souvik123/results \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-results_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-results_pipeline_en.md new file mode 100644 index 00000000000000..6c723b057ba53e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-results_pipeline_en.md @@ -0,0 +1,72 @@ +--- +layout: model +title: English results_pipeline pipeline RoBertaForTokenClassification from danielyoo +author: John Snow Labs +name: results_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `results_pipeline` is an English model originally trained by danielyoo. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/results_pipeline_en_5.5.1_3.0_1731301232582.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/results_pipeline_en_5.5.1_3.0_1731301232582.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +pipeline = PretrainedPipeline("results_pipeline", lang = "en") +annotations = pipeline.transform(df) +``` +```scala +val pipeline = new PretrainedPipeline("results_pipeline", lang = "en") +val annotations = pipeline.transform(df) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|results_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.2 MB| + +## References + +References + +https://huggingface.co/danielyoo/results + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_base_absa_ate_sentiment_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_base_absa_ate_sentiment_en.md new file mode 100644 index 00000000000000..bcfcac9683d809 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_base_absa_ate_sentiment_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_absa_ate_sentiment RoBertaForTokenClassification from gauneg +author: John Snow Labs +name: roberta_base_absa_ate_sentiment +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_absa_ate_sentiment` is a English model originally trained by gauneg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_absa_ate_sentiment_en_5.5.1_3.0_1731314074640.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_absa_ate_sentiment_en_5.5.1_3.0_1731314074640.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_base_absa_ate_sentiment","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_base_absa_ate_sentiment", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_absa_ate_sentiment| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|451.2 MB| + +## References + +https://huggingface.co/gauneg/roberta-base-absa-ate-sentiment \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_base_absa_ate_sentiment_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_base_absa_ate_sentiment_pipeline_en.md new file mode 100644 index 00000000000000..80bc7b25f26e26 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_base_absa_ate_sentiment_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_absa_ate_sentiment_pipeline pipeline RoBertaForTokenClassification from gauneg +author: John Snow Labs +name: roberta_base_absa_ate_sentiment_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_absa_ate_sentiment_pipeline` is a English model originally trained by gauneg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_absa_ate_sentiment_pipeline_en_5.5.1_3.0_1731314099373.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_absa_ate_sentiment_pipeline_en_5.5.1_3.0_1731314099373.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_absa_ate_sentiment_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_absa_ate_sentiment_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_absa_ate_sentiment_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|451.2 MB| + +## References + +https://huggingface.co/gauneg/roberta-base-absa-ate-sentiment + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_base_ainu_sayula_popoluca_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_base_ainu_sayula_popoluca_en.md new file mode 100644 index 00000000000000..17557ced9a6887 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_base_ainu_sayula_popoluca_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_ainu_sayula_popoluca RoBertaForTokenClassification from aynumosir +author: John Snow Labs +name: roberta_base_ainu_sayula_popoluca +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_ainu_sayula_popoluca` is a English model originally trained by aynumosir. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_ainu_sayula_popoluca_en_5.5.1_3.0_1731311703241.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_ainu_sayula_popoluca_en_5.5.1_3.0_1731311703241.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_base_ainu_sayula_popoluca","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_base_ainu_sayula_popoluca", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_ainu_sayula_popoluca| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|465.1 MB| + +## References + +https://huggingface.co/aynumosir/roberta-base-ainu-pos \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_base_ainu_sayula_popoluca_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_base_ainu_sayula_popoluca_pipeline_en.md new file mode 100644 index 00000000000000..5efb43e0ba2dbc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_base_ainu_sayula_popoluca_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_ainu_sayula_popoluca_pipeline pipeline RoBertaForTokenClassification from aynumosir +author: John Snow Labs +name: roberta_base_ainu_sayula_popoluca_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_ainu_sayula_popoluca_pipeline` is a English model originally trained by aynumosir. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_ainu_sayula_popoluca_pipeline_en_5.5.1_3.0_1731311731304.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_ainu_sayula_popoluca_pipeline_en_5.5.1_3.0_1731311731304.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_ainu_sayula_popoluca_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_ainu_sayula_popoluca_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_ainu_sayula_popoluca_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|465.2 MB| + +## References + +https://huggingface.co/aynumosir/roberta-base-ainu-pos + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_base_bne_capitel_ner_bsc_lt_pipeline_es.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_base_bne_capitel_ner_bsc_lt_pipeline_es.md new file mode 100644 index 00000000000000..1a463d8182f2f8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_base_bne_capitel_ner_bsc_lt_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish roberta_base_bne_capitel_ner_bsc_lt_pipeline pipeline RoBertaForTokenClassification from BSC-LT +author: John Snow Labs +name: roberta_base_bne_capitel_ner_bsc_lt_pipeline +date: 2024-11-11 +tags: [es, open_source, pipeline, onnx] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_bne_capitel_ner_bsc_lt_pipeline` is a Castilian, Spanish model originally trained by BSC-LT. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_bne_capitel_ner_bsc_lt_pipeline_es_5.5.1_3.0_1731311863930.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_bne_capitel_ner_bsc_lt_pipeline_es_5.5.1_3.0_1731311863930.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_bne_capitel_ner_bsc_lt_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_bne_capitel_ner_bsc_lt_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_bne_capitel_ner_bsc_lt_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|456.6 MB| + +## References + +https://huggingface.co/BSC-LT/roberta-base-bne-capitel-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_base_bne_capitel_ner_plantl_gob_es_pipeline_es.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_base_bne_capitel_ner_plantl_gob_es_pipeline_es.md new file mode 100644 index 00000000000000..0b105e39847a7a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_base_bne_capitel_ner_plantl_gob_es_pipeline_es.md @@ -0,0 +1,72 @@ +--- +layout: model +title: Castilian, Spanish roberta_base_bne_capitel_ner_plantl_gob_es_pipeline pipeline RoBertaForTokenClassification from PlanTL-GOB-ES +author: John Snow Labs +name: roberta_base_bne_capitel_ner_plantl_gob_es_pipeline +date: 2024-11-11 +tags: [es, open_source, pipeline, onnx] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_bne_capitel_ner_plantl_gob_es_pipeline` is a Castilian, Spanish model originally trained by PlanTL-GOB-ES. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_bne_capitel_ner_plantl_gob_es_pipeline_es_5.5.1_3.0_1731312026562.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_bne_capitel_ner_plantl_gob_es_pipeline_es_5.5.1_3.0_1731312026562.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +pipeline = PretrainedPipeline("roberta_base_bne_capitel_ner_plantl_gob_es_pipeline", lang = "es") +annotations = pipeline.transform(df) +``` +```scala +val pipeline = new PretrainedPipeline("roberta_base_bne_capitel_ner_plantl_gob_es_pipeline", lang = "es") +val annotations = pipeline.transform(df) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_bne_capitel_ner_plantl_gob_es_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|456.6 MB| + +## References + +References + +https://huggingface.co/PlanTL-GOB-ES/roberta-base-bne-capitel-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_base_finetuned_ner_cadec_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_base_finetuned_ner_cadec_en.md new file mode 100644 index 00000000000000..3a97f521081872 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_base_finetuned_ner_cadec_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_finetuned_ner_cadec RoBertaForTokenClassification from csNoHug +author: John Snow Labs +name: roberta_base_finetuned_ner_cadec +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta_base_finetuned_ner_cadec` is an English model originally trained by csNoHug. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_ner_cadec_en_5.5.1_3.0_1731311612890.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_ner_cadec_en_5.5.1_3.0_1731311612890.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_base_finetuned_ner_cadec","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_base_finetuned_ner_cadec", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_finetuned_ner_cadec| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|427.6 MB| + +## References + +https://huggingface.co/csNoHug/roberta-base-finetuned-ner-cadec \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_base_finetuned_ner_cadec_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_base_finetuned_ner_cadec_pipeline_en.md new file mode 100644 index 00000000000000..a7e5ebcab8f6d9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_base_finetuned_ner_cadec_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_finetuned_ner_cadec_pipeline pipeline RoBertaForTokenClassification from csNoHug +author: John Snow Labs +name: roberta_base_finetuned_ner_cadec_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_finetuned_ner_cadec_pipeline` is a English model originally trained by csNoHug. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_ner_cadec_pipeline_en_5.5.1_3.0_1731311647637.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_ner_cadec_pipeline_en_5.5.1_3.0_1731311647637.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_finetuned_ner_cadec_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_finetuned_ner_cadec_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_finetuned_ner_cadec_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|427.6 MB| + +## References + +https://huggingface.co/csNoHug/roberta-base-finetuned-ner-cadec + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_combined_generated_epoch_7_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_combined_generated_epoch_7_en.md new file mode 100644 index 00000000000000..3100b75a89a3df --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_combined_generated_epoch_7_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_combined_generated_epoch_7 RoBertaForTokenClassification from ICT2214Team7 +author: John Snow Labs +name: roberta_combined_generated_epoch_7 +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta_combined_generated_epoch_7` is an English model originally trained by ICT2214Team7. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_combined_generated_epoch_7_en_5.5.1_3.0_1731314414899.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_combined_generated_epoch_7_en_5.5.1_3.0_1731314414899.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_combined_generated_epoch_7","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_combined_generated_epoch_7", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_combined_generated_epoch_7| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|306.6 MB| + +## References + +https://huggingface.co/ICT2214Team7/RoBERTa_Combined_Generated_epoch_7 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_combined_generated_epoch_7_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_combined_generated_epoch_7_pipeline_en.md new file mode 100644 index 00000000000000..8e90885accfe79 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_combined_generated_epoch_7_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_combined_generated_epoch_7_pipeline pipeline RoBertaForTokenClassification from ICT2214Team7 +author: John Snow Labs +name: roberta_combined_generated_epoch_7_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_combined_generated_epoch_7_pipeline` is a English model originally trained by ICT2214Team7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_combined_generated_epoch_7_pipeline_en_5.5.1_3.0_1731314430537.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_combined_generated_epoch_7_pipeline_en_5.5.1_3.0_1731314430537.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_combined_generated_epoch_7_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_combined_generated_epoch_7_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_combined_generated_epoch_7_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|306.6 MB| + +## References + +https://huggingface.co/ICT2214Team7/RoBERTa_Combined_Generated_epoch_7 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_combined_generated_v1_1_epoch_7_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_combined_generated_v1_1_epoch_7_en.md new file mode 100644 index 00000000000000..1a06a4181992fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_combined_generated_v1_1_epoch_7_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_combined_generated_v1_1_epoch_7 RoBertaForTokenClassification from ICT2214Team7 +author: John Snow Labs +name: roberta_combined_generated_v1_1_epoch_7 +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta_combined_generated_v1_1_epoch_7` is an English model originally trained by ICT2214Team7. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_combined_generated_v1_1_epoch_7_en_5.5.1_3.0_1731311705278.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_combined_generated_v1_1_epoch_7_en_5.5.1_3.0_1731311705278.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_combined_generated_v1_1_epoch_7","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_combined_generated_v1_1_epoch_7", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_combined_generated_v1_1_epoch_7| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|306.6 MB| + +## References + +https://huggingface.co/ICT2214Team7/RoBERTa_Combined_Generated_v1.1_epoch_7 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_combined_generated_v1_1_epoch_7_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_combined_generated_v1_1_epoch_7_pipeline_en.md new file mode 100644 index 00000000000000..8b4b7edb97fc5d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_combined_generated_v1_1_epoch_7_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_combined_generated_v1_1_epoch_7_pipeline pipeline RoBertaForTokenClassification from ICT2214Team7 +author: John Snow Labs +name: roberta_combined_generated_v1_1_epoch_7_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_combined_generated_v1_1_epoch_7_pipeline` is a English model originally trained by ICT2214Team7. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_combined_generated_v1_1_epoch_7_pipeline_en_5.5.1_3.0_1731311730183.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_combined_generated_v1_1_epoch_7_pipeline_en_5.5.1_3.0_1731311730183.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_combined_generated_v1_1_epoch_7_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_combined_generated_v1_1_epoch_7_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_combined_generated_v1_1_epoch_7_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|306.6 MB| + +## References + +https://huggingface.co/ICT2214Team7/RoBERTa_Combined_Generated_v1.1_epoch_7 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_combined_generated_v1_1_epoch_8_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_combined_generated_v1_1_epoch_8_en.md new file mode 100644 index 00000000000000..76dc29c3142f32 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_combined_generated_v1_1_epoch_8_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_combined_generated_v1_1_epoch_8 RoBertaForTokenClassification from ICT2214Team7 +author: John Snow Labs +name: roberta_combined_generated_v1_1_epoch_8 +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_combined_generated_v1_1_epoch_8` is a English model originally trained by ICT2214Team7. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_combined_generated_v1_1_epoch_8_en_5.5.1_3.0_1731311211090.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_combined_generated_v1_1_epoch_8_en_5.5.1_3.0_1731311211090.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_combined_generated_v1_1_epoch_8","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_combined_generated_v1_1_epoch_8", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_combined_generated_v1_1_epoch_8| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|306.6 MB| + +## References + +https://huggingface.co/ICT2214Team7/RoBERTa_Combined_Generated_v1.1_epoch_8 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_combined_generated_v1_1_epoch_8_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_combined_generated_v1_1_epoch_8_pipeline_en.md new file mode 100644 index 00000000000000..37ebbef94efbad --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_combined_generated_v1_1_epoch_8_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_combined_generated_v1_1_epoch_8_pipeline pipeline RoBertaForTokenClassification from ICT2214Team7 +author: John Snow Labs +name: roberta_combined_generated_v1_1_epoch_8_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_combined_generated_v1_1_epoch_8_pipeline` is a English model originally trained by ICT2214Team7. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_combined_generated_v1_1_epoch_8_pipeline_en_5.5.1_3.0_1731311229047.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_combined_generated_v1_1_epoch_8_pipeline_en_5.5.1_3.0_1731311229047.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_combined_generated_v1_1_epoch_8_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_combined_generated_v1_1_epoch_8_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_combined_generated_v1_1_epoch_8_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|306.6 MB| + +## References + +https://huggingface.co/ICT2214Team7/RoBERTa_Combined_Generated_v1.1_epoch_8 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_finetuned_subjqa_movies_2_bajanthrimadhu_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_finetuned_subjqa_movies_2_bajanthrimadhu_en.md new file mode 100644 index 00000000000000..1e374b19c60ac7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_finetuned_subjqa_movies_2_bajanthrimadhu_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English roberta_finetuned_subjqa_movies_2_bajanthrimadhu RoBertaForQuestionAnswering from BajanthriMadhu +author: John Snow Labs +name: roberta_finetuned_subjqa_movies_2_bajanthrimadhu +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_finetuned_subjqa_movies_2_bajanthrimadhu` is a English model originally trained by BajanthriMadhu. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_finetuned_subjqa_movies_2_bajanthrimadhu_en_5.5.1_3.0_1731291741389.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_finetuned_subjqa_movies_2_bajanthrimadhu_en_5.5.1_3.0_1731291741389.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_finetuned_subjqa_movies_2_bajanthrimadhu","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_finetuned_subjqa_movies_2_bajanthrimadhu", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_finetuned_subjqa_movies_2_bajanthrimadhu| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|463.5 MB| + +## References + +https://huggingface.co/BajanthriMadhu/roberta-finetuned-subjqa-movies_2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_finetuned_subjqa_movies_2_bajanthrimadhu_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_finetuned_subjqa_movies_2_bajanthrimadhu_pipeline_en.md new file mode 100644 index 00000000000000..2b7aeb7039ec9a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_finetuned_subjqa_movies_2_bajanthrimadhu_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_finetuned_subjqa_movies_2_bajanthrimadhu_pipeline pipeline RoBertaForQuestionAnswering from BajanthriMadhu +author: John Snow Labs +name: roberta_finetuned_subjqa_movies_2_bajanthrimadhu_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta_finetuned_subjqa_movies_2_bajanthrimadhu_pipeline` is an English model originally trained by BajanthriMadhu. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_finetuned_subjqa_movies_2_bajanthrimadhu_pipeline_en_5.5.1_3.0_1731291765523.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_finetuned_subjqa_movies_2_bajanthrimadhu_pipeline_en_5.5.1_3.0_1731291765523.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_finetuned_subjqa_movies_2_bajanthrimadhu_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_finetuned_subjqa_movies_2_bajanthrimadhu_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_finetuned_subjqa_movies_2_bajanthrimadhu_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|463.6 MB| + +## References + +https://huggingface.co/BajanthriMadhu/roberta-finetuned-subjqa-movies_2 + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_finetuned_subjqa_movies_2_dcrowleymunster_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_finetuned_subjqa_movies_2_dcrowleymunster_en.md new file mode 100644 index 00000000000000..a1b192f965294f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_finetuned_subjqa_movies_2_dcrowleymunster_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English roberta_finetuned_subjqa_movies_2_dcrowleymunster RoBertaForQuestionAnswering from dcrowleymunster +author: John Snow Labs +name: roberta_finetuned_subjqa_movies_2_dcrowleymunster +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta_finetuned_subjqa_movies_2_dcrowleymunster` is an English model originally trained by dcrowleymunster. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_finetuned_subjqa_movies_2_dcrowleymunster_en_5.5.1_3.0_1731291888611.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_finetuned_subjqa_movies_2_dcrowleymunster_en_5.5.1_3.0_1731291888611.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_finetuned_subjqa_movies_2_dcrowleymunster","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_finetuned_subjqa_movies_2_dcrowleymunster", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_finetuned_subjqa_movies_2_dcrowleymunster| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|464.1 MB| + +## References + +https://huggingface.co/dcrowleymunster/roberta-finetuned-subjqa-movies_2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_finetuned_subjqa_movies_2_dcrowleymunster_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_finetuned_subjqa_movies_2_dcrowleymunster_pipeline_en.md new file mode 100644 index 00000000000000..bea64e2b984f4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_finetuned_subjqa_movies_2_dcrowleymunster_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_finetuned_subjqa_movies_2_dcrowleymunster_pipeline pipeline RoBertaForQuestionAnswering from dcrowleymunster +author: John Snow Labs +name: roberta_finetuned_subjqa_movies_2_dcrowleymunster_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta_finetuned_subjqa_movies_2_dcrowleymunster_pipeline` is an English model originally trained by dcrowleymunster. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_finetuned_subjqa_movies_2_dcrowleymunster_pipeline_en_5.5.1_3.0_1731291912750.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_finetuned_subjqa_movies_2_dcrowleymunster_pipeline_en_5.5.1_3.0_1731291912750.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_finetuned_subjqa_movies_2_dcrowleymunster_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_finetuned_subjqa_movies_2_dcrowleymunster_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_finetuned_subjqa_movies_2_dcrowleymunster_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|464.1 MB| + +## References + +https://huggingface.co/dcrowleymunster/roberta-finetuned-subjqa-movies_2 + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_finetuned_subjqa_movies_2_moussamoustapha_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_finetuned_subjqa_movies_2_moussamoustapha_en.md new file mode 100644 index 00000000000000..12ab1e5928ca39 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_finetuned_subjqa_movies_2_moussamoustapha_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English roberta_finetuned_subjqa_movies_2_moussamoustapha RoBertaForQuestionAnswering from MoussaMoustapha +author: John Snow Labs +name: roberta_finetuned_subjqa_movies_2_moussamoustapha +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta_finetuned_subjqa_movies_2_moussamoustapha` is an English model originally trained by MoussaMoustapha. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_finetuned_subjqa_movies_2_moussamoustapha_en_5.5.1_3.0_1731291906700.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_finetuned_subjqa_movies_2_moussamoustapha_en_5.5.1_3.0_1731291906700.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + +spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_finetuned_subjqa_movies_2_moussamoustapha","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_finetuned_subjqa_movies_2_moussamoustapha", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_finetuned_subjqa_movies_2_moussamoustapha| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|464.1 MB| + +## References + +https://huggingface.co/MoussaMoustapha/roberta-finetuned-subjqa-movies_2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_finetuned_subjqa_movies_2_moussamoustapha_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_finetuned_subjqa_movies_2_moussamoustapha_pipeline_en.md new file mode 100644 index 00000000000000..c008e8e13c60d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_finetuned_subjqa_movies_2_moussamoustapha_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_finetuned_subjqa_movies_2_moussamoustapha_pipeline pipeline RoBertaForQuestionAnswering from MoussaMoustapha +author: John Snow Labs +name: roberta_finetuned_subjqa_movies_2_moussamoustapha_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta_finetuned_subjqa_movies_2_moussamoustapha_pipeline` is an English model originally trained by MoussaMoustapha. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_finetuned_subjqa_movies_2_moussamoustapha_pipeline_en_5.5.1_3.0_1731291930779.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_finetuned_subjqa_movies_2_moussamoustapha_pipeline_en_5.5.1_3.0_1731291930779.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_finetuned_subjqa_movies_2_moussamoustapha_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_finetuned_subjqa_movies_2_moussamoustapha_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_finetuned_subjqa_movies_2_moussamoustapha_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|464.1 MB| + +## References + +https://huggingface.co/MoussaMoustapha/roberta-finetuned-subjqa-movies_2 + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_large_finetuned_abbr_filtered_plod_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_large_finetuned_abbr_filtered_plod_en.md new file mode 100644 index 00000000000000..5e7ed41bcfe692 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_large_finetuned_abbr_filtered_plod_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_large_finetuned_abbr_filtered_plod RoBertaForTokenClassification from surrey-nlp +author: John Snow Labs +name: roberta_large_finetuned_abbr_filtered_plod +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta_large_finetuned_abbr_filtered_plod` is an English model originally trained by surrey-nlp. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_finetuned_abbr_filtered_plod_en_5.5.1_3.0_1731311931491.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_finetuned_abbr_filtered_plod_en_5.5.1_3.0_1731311931491.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_large_finetuned_abbr_filtered_plod","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_large_finetuned_abbr_filtered_plod", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_finetuned_abbr_filtered_plod| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/surrey-nlp/roberta-large-finetuned-abbr-filtered-plod \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_large_finetuned_ner_finetuned_ner_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_large_finetuned_ner_finetuned_ner_en.md new file mode 100644 index 00000000000000..e0462a49903f4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_large_finetuned_ner_finetuned_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_large_finetuned_ner_finetuned_ner RoBertaForTokenClassification from EngTig +author: John Snow Labs +name: roberta_large_finetuned_ner_finetuned_ner +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta_large_finetuned_ner_finetuned_ner` is an English model originally trained by EngTig. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_finetuned_ner_finetuned_ner_en_5.5.1_3.0_1731314705749.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_finetuned_ner_finetuned_ner_en_5.5.1_3.0_1731314705749.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_large_finetuned_ner_finetuned_ner","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_large_finetuned_ner_finetuned_ner", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_finetuned_ner_finetuned_ner| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/EngTig/roberta-large-finetuned-ner-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_large_finetuned_ner_finetuned_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_large_finetuned_ner_finetuned_ner_pipeline_en.md new file mode 100644 index 00000000000000..2f468654123ec1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_large_finetuned_ner_finetuned_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_large_finetuned_ner_finetuned_ner_pipeline pipeline RoBertaForTokenClassification from EngTig +author: John Snow Labs +name: roberta_large_finetuned_ner_finetuned_ner_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta_large_finetuned_ner_finetuned_ner_pipeline` is an English model originally trained by EngTig. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_finetuned_ner_finetuned_ner_pipeline_en_5.5.1_3.0_1731314773896.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_finetuned_ner_finetuned_ner_pipeline_en_5.5.1_3.0_1731314773896.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_large_finetuned_ner_finetuned_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_large_finetuned_ner_finetuned_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_finetuned_ner_finetuned_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/EngTig/roberta-large-finetuned-ner-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_large_lemon_spell_5k_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_large_lemon_spell_5k_pipeline_en.md new file mode 100644 index 00000000000000..4e6b72c0baef7f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_large_lemon_spell_5k_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_large_lemon_spell_5k_pipeline pipeline RoBertaForTokenClassification from manred1997 +author: John Snow Labs +name: roberta_large_lemon_spell_5k_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta_large_lemon_spell_5k_pipeline` is an English model originally trained by manred1997. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_lemon_spell_5k_pipeline_en_5.5.1_3.0_1731311835916.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_lemon_spell_5k_pipeline_en_5.5.1_3.0_1731311835916.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_large_lemon_spell_5k_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_large_lemon_spell_5k_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_lemon_spell_5k_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/manred1997/roberta-large_lemon-spell_5k + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_spanish_clinical_trials_neg_spec_ner_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_spanish_clinical_trials_neg_spec_ner_en.md new file mode 100644 index 00000000000000..b2f5d749d07255 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_spanish_clinical_trials_neg_spec_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_spanish_clinical_trials_neg_spec_ner RoBertaForTokenClassification from medspaner +author: John Snow Labs +name: roberta_spanish_clinical_trials_neg_spec_ner +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta_spanish_clinical_trials_neg_spec_ner` is an English model originally trained by medspaner. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_spanish_clinical_trials_neg_spec_ner_en_5.5.1_3.0_1731314639240.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_spanish_clinical_trials_neg_spec_ner_en_5.5.1_3.0_1731314639240.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_spanish_clinical_trials_neg_spec_ner","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_spanish_clinical_trials_neg_spec_ner", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_spanish_clinical_trials_neg_spec_ner| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|450.7 MB| + +## References + +https://huggingface.co/medspaner/roberta-es-clinical-trials-neg-spec-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_test_training_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_test_training_en.md new file mode 100644 index 00000000000000..a2add6cf1f0a02 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_test_training_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_test_training RoBertaForTokenClassification from ICT2214Team7 +author: John Snow Labs +name: roberta_test_training +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta_test_training` is an English model originally trained by ICT2214Team7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_test_training_en_5.5.1_3.0_1731311877878.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_test_training_en_5.5.1_3.0_1731311877878.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_test_training","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner")  # consumes the "document" and "token" columns produced above + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") // DocumentAssembler setters are singular: setInputCol / setOutputCol + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_test_training", "en") + .setInputCols(Array("document","token")) // must match the "document" and "token" columns produced above + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_test_training| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|306.5 MB| + +## References + +https://huggingface.co/ICT2214Team7/RoBERTa_Test_Training \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-roberta_test_training_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-roberta_test_training_pipeline_en.md new file mode 100644 index 00000000000000..d66dbdcc87894b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-roberta_test_training_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_test_training_pipeline pipeline RoBertaForTokenClassification from ICT2214Team7 +author: John Snow Labs +name: roberta_test_training_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta_test_training_pipeline` is an English model originally trained by ICT2214Team7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_test_training_pipeline_en_5.5.1_3.0_1731311894107.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_test_training_pipeline_en_5.5.1_3.0_1731311894107.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_test_training_pipeline", lang = "en")  # pipeline selected by name and language +annotations = pipeline.transform(df)  # `df` is not defined in this snippet; presumably the input DataFrame, create it beforehand + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_test_training_pipeline", lang = "en") // pipeline selected by name and language +val annotations = pipeline.transform(df) // `df` is not defined in this snippet; presumably the input DataFrame, create it beforehand + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_test_training_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|306.6 MB| + +## References + +https://huggingface.co/ICT2214Team7/RoBERTa_Test_Training + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-rubert_finetuned_squad_en.md b/docs/_posts/ahmedlone127/2024-11-11-rubert_finetuned_squad_en.md new file mode 100644 index 00000000000000..e5c3d1be04bc2c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-rubert_finetuned_squad_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English rubert_finetuned_squad BertForQuestionAnswering from sad-bkt +author: John Snow Labs +name: rubert_finetuned_squad +date: 2024-11-11 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `rubert_finetuned_squad` is an English model originally trained by sad-bkt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rubert_finetuned_squad_en_5.5.1_3.0_1731288820745.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rubert_finetuned_squad_en_5.5.1_3.0_1731288820745.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"])  # plural setters: one output column per input column + +spanClassifier = BertForQuestionAnswering.pretrained("rubert_finetuned_squad","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")  # raw columns are named after the assembler's input columns +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) // plural setters: one output column per input column + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("rubert_finetuned_squad", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context") // one row holding a (question, context) tuple, named after the assembler's input columns +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rubert_finetuned_squad| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|664.3 MB| + +## References + +https://huggingface.co/sad-bkt/rubert-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-rubert_finetuned_squad_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-rubert_finetuned_squad_pipeline_en.md new file mode 100644 index 00000000000000..20919abae61627 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-rubert_finetuned_squad_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English rubert_finetuned_squad_pipeline pipeline BertForQuestionAnswering from sad-bkt +author: John Snow Labs +name: rubert_finetuned_squad_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `rubert_finetuned_squad_pipeline` is an English model originally trained by sad-bkt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rubert_finetuned_squad_pipeline_en_5.5.1_3.0_1731288867678.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rubert_finetuned_squad_pipeline_en_5.5.1_3.0_1731288867678.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("rubert_finetuned_squad_pipeline", lang = "en")  # pipeline selected by name and language +annotations = pipeline.transform(df)  # `df` is not defined in this snippet; presumably the input DataFrame, create it beforehand + +``` +```scala + +val pipeline = new PretrainedPipeline("rubert_finetuned_squad_pipeline", lang = "en") // pipeline selected by name and language +val annotations = pipeline.transform(df) // `df` is not defined in this snippet; presumably the input DataFrame, create it beforehand + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rubert_finetuned_squad_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|664.3 MB| + +## References + +https://huggingface.co/sad-bkt/rubert-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-rubert_russian_qa_sberquad_pipeline_ru.md b/docs/_posts/ahmedlone127/2024-11-11-rubert_russian_qa_sberquad_pipeline_ru.md new file mode 100644 index 00000000000000..f87c9c44b4e02a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-rubert_russian_qa_sberquad_pipeline_ru.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Russian rubert_russian_qa_sberquad_pipeline pipeline BertForQuestionAnswering from milyausha2801 +author: John Snow Labs +name: rubert_russian_qa_sberquad_pipeline +date: 2024-11-11 +tags: [ru, open_source, pipeline, onnx] +task: Question Answering +language: ru +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `rubert_russian_qa_sberquad_pipeline` is a Russian model originally trained by milyausha2801. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rubert_russian_qa_sberquad_pipeline_ru_5.5.1_3.0_1731289278063.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rubert_russian_qa_sberquad_pipeline_ru_5.5.1_3.0_1731289278063.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("rubert_russian_qa_sberquad_pipeline", lang = "ru")  # pipeline selected by name and language +annotations = pipeline.transform(df)  # `df` is not defined in this snippet; presumably the input DataFrame, create it beforehand + +``` +```scala + +val pipeline = new PretrainedPipeline("rubert_russian_qa_sberquad_pipeline", lang = "ru") // pipeline selected by name and language +val annotations = pipeline.transform(df) // `df` is not defined in this snippet; presumably the input DataFrame, create it beforehand + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rubert_russian_qa_sberquad_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|ru| +|Size:|664.3 MB| + +## References + +https://huggingface.co/milyausha2801/rubert-russian-qa-sberquad + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-rubert_russian_qa_sberquad_ru.md b/docs/_posts/ahmedlone127/2024-11-11-rubert_russian_qa_sberquad_ru.md new file mode 100644 index 00000000000000..e86419e4db3e19 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-rubert_russian_qa_sberquad_ru.md @@ -0,0 +1,86 @@ +--- +layout: model +title: Russian rubert_russian_qa_sberquad BertForQuestionAnswering from milyausha2801 +author: John Snow Labs +name: rubert_russian_qa_sberquad +date: 2024-11-11 +tags: [ru, open_source, onnx, question_answering, bert] +task: Question Answering +language: ru +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `rubert_russian_qa_sberquad` is a Russian model originally trained by milyausha2801. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rubert_russian_qa_sberquad_ru_5.5.1_3.0_1731289240932.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rubert_russian_qa_sberquad_ru_5.5.1_3.0_1731289240932.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"])  # plural setters: one output column per input column + +spanClassifier = BertForQuestionAnswering.pretrained("rubert_russian_qa_sberquad","ru") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")  # raw columns are named after the assembler's input columns +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) // plural setters: one output column per input column + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = BertForQuestionAnswering.pretrained("rubert_russian_qa_sberquad", "ru") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context") // one row holding a (question, context) tuple, named after the assembler's input columns +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rubert_russian_qa_sberquad| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|ru| +|Size:|664.3 MB| + +## References + +https://huggingface.co/milyausha2801/rubert-russian-qa-sberquad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-rupunct_small_pipeline_ru.md b/docs/_posts/ahmedlone127/2024-11-11-rupunct_small_pipeline_ru.md new file mode 100644 index 00000000000000..990b9ad591fd02 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-rupunct_small_pipeline_ru.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Russian rupunct_small_pipeline pipeline BertForTokenClassification from RUPunct +author: John Snow Labs +name: rupunct_small_pipeline +date: 2024-11-11 +tags: [ru, open_source, pipeline, onnx] +task: Named Entity Recognition +language: ru +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `rupunct_small_pipeline` is a Russian model originally trained by RUPunct. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rupunct_small_pipeline_ru_5.5.1_3.0_1731299090139.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rupunct_small_pipeline_ru_5.5.1_3.0_1731299090139.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("rupunct_small_pipeline", lang = "ru")  # pipeline selected by name and language +annotations = pipeline.transform(df)  # `df` is not defined in this snippet; presumably the input DataFrame, create it beforehand + +``` +```scala + +val pipeline = new PretrainedPipeline("rupunct_small_pipeline", lang = "ru") // pipeline selected by name and language +val annotations = pipeline.transform(df) // `df` is not defined in this snippet; presumably the input DataFrame, create it beforehand + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rupunct_small_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|ru| +|Size:|109.2 MB| + +## References + +https://huggingface.co/RUPunct/RUPunct_small + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-rupunct_small_ru.md b/docs/_posts/ahmedlone127/2024-11-11-rupunct_small_ru.md new file mode 100644 index 00000000000000..8e85fe7ceb62b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-rupunct_small_ru.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Russian rupunct_small BertForTokenClassification from RUPunct +author: John Snow Labs +name: rupunct_small +date: 2024-11-11 +tags: [ru, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: ru +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `rupunct_small` is a Russian model originally trained by RUPunct. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rupunct_small_ru_5.5.1_3.0_1731299084262.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rupunct_small_ru_5.5.1_3.0_1731299084262.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("rupunct_small","ru") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner")  # consumes the "document" and "token" columns produced above + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") // DocumentAssembler setters are singular: setInputCol / setOutputCol + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("rupunct_small", "ru") + .setInputCols(Array("document","token")) // must match the "document" and "token" columns produced above + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rupunct_small| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|ru| +|Size:|109.2 MB| + +## References + +https://huggingface.co/RUPunct/RUPunct_small \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-ruroberta_large_ner_en.md b/docs/_posts/ahmedlone127/2024-11-11-ruroberta_large_ner_en.md new file mode 100644 index 00000000000000..3bb5724da55871 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-ruroberta_large_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ruroberta_large_ner RoBertaForTokenClassification from KobanBanan +author: John Snow Labs +name: ruroberta_large_ner +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ruroberta_large_ner` is an English model originally trained by KobanBanan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ruroberta_large_ner_en_5.5.1_3.0_1731314157839.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ruroberta_large_ner_en_5.5.1_3.0_1731314157839.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = RoBertaForTokenClassification.pretrained("ruroberta_large_ner","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner")  # consumes the "document" and "token" columns produced above + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") // DocumentAssembler setters are singular: setInputCol / setOutputCol + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = RoBertaForTokenClassification.pretrained("ruroberta_large_ner", "en") + .setInputCols(Array("document","token")) // must match the "document" and "token" columns produced above + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ruroberta_large_ner| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/KobanBanan/ruRoberta-large_ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-ruroberta_large_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-ruroberta_large_ner_pipeline_en.md new file mode 100644 index 00000000000000..9cf3826d7590c6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-ruroberta_large_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ruroberta_large_ner_pipeline pipeline RoBertaForTokenClassification from KobanBanan +author: John Snow Labs +name: ruroberta_large_ner_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ruroberta_large_ner_pipeline` is an English model originally trained by KobanBanan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ruroberta_large_ner_pipeline_en_5.5.1_3.0_1731314229337.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ruroberta_large_ner_pipeline_en_5.5.1_3.0_1731314229337.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ruroberta_large_ner_pipeline", lang = "en")  # pipeline selected by name and language +annotations = pipeline.transform(df)  # `df` is not defined in this snippet; presumably the input DataFrame, create it beforehand + +``` +```scala + +val pipeline = new PretrainedPipeline("ruroberta_large_ner_pipeline", lang = "en") // pipeline selected by name and language +val annotations = pipeline.transform(df) // `df` is not defined in this snippet; presumably the input DataFrame, create it beforehand + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ruroberta_large_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/KobanBanan/ruRoberta-large_ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-scenario_nepal_bhasa_en.md b/docs/_posts/ahmedlone127/2024-11-11-scenario_nepal_bhasa_en.md new file mode 100644 index 00000000000000..c7093e0b973f82 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-scenario_nepal_bhasa_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English scenario_nepal_bhasa BertForSequenceClassification from Mahmoud3899 +author: John Snow Labs +name: scenario_nepal_bhasa +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `scenario_nepal_bhasa` is an English model originally trained by Mahmoud3899. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scenario_nepal_bhasa_en_5.5.1_3.0_1731310227574.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scenario_nepal_bhasa_en_5.5.1_3.0_1731310227574.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("scenario_nepal_bhasa","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class")  # consumes the "document" and "token" columns produced above + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") // DocumentAssembler setters are singular: setInputCol / setOutputCol + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("scenario_nepal_bhasa", "en") + .setInputCols(Array("document","token")) // must match the "document" and "token" columns produced above + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scenario_nepal_bhasa| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Mahmoud3899/scenario_new \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-scenario_nepal_bhasa_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-scenario_nepal_bhasa_pipeline_en.md new file mode 100644 index 00000000000000..659b60d1e70c3c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-scenario_nepal_bhasa_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English scenario_nepal_bhasa_pipeline pipeline BertForSequenceClassification from Mahmoud3899 +author: John Snow Labs +name: scenario_nepal_bhasa_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `scenario_nepal_bhasa_pipeline` is an English model originally trained by Mahmoud3899. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scenario_nepal_bhasa_pipeline_en_5.5.1_3.0_1731310249022.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scenario_nepal_bhasa_pipeline_en_5.5.1_3.0_1731310249022.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("scenario_nepal_bhasa_pipeline", lang = "en")  # pipeline selected by name and language +annotations = pipeline.transform(df)  # `df` is not defined in this snippet; presumably the input DataFrame, create it beforehand + +``` +```scala + +val pipeline = new PretrainedPipeline("scenario_nepal_bhasa_pipeline", lang = "en") // pipeline selected by name and language +val annotations = pipeline.transform(df) // `df` is not defined in this snippet; presumably the input DataFrame, create it beforehand + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scenario_nepal_bhasa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Mahmoud3899/scenario_new + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-screenshot_fashion_clip_finetuned_en.md b/docs/_posts/ahmedlone127/2024-11-11-screenshot_fashion_clip_finetuned_en.md new file mode 100644 index 00000000000000..ed8c1582e10d65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-screenshot_fashion_clip_finetuned_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English screenshot_fashion_clip_finetuned CLIPForZeroShotClassification from justin-shopcapsule +author: John Snow Labs +name: screenshot_fashion_clip_finetuned +date: 2024-11-11 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `screenshot_fashion_clip_finetuned` is an English model originally trained by justin-shopcapsule. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/screenshot_fashion_clip_finetuned_en_5.5.1_3.0_1731287543136.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/screenshot_fashion_clip_finetuned_en_5.5.1_3.0_1731287543136.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +imageDF = spark.read \ + .format("image") \ + .option("dropInvalid", value = True) \ + .load("src/test/resources/image/") + +candidateLabels = [ + "a photo of a bird", + "a photo of a cat", + "a photo of a dog", + "a photo of a hen", + "a photo of a hippo", + "a photo of a room", + "a photo of a tractor", + "a photo of an ostrich", + "a photo of an ox"] + +imageAssembler = ImageAssembler() \ + .setInputCol("image") \ + .setOutputCol("image_assembler") + +imageClassifier = CLIPForZeroShotClassification.pretrained("screenshot_fashion_clip_finetuned","en") \ + .setInputCols(["image_assembler"]) \ + .setOutputCol("label") \ + .setCandidateLabels(candidateLabels) + +pipeline = Pipeline().setStages([imageAssembler, imageClassifier]) +pipelineModel = pipeline.fit(imageDF) +pipelineDF = pipelineModel.transform(imageDF) + + +``` +```scala + + +val imageDF = ResourceHelper.spark.read + .format("image") + .option("dropInvalid", value = true) + .load("src/test/resources/image/") + +val candidateLabels = Array( + "a photo of a bird", + "a photo of a cat", + "a photo of a dog", + "a photo of a hen", + "a photo of a hippo", + "a photo of a room", + "a photo of a tractor", + "a photo of an ostrich", + "a photo of an ox") + +val imageAssembler = new ImageAssembler() + .setInputCol("image") + .setOutputCol("image_assembler") + +val imageClassifier = CLIPForZeroShotClassification.pretrained("screenshot_fashion_clip_finetuned","en") + .setInputCols(Array("image_assembler")) + .setOutputCol("label") + .setCandidateLabels(candidateLabels) + +val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier)) +val pipelineModel = pipeline.fit(imageDF) +val pipelineDF = pipelineModel.transform(imageDF) + +``` +
 + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|screenshot_fashion_clip_finetuned| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|567.5 MB| + +## References + +https://huggingface.co/justin-shopcapsule/screenshot-fashion-clip-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-screenshot_fashion_clip_finetuned_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-screenshot_fashion_clip_finetuned_pipeline_en.md new file mode 100644 index 00000000000000..102d8b6c14a858 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-screenshot_fashion_clip_finetuned_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English screenshot_fashion_clip_finetuned_pipeline pipeline CLIPForZeroShotClassification from justin-shopcapsule +author: John Snow Labs +name: screenshot_fashion_clip_finetuned_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `screenshot_fashion_clip_finetuned_pipeline` is an English model originally trained by justin-shopcapsule. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/screenshot_fashion_clip_finetuned_pipeline_en_5.5.1_3.0_1731287572581.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/screenshot_fashion_clip_finetuned_pipeline_en_5.5.1_3.0_1731287572581.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("screenshot_fashion_clip_finetuned_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("screenshot_fashion_clip_finetuned_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|screenshot_fashion_clip_finetuned_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|567.6 MB| + +## References + +https://huggingface.co/justin-shopcapsule/screenshot-fashion-clip-finetuned + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-securebert_finetuned_ner_en.md b/docs/_posts/ahmedlone127/2024-11-11-securebert_finetuned_ner_en.md new file mode 100644 index 00000000000000..1b411577d74f8c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-securebert_finetuned_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English securebert_finetuned_ner RoBertaForTokenClassification from zohreaz +author: John Snow Labs +name: securebert_finetuned_ner +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`securebert_finetuned_ner` is a English model originally trained by zohreaz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/securebert_finetuned_ner_en_5.5.1_3.0_1731311208086.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/securebert_finetuned_ner_en_5.5.1_3.0_1731311208086.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = RoBertaForTokenClassification.pretrained("securebert_finetuned_ner","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = RoBertaForTokenClassification.pretrained("securebert_finetuned_ner", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|securebert_finetuned_ner| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|466.4 MB| + +## References + +https://huggingface.co/zohreaz/SecureBERT-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-securebert_finetuned_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-securebert_finetuned_ner_pipeline_en.md new file mode 100644 index 00000000000000..2b9c4fceb757e9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-securebert_finetuned_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English securebert_finetuned_ner_pipeline pipeline RoBertaForTokenClassification from zohreaz +author: John Snow Labs +name: securebert_finetuned_ner_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`securebert_finetuned_ner_pipeline` is a English model originally trained by zohreaz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/securebert_finetuned_ner_pipeline_en_5.5.1_3.0_1731311234765.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/securebert_finetuned_ner_pipeline_en_5.5.1_3.0_1731311234765.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("securebert_finetuned_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("securebert_finetuned_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
 + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|securebert_finetuned_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.4 MB| + +## References + +https://huggingface.co/zohreaz/SecureBERT-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_7_epoch_edu_model_finetuned_fintech_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_7_epoch_edu_model_finetuned_fintech_en.md new file mode 100644 index 00000000000000..42a54be53d2e4b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_7_epoch_edu_model_finetuned_fintech_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_7_epoch_edu_model_finetuned_fintech BertSentenceEmbeddings from Pastushoc +author: John Snow Labs +name: sent_7_epoch_edu_model_finetuned_fintech +date: 2024-11-11 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_7_epoch_edu_model_finetuned_fintech` is an English model originally trained by Pastushoc. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_7_epoch_edu_model_finetuned_fintech_en_5.5.1_3.0_1731295684263.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_7_epoch_edu_model_finetuned_fintech_en_5.5.1_3.0_1731295684263.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_7_epoch_edu_model_finetuned_fintech","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_7_epoch_edu_model_finetuned_fintech","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_7_epoch_edu_model_finetuned_fintech| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|108.7 MB| + +## References + +https://huggingface.co/Pastushoc/7_epoch_edu_model-finetuned-fintech \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_7_epoch_edu_model_finetuned_fintech_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_7_epoch_edu_model_finetuned_fintech_pipeline_en.md new file mode 100644 index 00000000000000..5e2b17bccd2d2b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_7_epoch_edu_model_finetuned_fintech_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_7_epoch_edu_model_finetuned_fintech_pipeline pipeline BertSentenceEmbeddings from Pastushoc +author: John Snow Labs +name: sent_7_epoch_edu_model_finetuned_fintech_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_7_epoch_edu_model_finetuned_fintech_pipeline` is a English model originally trained by Pastushoc. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_7_epoch_edu_model_finetuned_fintech_pipeline_en_5.5.1_3.0_1731295690134.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_7_epoch_edu_model_finetuned_fintech_pipeline_en_5.5.1_3.0_1731295690134.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_7_epoch_edu_model_finetuned_fintech_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_7_epoch_edu_model_finetuned_fintech_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_7_epoch_edu_model_finetuned_fintech_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|109.3 MB| + +## References + +https://huggingface.co/Pastushoc/7_epoch_edu_model-finetuned-fintech + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_bert_base_english_french_arabic_cased_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_bert_base_english_french_arabic_cased_en.md new file mode 100644 index 00000000000000..596ef115ca3bfe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_bert_base_english_french_arabic_cased_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_bert_base_english_french_arabic_cased BertSentenceEmbeddings from Geotrend +author: John Snow Labs +name: sent_bert_base_english_french_arabic_cased +date: 2024-11-11 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_english_french_arabic_cased` is a English model originally trained by Geotrend. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_english_french_arabic_cased_en_5.5.1_3.0_1731296029932.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_english_french_arabic_cased_en_5.5.1_3.0_1731296029932.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_english_french_arabic_cased","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_english_french_arabic_cased","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_english_french_arabic_cased| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|426.0 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-ar-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_bert_base_english_french_arabic_cased_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_bert_base_english_french_arabic_cased_pipeline_en.md new file mode 100644 index 00000000000000..d3aaf7c6ebedb1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_bert_base_english_french_arabic_cased_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_bert_base_english_french_arabic_cased_pipeline pipeline BertSentenceEmbeddings from Geotrend +author: John Snow Labs +name: sent_bert_base_english_french_arabic_cased_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_english_french_arabic_cased_pipeline` is a English model originally trained by Geotrend. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_english_french_arabic_cased_pipeline_en_5.5.1_3.0_1731296051698.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_english_french_arabic_cased_pipeline_en_5.5.1_3.0_1731296051698.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_base_english_french_arabic_cased_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_base_english_french_arabic_cased_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_english_french_arabic_cased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|426.6 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-ar-cased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_bert_large_cased_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_bert_large_cased_en.md new file mode 100644 index 00000000000000..6256a2727669dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_bert_large_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: BERT Sentence Embeddings (Large Cased) +author: John Snow Labs +name: sent_bert_large_cased +date: 2024-11-11 +tags: [open_source, embeddings, en, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +This model contains a deep bidirectional transformer trained on Wikipedia and the BookCorpus. The details are described in the paper "[BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/abs/1810.04805)". + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_large_cased_en_5.5.1_3.0_1731295875935.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_large_cased_en_5.5.1_3.0_1731295875935.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +... +embeddings = BertSentenceEmbeddings.pretrained("sent_bert_large_cased", "en") \ +.setInputCols("sentence") \ +.setOutputCol("sentence_embeddings") +nlp_pipeline = Pipeline(stages=[document_assembler, sentence_detector, embeddings]) +pipeline_model = nlp_pipeline.fit(spark.createDataFrame([[""]]).toDF("text")) +result = pipeline_model.transform(spark.createDataFrame([['I hate cancer'], ["Antibiotics aren't painkiller"]], ["text"])) +``` +```scala +... +val embeddings = BertSentenceEmbeddings.pretrained("sent_bert_large_cased", "en") +.setInputCols("sentence") +.setOutputCol("sentence_embeddings") +val pipeline = new Pipeline().setStages(Array(document_assembler, sentence_detector, embeddings)) +val data = Seq("I hate cancer", "Antibiotics aren't painkiller").toDF("text") +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu + +text = ["I hate cancer", "Antibiotics aren't painkiller"] +embeddings_df = nlu.load('en.embed_sentence.bert_large_cased').predict(text, output_level='sentence') +embeddings_df +``` +
+ +## Results + +```bash + + + token en_embed_sentence_bert_large_cased_embeddings + + I [[-0.6228358149528503, -0.3453695774078369, 0.... +love [[-0.6228358149528503, -0.3453695774078369, 0.... +NLP [[-0.6228358149528503, -0.3453695774078369, 0.... +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_large_cased| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_bert_large_cased_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_bert_large_cased_pipeline_en.md new file mode 100644 index 00000000000000..20f8f983a6fe25 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_bert_large_cased_pipeline_en.md @@ -0,0 +1,73 @@ +--- +layout: model +title: English sent_bert_large_cased_pipeline pipeline BertSentenceEmbeddings from google-bert +author: John Snow Labs +name: sent_bert_large_cased_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_large_cased_pipeline` is a English model originally trained by google-bert. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_large_cased_pipeline_en_5.5.1_3.0_1731295939295.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_large_cased_pipeline_en_5.5.1_3.0_1731295939295.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +pipeline = PretrainedPipeline("sent_bert_large_cased_pipeline", lang = "en") +annotations = pipeline.transform(df) +``` +```scala +val pipeline = new PretrainedPipeline("sent_bert_large_cased_pipeline", lang = "en") +val annotations = pipeline.transform(df) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_large_cased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.2 GB| + +## References + +References + +https://huggingface.co/google-bert/bert-large-cased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_bert_small_uncased_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_bert_small_uncased_en.md new file mode 100644 index 00000000000000..396ecfa34b8088 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_bert_small_uncased_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_bert_small_uncased BertSentenceEmbeddings from gaunernst +author: John Snow Labs +name: sent_bert_small_uncased +date: 2024-11-11 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_small_uncased` is a English model originally trained by gaunernst. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_small_uncased_en_5.5.1_3.0_1731295492148.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_small_uncased_en_5.5.1_3.0_1731295492148.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bert_small_uncased","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bert_small_uncased","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_small_uncased| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/gaunernst/bert-small-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_bert_small_uncased_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_bert_small_uncased_pipeline_en.md new file mode 100644 index 00000000000000..41d9628eebb246 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_bert_small_uncased_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_bert_small_uncased_pipeline pipeline BertSentenceEmbeddings from gaunernst +author: John Snow Labs +name: sent_bert_small_uncased_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_small_uncased_pipeline` is a English model originally trained by gaunernst. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_small_uncased_pipeline_en_5.5.1_3.0_1731295497059.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_small_uncased_pipeline_en_5.5.1_3.0_1731295497059.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_small_uncased_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_small_uncased_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_small_uncased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|107.5 MB| + +## References + +https://huggingface.co/gaunernst/bert-small-uncased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_bowdpr_wiki_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_bowdpr_wiki_en.md new file mode 100644 index 00000000000000..7b4bff50cf0205 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_bowdpr_wiki_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_bowdpr_wiki BertSentenceEmbeddings from bowdpr +author: John Snow Labs +name: sent_bowdpr_wiki +date: 2024-11-11 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_bowdpr_wiki` is an English model originally trained by bowdpr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bowdpr_wiki_en_5.5.1_3.0_1731295788052.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bowdpr_wiki_en_5.5.1_3.0_1731295788052.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bowdpr_wiki","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bowdpr_wiki","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bowdpr_wiki| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.9 MB| + +## References + +https://huggingface.co/bowdpr/bowdpr_wiki \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_bowdpr_wiki_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_bowdpr_wiki_pipeline_en.md new file mode 100644 index 00000000000000..582cd91ff203ff --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_bowdpr_wiki_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_bowdpr_wiki_pipeline pipeline BertSentenceEmbeddings from bowdpr +author: John Snow Labs +name: sent_bowdpr_wiki_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_bowdpr_wiki_pipeline` is an English model originally trained by bowdpr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bowdpr_wiki_pipeline_en_5.5.1_3.0_1731295808981.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bowdpr_wiki_pipeline_en_5.5.1_3.0_1731295808981.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bowdpr_wiki_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bowdpr_wiki_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bowdpr_wiki_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.5 MB| + +## References + +https://huggingface.co/bowdpr/bowdpr_wiki + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_cocodr_large_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_cocodr_large_en.md new file mode 100644 index 00000000000000..7caf59748d94eb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_cocodr_large_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_cocodr_large BertSentenceEmbeddings from OpenMatch +author: John Snow Labs +name: sent_cocodr_large +date: 2024-11-11 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_cocodr_large` is an English model originally trained by OpenMatch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_cocodr_large_en_5.5.1_3.0_1731296353493.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_cocodr_large_en_5.5.1_3.0_1731296353493.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_cocodr_large","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_cocodr_large","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_cocodr_large| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/OpenMatch/cocodr-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_cocodr_large_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_cocodr_large_pipeline_en.md new file mode 100644 index 00000000000000..fdcc0429397e6d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_cocodr_large_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_cocodr_large_pipeline pipeline BertSentenceEmbeddings from OpenMatch +author: John Snow Labs +name: sent_cocodr_large_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_cocodr_large_pipeline` is an English model originally trained by OpenMatch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_cocodr_large_pipeline_en_5.5.1_3.0_1731296416055.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_cocodr_large_pipeline_en_5.5.1_3.0_1731296416055.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_cocodr_large_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_cocodr_large_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_cocodr_large_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/OpenMatch/cocodr-large + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_flang_spanbert_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_flang_spanbert_en.md new file mode 100644 index 00000000000000..00ffea7911dde4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_flang_spanbert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_flang_spanbert BertSentenceEmbeddings from SALT-NLP +author: John Snow Labs +name: sent_flang_spanbert +date: 2024-11-11 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_flang_spanbert` is an English model originally trained by SALT-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_flang_spanbert_en_5.5.1_3.0_1731295507910.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_flang_spanbert_en_5.5.1_3.0_1731295507910.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_flang_spanbert","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_flang_spanbert","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_flang_spanbert| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/SALT-NLP/FLANG-SpanBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_flang_spanbert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_flang_spanbert_pipeline_en.md new file mode 100644 index 00000000000000..3e59ffc19a4472 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_flang_spanbert_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_flang_spanbert_pipeline pipeline BertSentenceEmbeddings from SALT-NLP +author: John Snow Labs +name: sent_flang_spanbert_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_flang_spanbert_pipeline` is an English model originally trained by SALT-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_flang_spanbert_pipeline_en_5.5.1_3.0_1731295529427.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_flang_spanbert_pipeline_en_5.5.1_3.0_1731295529427.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_flang_spanbert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_flang_spanbert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_flang_spanbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|404.1 MB| + +## References + +https://huggingface.co/SALT-NLP/FLANG-SpanBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_gww_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_gww_en.md new file mode 100644 index 00000000000000..2cf6dab7b7a777 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_gww_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_gww BertSentenceEmbeddings from dunlp +author: John Snow Labs +name: sent_gww +date: 2024-11-11 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_gww` is an English model originally trained by dunlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_gww_en_5.5.1_3.0_1731296186768.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_gww_en_5.5.1_3.0_1731296186768.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_gww","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_gww","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_gww| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/dunlp/GWW \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_gww_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_gww_pipeline_en.md new file mode 100644 index 00000000000000..c94c30884dffce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_gww_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_gww_pipeline pipeline BertSentenceEmbeddings from dunlp +author: John Snow Labs +name: sent_gww_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_gww_pipeline` is an English model originally trained by dunlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_gww_pipeline_en_5.5.1_3.0_1731296207716.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_gww_pipeline_en_5.5.1_3.0_1731296207716.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_gww_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_gww_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_gww_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/dunlp/GWW + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_hindi_tweets_bert_hateful_hi.md b/docs/_posts/ahmedlone127/2024-11-11-sent_hindi_tweets_bert_hateful_hi.md new file mode 100644 index 00000000000000..1206e880b00eb4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_hindi_tweets_bert_hateful_hi.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Hindi sent_hindi_tweets_bert_hateful BertSentenceEmbeddings from l3cube-pune +author: John Snow Labs +name: sent_hindi_tweets_bert_hateful +date: 2024-11-11 +tags: [hi, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: hi +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_hindi_tweets_bert_hateful` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_hindi_tweets_bert_hateful_hi_5.5.1_3.0_1731295605957.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_hindi_tweets_bert_hateful_hi_5.5.1_3.0_1731295605957.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_hindi_tweets_bert_hateful","hi") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_hindi_tweets_bert_hateful","hi") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_hindi_tweets_bert_hateful| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|890.6 MB| + +## References + +https://huggingface.co/l3cube-pune/hindi-tweets-bert-hateful \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_hindi_tweets_bert_hateful_pipeline_hi.md b/docs/_posts/ahmedlone127/2024-11-11-sent_hindi_tweets_bert_hateful_pipeline_hi.md new file mode 100644 index 00000000000000..b32d4fbe8decb2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_hindi_tweets_bert_hateful_pipeline_hi.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Hindi sent_hindi_tweets_bert_hateful_pipeline pipeline BertSentenceEmbeddings from l3cube-pune +author: John Snow Labs +name: sent_hindi_tweets_bert_hateful_pipeline +date: 2024-11-11 +tags: [hi, open_source, pipeline, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_hindi_tweets_bert_hateful_pipeline` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_hindi_tweets_bert_hateful_pipeline_hi_5.5.1_3.0_1731295656628.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_hindi_tweets_bert_hateful_pipeline_hi_5.5.1_3.0_1731295656628.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_hindi_tweets_bert_hateful_pipeline", lang = "hi") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_hindi_tweets_bert_hateful_pipeline", lang = "hi") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_hindi_tweets_bert_hateful_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|hi| +|Size:|891.2 MB| + +## References + +https://huggingface.co/l3cube-pune/hindi-tweets-bert-hateful + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_hinglish_bert_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_hinglish_bert_en.md new file mode 100644 index 00000000000000..0f0e6559c1ccfa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_hinglish_bert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_hinglish_bert BertSentenceEmbeddings from meghanabhange +author: John Snow Labs +name: sent_hinglish_bert +date: 2024-11-11 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_hinglish_bert` is an English model originally trained by meghanabhange. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_hinglish_bert_en_5.5.1_3.0_1731296141121.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_hinglish_bert_en_5.5.1_3.0_1731296141121.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_hinglish_bert","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_hinglish_bert","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_hinglish_bert| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/meghanabhange/Hinglish-Bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_hinglish_bert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_hinglish_bert_pipeline_en.md new file mode 100644 index 00000000000000..8bcbeff1e1404e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_hinglish_bert_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_hinglish_bert_pipeline pipeline BertSentenceEmbeddings from meghanabhange +author: John Snow Labs +name: sent_hinglish_bert_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_hinglish_bert_pipeline` is an English model originally trained by meghanabhange. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_hinglish_bert_pipeline_en_5.5.1_3.0_1731296175548.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_hinglish_bert_pipeline_en_5.5.1_3.0_1731296175548.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_hinglish_bert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_hinglish_bert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_hinglish_bert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|665.6 MB| + +## References + +https://huggingface.co/meghanabhange/Hinglish-Bert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_kinyabert_small_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_kinyabert_small_en.md new file mode 100644 index 00000000000000..fa36e89fefb180 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_kinyabert_small_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_kinyabert_small BertSentenceEmbeddings from jean-paul +author: John Snow Labs +name: sent_kinyabert_small +date: 2024-11-11 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_kinyabert_small` is an English model originally trained by jean-paul. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_kinyabert_small_en_5.5.1_3.0_1731296300457.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_kinyabert_small_en_5.5.1_3.0_1731296300457.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_kinyabert_small","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_kinyabert_small","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_kinyabert_small| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/jean-paul/KinyaBERT-small \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_kinyabert_small_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_kinyabert_small_pipeline_en.md new file mode 100644 index 00000000000000..9357bf7410de47 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_kinyabert_small_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_kinyabert_small_pipeline pipeline BertSentenceEmbeddings from jean-paul +author: John Snow Labs +name: sent_kinyabert_small_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_kinyabert_small_pipeline` is an English model originally trained by jean-paul. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_kinyabert_small_pipeline_en_5.5.1_3.0_1731296313640.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_kinyabert_small_pipeline_en_5.5.1_3.0_1731296313640.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_kinyabert_small_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_kinyabert_small_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_kinyabert_small_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|248.0 MB| + +## References + +https://huggingface.co/jean-paul/KinyaBERT-small + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_legal_bert_base_uncased_finetuned_rramicus_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_legal_bert_base_uncased_finetuned_rramicus_en.md new file mode 100644 index 00000000000000..a2068c527f4a0b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_legal_bert_base_uncased_finetuned_rramicus_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_legal_bert_base_uncased_finetuned_rramicus BertSentenceEmbeddings from repro-rights-amicus-briefs +author: John Snow Labs +name: sent_legal_bert_base_uncased_finetuned_rramicus +date: 2024-11-11 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_legal_bert_base_uncased_finetuned_rramicus` is an English model originally trained by repro-rights-amicus-briefs.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_legal_bert_base_uncased_finetuned_rramicus_en_5.5.1_3.0_1731295609641.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_legal_bert_base_uncased_finetuned_rramicus_en_5.5.1_3.0_1731295609641.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_legal_bert_base_uncased_finetuned_rramicus","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_legal_bert_base_uncased_finetuned_rramicus","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_legal_bert_base_uncased_finetuned_rramicus| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/repro-rights-amicus-briefs/legal-bert-base-uncased-finetuned-RRamicus \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_legal_bert_base_uncased_finetuned_rramicus_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_legal_bert_base_uncased_finetuned_rramicus_pipeline_en.md new file mode 100644 index 00000000000000..e1feaa3e7fff1a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_legal_bert_base_uncased_finetuned_rramicus_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_legal_bert_base_uncased_finetuned_rramicus_pipeline pipeline BertSentenceEmbeddings from repro-rights-amicus-briefs +author: John Snow Labs +name: sent_legal_bert_base_uncased_finetuned_rramicus_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_legal_bert_base_uncased_finetuned_rramicus_pipeline` is an English model originally trained by repro-rights-amicus-briefs.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_legal_bert_base_uncased_finetuned_rramicus_pipeline_en_5.5.1_3.0_1731295639417.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_legal_bert_base_uncased_finetuned_rramicus_pipeline_en_5.5.1_3.0_1731295639417.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_legal_bert_base_uncased_finetuned_rramicus_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_legal_bert_base_uncased_finetuned_rramicus_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_legal_bert_base_uncased_finetuned_rramicus_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.6 MB| + +## References + +https://huggingface.co/repro-rights-amicus-briefs/legal-bert-base-uncased-finetuned-RRamicus + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_logion_50k_wordpiece_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_logion_50k_wordpiece_en.md new file mode 100644 index 00000000000000..16ef3a4bd3e8eb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_logion_50k_wordpiece_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_logion_50k_wordpiece BertSentenceEmbeddings from cabrooks +author: John Snow Labs +name: sent_logion_50k_wordpiece +date: 2024-11-11 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_logion_50k_wordpiece` is an English model originally trained by cabrooks. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_logion_50k_wordpiece_en_5.5.1_3.0_1731296515126.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_logion_50k_wordpiece_en_5.5.1_3.0_1731296515126.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_logion_50k_wordpiece","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_logion_50k_wordpiece","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_logion_50k_wordpiece| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.1 MB| + +## References + +https://huggingface.co/cabrooks/LOGION-50k_wordpiece \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_logion_50k_wordpiece_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_logion_50k_wordpiece_pipeline_en.md new file mode 100644 index 00000000000000..db875dd24d6509 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_logion_50k_wordpiece_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_logion_50k_wordpiece_pipeline pipeline BertSentenceEmbeddings from cabrooks +author: John Snow Labs +name: sent_logion_50k_wordpiece_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_logion_50k_wordpiece_pipeline` is an English model originally trained by cabrooks. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_logion_50k_wordpiece_pipeline_en_5.5.1_3.0_1731296537847.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_logion_50k_wordpiece_pipeline_en_5.5.1_3.0_1731296537847.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_logion_50k_wordpiece_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_logion_50k_wordpiece_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_logion_50k_wordpiece_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|464.7 MB| + +## References + +https://huggingface.co/cabrooks/LOGION-50k_wordpiece + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_mbert_resp_english_chinese_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_mbert_resp_english_chinese_en.md new file mode 100644 index 00000000000000..fde19de7c4545e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_mbert_resp_english_chinese_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_mbert_resp_english_chinese BertSentenceEmbeddings from nikitam +author: John Snow Labs +name: sent_mbert_resp_english_chinese +date: 2024-11-11 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_mbert_resp_english_chinese` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_mbert_resp_english_chinese_en_5.5.1_3.0_1731296372061.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_mbert_resp_english_chinese_en_5.5.1_3.0_1731296372061.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_mbert_resp_english_chinese","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_mbert_resp_english_chinese","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_mbert_resp_english_chinese| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|625.0 MB| + +## References + +https://huggingface.co/nikitam/mbert-resp-en-zh \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_mbert_resp_english_chinese_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_mbert_resp_english_chinese_pipeline_en.md new file mode 100644 index 00000000000000..ac69c89b3dba18 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_mbert_resp_english_chinese_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_mbert_resp_english_chinese_pipeline pipeline BertSentenceEmbeddings from nikitam +author: John Snow Labs +name: sent_mbert_resp_english_chinese_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_mbert_resp_english_chinese_pipeline` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_mbert_resp_english_chinese_pipeline_en_5.5.1_3.0_1731296403530.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_mbert_resp_english_chinese_pipeline_en_5.5.1_3.0_1731296403530.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_mbert_resp_english_chinese_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_mbert_resp_english_chinese_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_mbert_resp_english_chinese_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|625.6 MB| + +## References + +https://huggingface.co/nikitam/mbert-resp-en-zh + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_pak_legal_bert_small_uncased_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_pak_legal_bert_small_uncased_en.md new file mode 100644 index 00000000000000..0601d07658c3e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_pak_legal_bert_small_uncased_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_pak_legal_bert_small_uncased BertSentenceEmbeddings from AISystems +author: John Snow Labs +name: sent_pak_legal_bert_small_uncased +date: 2024-11-11 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_pak_legal_bert_small_uncased` is an English model originally trained by AISystems. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_pak_legal_bert_small_uncased_en_5.5.1_3.0_1731295493020.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_pak_legal_bert_small_uncased_en_5.5.1_3.0_1731295493020.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_pak_legal_bert_small_uncased","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_pak_legal_bert_small_uncased","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_pak_legal_bert_small_uncased| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|130.5 MB| + +## References + +https://huggingface.co/AISystems/PAK-LEGAL-BERT-small-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_pak_legal_bert_small_uncased_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_pak_legal_bert_small_uncased_pipeline_en.md new file mode 100644 index 00000000000000..0fb73d309e24bb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_pak_legal_bert_small_uncased_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_pak_legal_bert_small_uncased_pipeline pipeline BertSentenceEmbeddings from AISystems +author: John Snow Labs +name: sent_pak_legal_bert_small_uncased_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_pak_legal_bert_small_uncased_pipeline` is an English model originally trained by AISystems. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_pak_legal_bert_small_uncased_pipeline_en_5.5.1_3.0_1731295500164.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_pak_legal_bert_small_uncased_pipeline_en_5.5.1_3.0_1731295500164.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_pak_legal_bert_small_uncased_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_pak_legal_bert_small_uncased_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_pak_legal_bert_small_uncased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|131.1 MB| + +## References + +https://huggingface.co/AISystems/PAK-LEGAL-BERT-small-uncased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_prunedbert_l12_h384_a6_finetuned_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_prunedbert_l12_h384_a6_finetuned_en.md new file mode 100644 index 00000000000000..786616bedc58fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_prunedbert_l12_h384_a6_finetuned_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_prunedbert_l12_h384_a6_finetuned BertSentenceEmbeddings from eli4s +author: John Snow Labs +name: sent_prunedbert_l12_h384_a6_finetuned +date: 2024-11-11 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_prunedbert_l12_h384_a6_finetuned` is an English model originally trained by eli4s.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_prunedbert_l12_h384_a6_finetuned_en_5.5.1_3.0_1731295988320.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_prunedbert_l12_h384_a6_finetuned_en_5.5.1_3.0_1731295988320.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_prunedbert_l12_h384_a6_finetuned","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_prunedbert_l12_h384_a6_finetuned","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_prunedbert_l12_h384_a6_finetuned| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|176.4 MB| + +## References + +https://huggingface.co/eli4s/prunedBert-L12-h384-A6-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_prunedbert_l12_h384_a6_finetuned_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_prunedbert_l12_h384_a6_finetuned_pipeline_en.md new file mode 100644 index 00000000000000..809d46e5e4e899 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_prunedbert_l12_h384_a6_finetuned_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_prunedbert_l12_h384_a6_finetuned_pipeline pipeline BertSentenceEmbeddings from eli4s +author: John Snow Labs +name: sent_prunedbert_l12_h384_a6_finetuned_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_prunedbert_l12_h384_a6_finetuned_pipeline` is an English model originally trained by eli4s. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_prunedbert_l12_h384_a6_finetuned_pipeline_en_5.5.1_3.0_1731295997556.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_prunedbert_l12_h384_a6_finetuned_pipeline_en_5.5.1_3.0_1731295997556.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_prunedbert_l12_h384_a6_finetuned_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_prunedbert_l12_h384_a6_finetuned_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_prunedbert_l12_h384_a6_finetuned_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|177.0 MB| + +## References + +https://huggingface.co/eli4s/prunedBert-L12-h384-A6-finetuned + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_structbert_large_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_structbert_large_en.md new file mode 100644 index 00000000000000..9d411c90eabd98 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_structbert_large_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_structbert_large BertSentenceEmbeddings from bayartsogt +author: John Snow Labs +name: sent_structbert_large +date: 2024-11-11 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_structbert_large` is an English model originally trained by bayartsogt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_structbert_large_en_5.5.1_3.0_1731295741007.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_structbert_large_en_5.5.1_3.0_1731295741007.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_structbert_large","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_structbert_large","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_structbert_large| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/bayartsogt/structbert-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_structbert_large_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_structbert_large_pipeline_en.md new file mode 100644 index 00000000000000..f9545f81882ba4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_structbert_large_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_structbert_large_pipeline pipeline BertSentenceEmbeddings from bayartsogt +author: John Snow Labs +name: sent_structbert_large_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_structbert_large_pipeline` is an English model originally trained by bayartsogt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_structbert_large_pipeline_en_5.5.1_3.0_1731295805138.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_structbert_large_pipeline_en_5.5.1_3.0_1731295805138.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_structbert_large_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_structbert_large_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_structbert_large_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/bayartsogt/structbert-large + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_tiny_mlm_glue_mnli_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_tiny_mlm_glue_mnli_en.md new file mode 100644 index 00000000000000..bd547fe49d2313 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_tiny_mlm_glue_mnli_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_tiny_mlm_glue_mnli BertSentenceEmbeddings from muhtasham +author: John Snow Labs +name: sent_tiny_mlm_glue_mnli +date: 2024-11-11 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_tiny_mlm_glue_mnli` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_tiny_mlm_glue_mnli_en_5.5.1_3.0_1731295597619.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_tiny_mlm_glue_mnli_en_5.5.1_3.0_1731295597619.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_tiny_mlm_glue_mnli","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_tiny_mlm_glue_mnli","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_tiny_mlm_glue_mnli| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-mnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_tiny_mlm_glue_mnli_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_tiny_mlm_glue_mnli_pipeline_en.md new file mode 100644 index 00000000000000..01b0eedcdddeb1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_tiny_mlm_glue_mnli_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_tiny_mlm_glue_mnli_pipeline pipeline BertSentenceEmbeddings from muhtasham +author: John Snow Labs +name: sent_tiny_mlm_glue_mnli_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_tiny_mlm_glue_mnli_pipeline` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_tiny_mlm_glue_mnli_pipeline_en_5.5.1_3.0_1731295598891.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_tiny_mlm_glue_mnli_pipeline_en_5.5.1_3.0_1731295598891.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_tiny_mlm_glue_mnli_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_tiny_mlm_glue_mnli_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_tiny_mlm_glue_mnli_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|17.2 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-mnli + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_tiny_mlm_glue_qnli_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_tiny_mlm_glue_qnli_en.md new file mode 100644 index 00000000000000..d3ccc3bf166267 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_tiny_mlm_glue_qnli_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_tiny_mlm_glue_qnli BertSentenceEmbeddings from muhtasham +author: John Snow Labs +name: sent_tiny_mlm_glue_qnli +date: 2024-11-11 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_tiny_mlm_glue_qnli` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_tiny_mlm_glue_qnli_en_5.5.1_3.0_1731295911140.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_tiny_mlm_glue_qnli_en_5.5.1_3.0_1731295911140.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_tiny_mlm_glue_qnli","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_tiny_mlm_glue_qnli","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_tiny_mlm_glue_qnli| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-qnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_tiny_mlm_glue_qnli_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_tiny_mlm_glue_qnli_pipeline_en.md new file mode 100644 index 00000000000000..3fc10d23738b8a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_tiny_mlm_glue_qnli_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_tiny_mlm_glue_qnli_pipeline pipeline BertSentenceEmbeddings from muhtasham +author: John Snow Labs +name: sent_tiny_mlm_glue_qnli_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_tiny_mlm_glue_qnli_pipeline` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_tiny_mlm_glue_qnli_pipeline_en_5.5.1_3.0_1731295912316.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_tiny_mlm_glue_qnli_pipeline_en_5.5.1_3.0_1731295912316.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_tiny_mlm_glue_qnli_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_tiny_mlm_glue_qnli_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_tiny_mlm_glue_qnli_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|17.2 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-qnli + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_tiny_mlm_snli_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_tiny_mlm_snli_en.md new file mode 100644 index 00000000000000..59b38622e1e5e8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_tiny_mlm_snli_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_tiny_mlm_snli BertSentenceEmbeddings from muhtasham +author: John Snow Labs +name: sent_tiny_mlm_snli +date: 2024-11-11 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_tiny_mlm_snli` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_tiny_mlm_snli_en_5.5.1_3.0_1731296115989.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_tiny_mlm_snli_en_5.5.1_3.0_1731296115989.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_tiny_mlm_snli","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_tiny_mlm_snli","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_tiny_mlm_snli| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-snli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_tiny_mlm_snli_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_tiny_mlm_snli_pipeline_en.md new file mode 100644 index 00000000000000..e1e8cbfe3b8e35 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_tiny_mlm_snli_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_tiny_mlm_snli_pipeline pipeline BertSentenceEmbeddings from muhtasham +author: John Snow Labs +name: sent_tiny_mlm_snli_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_tiny_mlm_snli_pipeline` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_tiny_mlm_snli_pipeline_en_5.5.1_3.0_1731296117216.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_tiny_mlm_snli_pipeline_en_5.5.1_3.0_1731296117216.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_tiny_mlm_snli_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_tiny_mlm_snli_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_tiny_mlm_snli_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|17.2 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-snli + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_tinybert_javanese_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_tinybert_javanese_en.md new file mode 100644 index 00000000000000..1c92e04daee06f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_tinybert_javanese_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_tinybert_javanese BertSentenceEmbeddings from akahana +author: John Snow Labs +name: sent_tinybert_javanese +date: 2024-11-11 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_tinybert_javanese` is an English model originally trained by akahana. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_tinybert_javanese_en_5.5.1_3.0_1731296102820.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_tinybert_javanese_en_5.5.1_3.0_1731296102820.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_tinybert_javanese","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_tinybert_javanese","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_tinybert_javanese| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/akahana/tinybert-javanese \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_tinybert_javanese_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_tinybert_javanese_pipeline_en.md new file mode 100644 index 00000000000000..0edbdfbd9bfaf9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_tinybert_javanese_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_tinybert_javanese_pipeline pipeline BertSentenceEmbeddings from akahana +author: John Snow Labs +name: sent_tinybert_javanese_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_tinybert_javanese_pipeline` is an English model originally trained by akahana. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_tinybert_javanese_pipeline_en_5.5.1_3.0_1731296104116.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_tinybert_javanese_pipeline_en_5.5.1_3.0_1731296104116.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_tinybert_javanese_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_tinybert_javanese_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_tinybert_javanese_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|17.2 MB| + +## References + +https://huggingface.co/akahana/tinybert-javanese + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_youtube_bert_10_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_youtube_bert_10_en.md new file mode 100644 index 00000000000000..ec8410874d4ce9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_youtube_bert_10_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_youtube_bert_10 BertSentenceEmbeddings from flboehm +author: John Snow Labs +name: sent_youtube_bert_10 +date: 2024-11-11 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_youtube_bert_10` is an English model originally trained by flboehm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_youtube_bert_10_en_5.5.1_3.0_1731296253290.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_youtube_bert_10_en_5.5.1_3.0_1731296253290.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_youtube_bert_10","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_youtube_bert_10","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_youtube_bert_10| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.6 MB| + +## References + +https://huggingface.co/flboehm/youtube-bert_10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sent_youtube_bert_10_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sent_youtube_bert_10_pipeline_en.md new file mode 100644 index 00000000000000..4d90e7434588fb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sent_youtube_bert_10_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_youtube_bert_10_pipeline pipeline BertSentenceEmbeddings from flboehm +author: John Snow Labs +name: sent_youtube_bert_10_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_youtube_bert_10_pipeline` is an English model originally trained by flboehm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_youtube_bert_10_pipeline_en_5.5.1_3.0_1731296274142.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_youtube_bert_10_pipeline_en_5.5.1_3.0_1731296274142.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_youtube_bert_10_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_youtube_bert_10_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_youtube_bert_10_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/flboehm/youtube-bert_10 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sentence_similarity_finetuned_mpnet_adrta_en.md b/docs/_posts/ahmedlone127/2024-11-11-sentence_similarity_finetuned_mpnet_adrta_en.md new file mode 100644 index 00000000000000..3b878db8d41968 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sentence_similarity_finetuned_mpnet_adrta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sentence_similarity_finetuned_mpnet_adrta MPNetForSequenceClassification from aizenSosuke +author: John Snow Labs +name: sentence_similarity_finetuned_mpnet_adrta +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentence_similarity_finetuned_mpnet_adrta` is a English model originally trained by aizenSosuke. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentence_similarity_finetuned_mpnet_adrta_en_5.5.1_3.0_1731301599861.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentence_similarity_finetuned_mpnet_adrta_en_5.5.1_3.0_1731301599861.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = MPNetForSequenceClassification.pretrained("sentence_similarity_finetuned_mpnet_adrta","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = MPNetForSequenceClassification.pretrained("sentence_similarity_finetuned_mpnet_adrta", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentence_similarity_finetuned_mpnet_adrta| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.3 MB| + +## References + +https://huggingface.co/aizenSosuke/sentence-similarity-finetuned-mpnet-adrta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sentence_similarity_finetuned_mpnet_adrta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sentence_similarity_finetuned_mpnet_adrta_pipeline_en.md new file mode 100644 index 00000000000000..f17efdad427243 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sentence_similarity_finetuned_mpnet_adrta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sentence_similarity_finetuned_mpnet_adrta_pipeline pipeline MPNetForSequenceClassification from aizenSosuke +author: John Snow Labs +name: sentence_similarity_finetuned_mpnet_adrta_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sentence_similarity_finetuned_mpnet_adrta_pipeline` is an English model originally trained by aizenSosuke. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentence_similarity_finetuned_mpnet_adrta_pipeline_en_5.5.1_3.0_1731301620394.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentence_similarity_finetuned_mpnet_adrta_pipeline_en_5.5.1_3.0_1731301620394.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sentence_similarity_finetuned_mpnet_adrta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sentence_similarity_finetuned_mpnet_adrta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentence_similarity_finetuned_mpnet_adrta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.3 MB| + +## References + +https://huggingface.co/aizenSosuke/sentence-similarity-finetuned-mpnet-adrta + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sentence_transformers_all_mpnet_base_v2_en.md b/docs/_posts/ahmedlone127/2024-11-11-sentence_transformers_all_mpnet_base_v2_en.md new file mode 100644 index 00000000000000..655ee36269b0f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sentence_transformers_all_mpnet_base_v2_en.md @@ -0,0 +1,88 @@ +--- +layout: model +title: English sentence_transformers_all_mpnet_base_v2 MPNetEmbeddings from ai-human-lab +author: John Snow Labs +name: sentence_transformers_all_mpnet_base_v2 +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sentence_transformers_all_mpnet_base_v2` is an English model originally trained by ai-human-lab. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentence_transformers_all_mpnet_base_v2_en_5.5.1_3.0_1731294868447.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentence_transformers_all_mpnet_base_v2_en_5.5.1_3.0_1731294868447.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("sentence_transformers_all_mpnet_base_v2","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("sentence_transformers_all_mpnet_base_v2","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentence_transformers_all_mpnet_base_v2| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +References + +https://huggingface.co/ai-human-lab/sentence-transformers_all-mpnet-base-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sentence_transformers_all_mpnet_base_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sentence_transformers_all_mpnet_base_v2_pipeline_en.md new file mode 100644 index 00000000000000..1014fb19af5de3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sentence_transformers_all_mpnet_base_v2_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sentence_transformers_all_mpnet_base_v2_pipeline pipeline MPNetEmbeddings from ai-human-lab +author: John Snow Labs +name: sentence_transformers_all_mpnet_base_v2_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentence_transformers_all_mpnet_base_v2_pipeline` is a English model originally trained by ai-human-lab. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentence_transformers_all_mpnet_base_v2_pipeline_en_5.5.1_3.0_1731294890802.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentence_transformers_all_mpnet_base_v2_pipeline_en_5.5.1_3.0_1731294890802.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +pipeline = PretrainedPipeline("sentence_transformers_all_mpnet_base_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) +``` +```scala +val pipeline = new PretrainedPipeline("sentence_transformers_all_mpnet_base_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentence_transformers_all_mpnet_base_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.9 MB| + +## References + +References + +https://huggingface.co/ai-human-lab/sentence-transformers_all-mpnet-base-v2 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sentencetransformer_ftmodel_on_chemical_dataset_en.md b/docs/_posts/ahmedlone127/2024-11-11-sentencetransformer_ftmodel_on_chemical_dataset_en.md new file mode 100644 index 00000000000000..45619df65fac6c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sentencetransformer_ftmodel_on_chemical_dataset_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English sentencetransformer_ftmodel_on_chemical_dataset MPNetEmbeddings from Saideepthi55 +author: John Snow Labs +name: sentencetransformer_ftmodel_on_chemical_dataset +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentencetransformer_ftmodel_on_chemical_dataset` is a English model originally trained by Saideepthi55. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentencetransformer_ftmodel_on_chemical_dataset_en_5.5.1_3.0_1731295029219.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentencetransformer_ftmodel_on_chemical_dataset_en_5.5.1_3.0_1731295029219.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("sentencetransformer_ftmodel_on_chemical_dataset","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("sentencetransformer_ftmodel_on_chemical_dataset","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentencetransformer_ftmodel_on_chemical_dataset| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|402.5 MB| + +## References + +https://huggingface.co/Saideepthi55/sentencetransformer_ftmodel_on_chemical_dataset \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sentencetransformer_ftmodel_on_chemical_dataset_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sentencetransformer_ftmodel_on_chemical_dataset_pipeline_en.md new file mode 100644 index 00000000000000..cd8788d4ba267c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sentencetransformer_ftmodel_on_chemical_dataset_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English sentencetransformer_ftmodel_on_chemical_dataset_pipeline pipeline MPNetEmbeddings from Saideepthi55 +author: John Snow Labs +name: sentencetransformer_ftmodel_on_chemical_dataset_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentencetransformer_ftmodel_on_chemical_dataset_pipeline` is a English model originally trained by Saideepthi55. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentencetransformer_ftmodel_on_chemical_dataset_pipeline_en_5.5.1_3.0_1731295052812.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentencetransformer_ftmodel_on_chemical_dataset_pipeline_en_5.5.1_3.0_1731295052812.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sentencetransformer_ftmodel_on_chemical_dataset_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sentencetransformer_ftmodel_on_chemical_dataset_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentencetransformer_ftmodel_on_chemical_dataset_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|402.5 MB| + +## References + +https://huggingface.co/Saideepthi55/sentencetransformer_ftmodel_on_chemical_dataset + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sentencetransformer_mpnet_base_on_chemical_dataset_en.md b/docs/_posts/ahmedlone127/2024-11-11-sentencetransformer_mpnet_base_on_chemical_dataset_en.md new file mode 100644 index 00000000000000..d5045ad86dd36b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sentencetransformer_mpnet_base_on_chemical_dataset_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sentencetransformer_mpnet_base_on_chemical_dataset MPNetForSequenceClassification from Saideepthi55 +author: John Snow Labs +name: sentencetransformer_mpnet_base_on_chemical_dataset +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentencetransformer_mpnet_base_on_chemical_dataset` is a English model originally trained by Saideepthi55. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentencetransformer_mpnet_base_on_chemical_dataset_en_5.5.1_3.0_1731301268354.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentencetransformer_mpnet_base_on_chemical_dataset_en_5.5.1_3.0_1731301268354.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = MPNetForSequenceClassification.pretrained("sentencetransformer_mpnet_base_on_chemical_dataset","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = MPNetForSequenceClassification.pretrained("sentencetransformer_mpnet_base_on_chemical_dataset", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentencetransformer_mpnet_base_on_chemical_dataset| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|263.5 MB| + +## References + +https://huggingface.co/Saideepthi55/sentencetransformer_mpnet_base_on_chemical_dataset \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sentencetransformer_mpnet_base_on_chemical_dataset_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sentencetransformer_mpnet_base_on_chemical_dataset_pipeline_en.md new file mode 100644 index 00000000000000..6b0b2bd45c68ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sentencetransformer_mpnet_base_on_chemical_dataset_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sentencetransformer_mpnet_base_on_chemical_dataset_pipeline pipeline MPNetForSequenceClassification from Saideepthi55 +author: John Snow Labs +name: sentencetransformer_mpnet_base_on_chemical_dataset_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sentencetransformer_mpnet_base_on_chemical_dataset_pipeline` is an English model originally trained by Saideepthi55. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentencetransformer_mpnet_base_on_chemical_dataset_pipeline_en_5.5.1_3.0_1731301346750.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentencetransformer_mpnet_base_on_chemical_dataset_pipeline_en_5.5.1_3.0_1731301346750.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sentencetransformer_mpnet_base_on_chemical_dataset_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sentencetransformer_mpnet_base_on_chemical_dataset_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentencetransformer_mpnet_base_on_chemical_dataset_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|263.5 MB| + +## References + +https://huggingface.co/Saideepthi55/sentencetransformer_mpnet_base_on_chemical_dataset + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-setfit_model_ireland_4labels_unbalanced_data_en.md b/docs/_posts/ahmedlone127/2024-11-11-setfit_model_ireland_4labels_unbalanced_data_en.md new file mode 100644 index 00000000000000..f6651ad90e1108 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-setfit_model_ireland_4labels_unbalanced_data_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English setfit_model_ireland_4labels_unbalanced_data MPNetEmbeddings from mitra-mir +author: John Snow Labs +name: setfit_model_ireland_4labels_unbalanced_data +date: 2024-11-11 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`setfit_model_ireland_4labels_unbalanced_data` is a English model originally trained by mitra-mir. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/setfit_model_ireland_4labels_unbalanced_data_en_5.5.1_3.0_1731294943741.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/setfit_model_ireland_4labels_unbalanced_data_en_5.5.1_3.0_1731294943741.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("setfit_model_ireland_4labels_unbalanced_data","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("setfit_model_ireland_4labels_unbalanced_data","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|setfit_model_ireland_4labels_unbalanced_data| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/mitra-mir/setfit-model-Ireland_4labels_unbalanced_data \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-setfit_model_ireland_4labels_unbalanced_data_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-setfit_model_ireland_4labels_unbalanced_data_pipeline_en.md new file mode 100644 index 00000000000000..562b9e139b615d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-setfit_model_ireland_4labels_unbalanced_data_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English setfit_model_ireland_4labels_unbalanced_data_pipeline pipeline MPNetEmbeddings from mitra-mir +author: John Snow Labs +name: setfit_model_ireland_4labels_unbalanced_data_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`setfit_model_ireland_4labels_unbalanced_data_pipeline` is a English model originally trained by mitra-mir. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/setfit_model_ireland_4labels_unbalanced_data_pipeline_en_5.5.1_3.0_1731294965484.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/setfit_model_ireland_4labels_unbalanced_data_pipeline_en_5.5.1_3.0_1731294965484.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("setfit_model_ireland_4labels_unbalanced_data_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("setfit_model_ireland_4labels_unbalanced_data_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|setfit_model_ireland_4labels_unbalanced_data_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/mitra-mir/setfit-model-Ireland_4labels_unbalanced_data + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-software_ner_prod_en.md b/docs/_posts/ahmedlone127/2024-11-11-software_ner_prod_en.md new file mode 100644 index 00000000000000..304fef613f4f5b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-software_ner_prod_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English software_ner_prod BertForTokenClassification from hadiaskari98 +author: John Snow Labs +name: software_ner_prod +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`software_ner_prod` is a English model originally trained by hadiaskari98. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/software_ner_prod_en_5.5.1_3.0_1731298584900.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/software_ner_prod_en_5.5.1_3.0_1731298584900.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("software_ner_prod","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("software_ner_prod", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|software_ner_prod| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/hadiaskari98/Software_NER_prod \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-software_ner_prod_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-software_ner_prod_pipeline_en.md new file mode 100644 index 00000000000000..914e298fd6beab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-software_ner_prod_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English software_ner_prod_pipeline pipeline BertForTokenClassification from hadiaskari98 +author: John Snow Labs +name: software_ner_prod_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`software_ner_prod_pipeline` is a English model originally trained by hadiaskari98. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/software_ner_prod_pipeline_en_5.5.1_3.0_1731298649868.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/software_ner_prod_pipeline_en_5.5.1_3.0_1731298649868.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("software_ner_prod_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("software_ner_prod_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|software_ner_prod_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/hadiaskari98/Software_NER_prod + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-southern_sotho_mpnet_base10_en.md b/docs/_posts/ahmedlone127/2024-11-11-southern_sotho_mpnet_base10_en.md new file mode 100644 index 00000000000000..46612537643cc3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-southern_sotho_mpnet_base10_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English southern_sotho_mpnet_base10 MPNetForSequenceClassification from Saideepthi55 +author: John Snow Labs +name: southern_sotho_mpnet_base10 +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `southern_sotho_mpnet_base10` is an English model originally trained by Saideepthi55. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/southern_sotho_mpnet_base10_en_5.5.1_3.0_1731301271163.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/southern_sotho_mpnet_base10_en_5.5.1_3.0_1731301271163.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = MPNetForSequenceClassification.pretrained("southern_sotho_mpnet_base10","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = MPNetForSequenceClassification.pretrained("southern_sotho_mpnet_base10", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|southern_sotho_mpnet_base10| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|408.9 MB| + +## References + +https://huggingface.co/Saideepthi55/st_mpnet_base10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-southern_sotho_mpnet_base10_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-southern_sotho_mpnet_base10_pipeline_en.md new file mode 100644 index 00000000000000..cbf94edb9db76f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-southern_sotho_mpnet_base10_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English southern_sotho_mpnet_base10_pipeline pipeline MPNetForSequenceClassification from Saideepthi55 +author: John Snow Labs +name: southern_sotho_mpnet_base10_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `southern_sotho_mpnet_base10_pipeline` is an English model originally trained by Saideepthi55. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/southern_sotho_mpnet_base10_pipeline_en_5.5.1_3.0_1731301299330.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/southern_sotho_mpnet_base10_pipeline_en_5.5.1_3.0_1731301299330.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("southern_sotho_mpnet_base10_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("southern_sotho_mpnet_base10_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|southern_sotho_mpnet_base10_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.9 MB| + +## References + +https://huggingface.co/Saideepthi55/st_mpnet_base10 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-southern_sotho_mpnet_base20_en.md b/docs/_posts/ahmedlone127/2024-11-11-southern_sotho_mpnet_base20_en.md new file mode 100644 index 00000000000000..34e20462124c15 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-southern_sotho_mpnet_base20_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English southern_sotho_mpnet_base20 MPNetForSequenceClassification from Saideepthi55 +author: John Snow Labs +name: southern_sotho_mpnet_base20 +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `southern_sotho_mpnet_base20` is an English model originally trained by Saideepthi55. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/southern_sotho_mpnet_base20_en_5.5.1_3.0_1731301543132.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/southern_sotho_mpnet_base20_en_5.5.1_3.0_1731301543132.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = MPNetForSequenceClassification.pretrained("southern_sotho_mpnet_base20","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = MPNetForSequenceClassification.pretrained("southern_sotho_mpnet_base20", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|southern_sotho_mpnet_base20| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|408.9 MB| + +## References + +https://huggingface.co/Saideepthi55/st_mpnet_base20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-southern_sotho_mpnet_base20_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-southern_sotho_mpnet_base20_pipeline_en.md new file mode 100644 index 00000000000000..6243a692d93e7e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-southern_sotho_mpnet_base20_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English southern_sotho_mpnet_base20_pipeline pipeline MPNetForSequenceClassification from Saideepthi55 +author: John Snow Labs +name: southern_sotho_mpnet_base20_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `southern_sotho_mpnet_base20_pipeline` is an English model originally trained by Saideepthi55. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/southern_sotho_mpnet_base20_pipeline_en_5.5.1_3.0_1731301564358.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/southern_sotho_mpnet_base20_pipeline_en_5.5.1_3.0_1731301564358.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("southern_sotho_mpnet_base20_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("southern_sotho_mpnet_base20_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|southern_sotho_mpnet_base20_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.9 MB| + +## References + +https://huggingface.co/Saideepthi55/st_mpnet_base20 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-southern_sotho_mpnet_base_normal_en.md b/docs/_posts/ahmedlone127/2024-11-11-southern_sotho_mpnet_base_normal_en.md new file mode 100644 index 00000000000000..fa19b9f0f12d22 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-southern_sotho_mpnet_base_normal_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English southern_sotho_mpnet_base_normal MPNetForSequenceClassification from Saideepthi55 +author: John Snow Labs +name: southern_sotho_mpnet_base_normal +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `southern_sotho_mpnet_base_normal` is an English model originally trained by Saideepthi55. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/southern_sotho_mpnet_base_normal_en_5.5.1_3.0_1731301340152.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/southern_sotho_mpnet_base_normal_en_5.5.1_3.0_1731301340152.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = MPNetForSequenceClassification.pretrained("southern_sotho_mpnet_base_normal","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = MPNetForSequenceClassification.pretrained("southern_sotho_mpnet_base_normal", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|southern_sotho_mpnet_base_normal| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|408.9 MB| + +## References + +https://huggingface.co/Saideepthi55/st_mpnet_base_normal \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-southern_sotho_mpnet_base_normal_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-southern_sotho_mpnet_base_normal_pipeline_en.md new file mode 100644 index 00000000000000..ccd69f639b6519 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-southern_sotho_mpnet_base_normal_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English southern_sotho_mpnet_base_normal_pipeline pipeline MPNetForSequenceClassification from Saideepthi55 +author: John Snow Labs +name: southern_sotho_mpnet_base_normal_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `southern_sotho_mpnet_base_normal_pipeline` is an English model originally trained by Saideepthi55. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/southern_sotho_mpnet_base_normal_pipeline_en_5.5.1_3.0_1731301362624.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/southern_sotho_mpnet_base_normal_pipeline_en_5.5.1_3.0_1731301362624.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("southern_sotho_mpnet_base_normal_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("southern_sotho_mpnet_base_normal_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|southern_sotho_mpnet_base_normal_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.9 MB| + +## References + +https://huggingface.co/Saideepthi55/st_mpnet_base_normal + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-spanish_medical_ner_es.md b/docs/_posts/ahmedlone127/2024-11-11-spanish_medical_ner_es.md new file mode 100644 index 00000000000000..32940e8b7f748a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-spanish_medical_ner_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish spanish_medical_ner BertForTokenClassification from HUMADEX +author: John Snow Labs +name: spanish_medical_ner +date: 2024-11-11 +tags: [es, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `spanish_medical_ner` is a Castilian, Spanish model originally trained by HUMADEX. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spanish_medical_ner_es_5.5.1_3.0_1731299312525.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spanish_medical_ner_es_5.5.1_3.0_1731299312525.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("spanish_medical_ner","es") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("spanish_medical_ner", "es") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spanish_medical_ner| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|es| +|Size:|403.7 MB| + +## References + +https://huggingface.co/HUMADEX/spanish_medical_ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-spanish_medical_ner_pipeline_es.md b/docs/_posts/ahmedlone127/2024-11-11-spanish_medical_ner_pipeline_es.md new file mode 100644 index 00000000000000..9b0dd5371d3052 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-spanish_medical_ner_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish spanish_medical_ner_pipeline pipeline BertForTokenClassification from HUMADEX +author: John Snow Labs +name: spanish_medical_ner_pipeline +date: 2024-11-11 +tags: [es, open_source, pipeline, onnx] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `spanish_medical_ner_pipeline` is a Castilian, Spanish model originally trained by HUMADEX. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spanish_medical_ner_pipeline_es_5.5.1_3.0_1731299333046.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spanish_medical_ner_pipeline_es_5.5.1_3.0_1731299333046.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("spanish_medical_ner_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("spanish_medical_ner_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spanish_medical_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|403.7 MB| + +## References + +https://huggingface.co/HUMADEX/spanish_medical_ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sst2_benign_bert_uncased_en.md b/docs/_posts/ahmedlone127/2024-11-11-sst2_benign_bert_uncased_en.md new file mode 100644 index 00000000000000..8a15ca8acffe06 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sst2_benign_bert_uncased_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sst2_benign_bert_uncased BertForSequenceClassification from dilarayavuz +author: John Snow Labs +name: sst2_benign_bert_uncased +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sst2_benign_bert_uncased` is an English model originally trained by dilarayavuz. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sst2_benign_bert_uncased_en_5.5.1_3.0_1731309631235.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sst2_benign_bert_uncased_en_5.5.1_3.0_1731309631235.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("sst2_benign_bert_uncased","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("sst2_benign_bert_uncased", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sst2_benign_bert_uncased| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/dilarayavuz/sst2-benign-bert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sst2_benign_bert_uncased_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sst2_benign_bert_uncased_pipeline_en.md new file mode 100644 index 00000000000000..622065dfccca22 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sst2_benign_bert_uncased_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sst2_benign_bert_uncased_pipeline pipeline BertForSequenceClassification from dilarayavuz +author: John Snow Labs +name: sst2_benign_bert_uncased_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sst2_benign_bert_uncased_pipeline` is an English model originally trained by dilarayavuz. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sst2_benign_bert_uncased_pipeline_en_5.5.1_3.0_1731309652958.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sst2_benign_bert_uncased_pipeline_en_5.5.1_3.0_1731309652958.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sst2_benign_bert_uncased_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sst2_benign_bert_uncased_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sst2_benign_bert_uncased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/dilarayavuz/sst2-benign-bert-uncased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sysformver1_en.md b/docs/_posts/ahmedlone127/2024-11-11-sysformver1_en.md new file mode 100644 index 00000000000000..6df9fcc3dcdeab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sysformver1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sysformver1 BertForTokenClassification from blckwdw61 +author: John Snow Labs +name: sysformver1 +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sysformver1` is an English model originally trained by blckwdw61. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sysformver1_en_5.5.1_3.0_1731291183659.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sysformver1_en_5.5.1_3.0_1731291183659.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("sysformver1","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("sysformver1", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sysformver1| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/blckwdw61/sysformver1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-sysformver1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-sysformver1_pipeline_en.md new file mode 100644 index 00000000000000..1b88c0aed34dec --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-sysformver1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sysformver1_pipeline pipeline BertForTokenClassification from blckwdw61 +author: John Snow Labs +name: sysformver1_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sysformver1_pipeline` is an English model originally trained by blckwdw61. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sysformver1_pipeline_en_5.5.1_3.0_1731291209752.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sysformver1_pipeline_en_5.5.1_3.0_1731291209752.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sysformver1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sysformver1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sysformver1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/blckwdw61/sysformver1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-testthesissmallfiftytest_en.md b/docs/_posts/ahmedlone127/2024-11-11-testthesissmallfiftytest_en.md new file mode 100644 index 00000000000000..0fe26267d56c1f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-testthesissmallfiftytest_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English testthesissmallfiftytest BertForTokenClassification from Nonzerophilip +author: John Snow Labs +name: testthesissmallfiftytest +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `testthesissmallfiftytest` is an English model originally trained by Nonzerophilip. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/testthesissmallfiftytest_en_5.5.1_3.0_1731285202383.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/testthesissmallfiftytest_en_5.5.1_3.0_1731285202383.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("testthesissmallfiftytest","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("testthesissmallfiftytest", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|testthesissmallfiftytest| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|465.3 MB| + +## References + +https://huggingface.co/Nonzerophilip/testThesisSmallfiftyTEST \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-testthesissmallfiftytest_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-testthesissmallfiftytest_pipeline_en.md new file mode 100644 index 00000000000000..2911e0f4f34e5f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-testthesissmallfiftytest_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English testthesissmallfiftytest_pipeline pipeline BertForTokenClassification from Nonzerophilip +author: John Snow Labs +name: testthesissmallfiftytest_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `testthesissmallfiftytest_pipeline` is an English model originally trained by Nonzerophilip. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/testthesissmallfiftytest_pipeline_en_5.5.1_3.0_1731285227026.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/testthesissmallfiftytest_pipeline_en_5.5.1_3.0_1731285227026.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("testthesissmallfiftytest_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("testthesissmallfiftytest_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|testthesissmallfiftytest_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|465.3 MB| + +## References + +https://huggingface.co/Nonzerophilip/testThesisSmallfiftyTEST + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-testthesissmallfiftytestaugfivegpt_en.md b/docs/_posts/ahmedlone127/2024-11-11-testthesissmallfiftytestaugfivegpt_en.md new file mode 100644 index 00000000000000..3ab88e365824f3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-testthesissmallfiftytestaugfivegpt_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English testthesissmallfiftytestaugfivegpt BertForTokenClassification from Nonzerophilip +author: John Snow Labs +name: testthesissmallfiftytestaugfivegpt +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `testthesissmallfiftytestaugfivegpt` is an English model originally trained by Nonzerophilip. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/testthesissmallfiftytestaugfivegpt_en_5.5.1_3.0_1731285482289.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/testthesissmallfiftytestaugfivegpt_en_5.5.1_3.0_1731285482289.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("testthesissmallfiftytestaugfivegpt","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("testthesissmallfiftytestaugfivegpt", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|testthesissmallfiftytestaugfivegpt| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|465.3 MB| + +## References + +https://huggingface.co/Nonzerophilip/testThesisSmallfiftyTESTAugfiveGPT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-testthesissmallfiftytestaugfivegpt_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-testthesissmallfiftytestaugfivegpt_pipeline_en.md new file mode 100644 index 00000000000000..dcdddde4fb8a3b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-testthesissmallfiftytestaugfivegpt_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English testthesissmallfiftytestaugfivegpt_pipeline pipeline BertForTokenClassification from Nonzerophilip +author: John Snow Labs +name: testthesissmallfiftytestaugfivegpt_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `testthesissmallfiftytestaugfivegpt_pipeline` is an English model originally trained by Nonzerophilip. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/testthesissmallfiftytestaugfivegpt_pipeline_en_5.5.1_3.0_1731285506982.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/testthesissmallfiftytestaugfivegpt_pipeline_en_5.5.1_3.0_1731285506982.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("testthesissmallfiftytestaugfivegpt_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("testthesissmallfiftytestaugfivegpt_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|testthesissmallfiftytestaugfivegpt_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|465.3 MB| + +## References + +https://huggingface.co/Nonzerophilip/testThesisSmallfiftyTESTAugfiveGPT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-tinybert_keyword_en.md b/docs/_posts/ahmedlone127/2024-11-11-tinybert_keyword_en.md new file mode 100644 index 00000000000000..9b8182548bfd73 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-tinybert_keyword_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English tinybert_keyword BertForTokenClassification from nirusanan +author: John Snow Labs +name: tinybert_keyword +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tinybert_keyword` is an English model originally trained by nirusanan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tinybert_keyword_en_5.5.1_3.0_1731290570020.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tinybert_keyword_en_5.5.1_3.0_1731290570020.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("tinybert_keyword","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("tinybert_keyword", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tinybert_keyword| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|53.9 MB| + +## References + +https://huggingface.co/nirusanan/tinyBert-keyword \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-tinybert_keyword_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-tinybert_keyword_pipeline_en.md new file mode 100644 index 00000000000000..6e24c0b8d71454 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-tinybert_keyword_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English tinybert_keyword_pipeline pipeline BertForTokenClassification from nirusanan +author: John Snow Labs +name: tinybert_keyword_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tinybert_keyword_pipeline` is an English model originally trained by nirusanan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tinybert_keyword_pipeline_en_5.5.1_3.0_1731290572847.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tinybert_keyword_pipeline_en_5.5.1_3.0_1731290572847.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("tinybert_keyword_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("tinybert_keyword_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tinybert_keyword_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|53.9 MB| + +## References + +https://huggingface.co/nirusanan/tinyBert-keyword + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-token_classification_wnut_en.md b/docs/_posts/ahmedlone127/2024-11-11-token_classification_wnut_en.md new file mode 100644 index 00000000000000..cc75f3d6017e56 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-token_classification_wnut_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English token_classification_wnut BertForTokenClassification from StatsGary +author: John Snow Labs +name: token_classification_wnut +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `token_classification_wnut` is an English model originally trained by StatsGary. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/token_classification_wnut_en_5.5.1_3.0_1731290160734.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/token_classification_wnut_en_5.5.1_3.0_1731290160734.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("token_classification_wnut","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("token_classification_wnut", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|token_classification_wnut| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/StatsGary/token_classification_wnut \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-token_classification_wnut_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-token_classification_wnut_pipeline_en.md new file mode 100644 index 00000000000000..420e8725a1dc31 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-token_classification_wnut_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English token_classification_wnut_pipeline pipeline BertForTokenClassification from StatsGary +author: John Snow Labs +name: token_classification_wnut_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `token_classification_wnut_pipeline` is an English model originally trained by StatsGary. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/token_classification_wnut_pipeline_en_5.5.1_3.0_1731290224907.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/token_classification_wnut_pipeline_en_5.5.1_3.0_1731290224907.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("token_classification_wnut_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("token_classification_wnut_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|token_classification_wnut_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/StatsGary/token_classification_wnut + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-unified_skill_ner_echo_en.md b/docs/_posts/ahmedlone127/2024-11-11-unified_skill_ner_echo_en.md new file mode 100644 index 00000000000000..8a5a646aabe269 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-unified_skill_ner_echo_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English unified_skill_ner_echo BertForTokenClassification from ledigajobb +author: John Snow Labs +name: unified_skill_ner_echo +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `unified_skill_ner_echo` is an English model originally trained by ledigajobb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/unified_skill_ner_echo_en_5.5.1_3.0_1731298457277.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/unified_skill_ner_echo_en_5.5.1_3.0_1731298457277.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("unified_skill_ner_echo","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("unified_skill_ner_echo", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|unified_skill_ner_echo| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|465.5 MB| + +## References + +https://huggingface.co/ledigajobb/unified_skill_ner_echo \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-unified_skill_ner_echo_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-unified_skill_ner_echo_pipeline_en.md new file mode 100644 index 00000000000000..ca82b108bc2710 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-unified_skill_ner_echo_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English unified_skill_ner_echo_pipeline pipeline BertForTokenClassification from ledigajobb +author: John Snow Labs +name: unified_skill_ner_echo_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `unified_skill_ner_echo_pipeline` is an English model originally trained by ledigajobb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/unified_skill_ner_echo_pipeline_en_5.5.1_3.0_1731298481375.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/unified_skill_ner_echo_pipeline_en_5.5.1_3.0_1731298481375.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("unified_skill_ner_echo_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("unified_skill_ner_echo_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|unified_skill_ner_echo_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|465.5 MB| + +## References + +https://huggingface.co/ledigajobb/unified_skill_ner_echo + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-unitku_hubert_japanese_asr_ja.md b/docs/_posts/ahmedlone127/2024-11-11-unitku_hubert_japanese_asr_ja.md new file mode 100644 index 00000000000000..417b3e2e316059 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-unitku_hubert_japanese_asr_ja.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Japanese unitku_hubert_japanese_asr HubertForCTC from TKU410410103 +author: John Snow Labs +name: unitku_hubert_japanese_asr +date: 2024-11-11 +tags: [ja, open_source, onnx, asr, hubert] +task: Automatic Speech Recognition +language: ja +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: HubertForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained HubertForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `unitku_hubert_japanese_asr` is a Japanese model originally trained by TKU410410103. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/unitku_hubert_japanese_asr_ja_5.5.1_3.0_1731284872906.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/unitku_hubert_japanese_asr_ja_5.5.1_3.0_1731284872906.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = HubertForCTC.pretrained("unitku_hubert_japanese_asr","ja") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = HubertForCTC.pretrained("unitku_hubert_japanese_asr", "ja") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|unitku_hubert_japanese_asr| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ja| +|Size:|708.5 MB| + +## References + +https://huggingface.co/TKU410410103/uniTKU-hubert-japanese-asr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-unitku_hubert_japanese_asr_pipeline_ja.md b/docs/_posts/ahmedlone127/2024-11-11-unitku_hubert_japanese_asr_pipeline_ja.md new file mode 100644 index 00000000000000..dc49a94087aae7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-unitku_hubert_japanese_asr_pipeline_ja.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Japanese unitku_hubert_japanese_asr_pipeline pipeline HubertForCTC from TKU410410103 +author: John Snow Labs +name: unitku_hubert_japanese_asr_pipeline +date: 2024-11-11 +tags: [ja, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: ja +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained HubertForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `unitku_hubert_japanese_asr_pipeline` is a Japanese model originally trained by TKU410410103. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/unitku_hubert_japanese_asr_pipeline_ja_5.5.1_3.0_1731284908856.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/unitku_hubert_japanese_asr_pipeline_ja_5.5.1_3.0_1731284908856.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("unitku_hubert_japanese_asr_pipeline", lang = "ja") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("unitku_hubert_japanese_asr_pipeline", lang = "ja") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|unitku_hubert_japanese_asr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|ja| +|Size:|708.5 MB| + +## References + +https://huggingface.co/TKU410410103/uniTKU-hubert-japanese-asr + +## Included Models + +- AudioAssembler +- HubertForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-welt_biobert_ncbi_en.md b/docs/_posts/ahmedlone127/2024-11-11-welt_biobert_ncbi_en.md new file mode 100644 index 00000000000000..6b45f66b9da41b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-welt_biobert_ncbi_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English welt_biobert_ncbi BertForTokenClassification from ghadeermobasher +author: John Snow Labs +name: welt_biobert_ncbi +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `welt_biobert_ncbi` is an English model originally trained by ghadeermobasher. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/welt_biobert_ncbi_en_5.5.1_3.0_1731285897383.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/welt_biobert_ncbi_en_5.5.1_3.0_1731285897383.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("welt_biobert_ncbi","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("welt_biobert_ncbi", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|welt_biobert_ncbi| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.1 MB| + +## References + +https://huggingface.co/ghadeermobasher/WELT-BioBERT-NCBI \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-welt_biobert_ncbi_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-welt_biobert_ncbi_pipeline_en.md new file mode 100644 index 00000000000000..7a02552d647c15 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-welt_biobert_ncbi_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English welt_biobert_ncbi_pipeline pipeline BertForTokenClassification from ghadeermobasher +author: John Snow Labs +name: welt_biobert_ncbi_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `welt_biobert_ncbi_pipeline` is an English model originally trained by ghadeermobasher. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/welt_biobert_ncbi_pipeline_en_5.5.1_3.0_1731285921198.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/welt_biobert_ncbi_pipeline_en_5.5.1_3.0_1731285921198.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("welt_biobert_ncbi_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("welt_biobert_ncbi_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|welt_biobert_ncbi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.1 MB| + +## References + +https://huggingface.co/ghadeermobasher/WELT-BioBERT-NCBI + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_base_common_voice_arabic11_0_en.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_base_common_voice_arabic11_0_en.md new file mode 100644 index 00000000000000..d72472c08a233d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_base_common_voice_arabic11_0_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_base_common_voice_arabic11_0 WhisperForCTC from Abdo96 +author: John Snow Labs +name: whisper_base_common_voice_arabic11_0 +date: 2024-11-11 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_base_common_voice_arabic11_0` is an English model originally trained by Abdo96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_base_common_voice_arabic11_0_en_5.5.1_3.0_1731304414617.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_base_common_voice_arabic11_0_en_5.5.1_3.0_1731304414617.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("whisper_base_common_voice_arabic11_0","en") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("whisper_base_common_voice_arabic11_0", "en") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_base_common_voice_arabic11_0| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|642.3 MB| + +## References + +https://huggingface.co/Abdo96/whisper-base-common-voice-Arabic11.0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_base_common_voice_arabic11_0_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_base_common_voice_arabic11_0_pipeline_en.md new file mode 100644 index 00000000000000..e6395ff989f713 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_base_common_voice_arabic11_0_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_base_common_voice_arabic11_0_pipeline pipeline WhisperForCTC from Abdo96 +author: John Snow Labs +name: whisper_base_common_voice_arabic11_0_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_base_common_voice_arabic11_0_pipeline` is an English model originally trained by Abdo96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_base_common_voice_arabic11_0_pipeline_en_5.5.1_3.0_1731304449172.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_base_common_voice_arabic11_0_pipeline_en_5.5.1_3.0_1731304449172.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_base_common_voice_arabic11_0_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_base_common_voice_arabic11_0_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_base_common_voice_arabic11_0_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|642.4 MB| + +## References + +https://huggingface.co/Abdo96/whisper-base-common-voice-Arabic11.0 + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_base_hungarian_v1_hu.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_base_hungarian_v1_hu.md new file mode 100644 index 00000000000000..e24bde593dcfd0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_base_hungarian_v1_hu.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Hungarian whisper_base_hungarian_v1 WhisperForCTC from sarpba +author: John Snow Labs +name: whisper_base_hungarian_v1 +date: 2024-11-11 +tags: [hu, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: hu +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_base_hungarian_v1` is a Hungarian model originally trained by sarpba. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_base_hungarian_v1_hu_5.5.1_3.0_1731304667467.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_base_hungarian_v1_hu_5.5.1_3.0_1731304667467.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("whisper_base_hungarian_v1","hu") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("whisper_base_hungarian_v1", "hu") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_base_hungarian_v1| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|hu| +|Size:|643.4 MB| + +## References + +https://huggingface.co/sarpba/whisper-base-hungarian_v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_base_hungarian_v1_pipeline_hu.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_base_hungarian_v1_pipeline_hu.md new file mode 100644 index 00000000000000..983b44d5fb09ee --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_base_hungarian_v1_pipeline_hu.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Hungarian whisper_base_hungarian_v1_pipeline pipeline WhisperForCTC from sarpba +author: John Snow Labs +name: whisper_base_hungarian_v1_pipeline +date: 2024-11-11 +tags: [hu, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: hu +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_base_hungarian_v1_pipeline` is a Hungarian model originally trained by sarpba. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_base_hungarian_v1_pipeline_hu_5.5.1_3.0_1731304705195.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_base_hungarian_v1_pipeline_hu_5.5.1_3.0_1731304705195.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_base_hungarian_v1_pipeline", lang = "hu") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_base_hungarian_v1_pipeline", lang = "hu") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_base_hungarian_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|hu| +|Size:|643.4 MB| + +## References + +https://huggingface.co/sarpba/whisper-base-hungarian_v1 + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_finetuned_atcosim_en.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_finetuned_atcosim_en.md new file mode 100644 index 00000000000000..d564ea07b24a65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_finetuned_atcosim_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_finetuned_atcosim WhisperForCTC from bhattasp +author: John Snow Labs +name: whisper_finetuned_atcosim +date: 2024-11-11 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_finetuned_atcosim` is an English model originally trained by bhattasp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_finetuned_atcosim_en_5.5.1_3.0_1731304932861.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_finetuned_atcosim_en_5.5.1_3.0_1731304932861.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("whisper_finetuned_atcosim","en") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("whisper_finetuned_atcosim", "en") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_finetuned_atcosim| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|389.9 MB| + +## References + +https://huggingface.co/bhattasp/whisper-finetuned-atcosim \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_finetuned_atcosim_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_finetuned_atcosim_pipeline_en.md new file mode 100644 index 00000000000000..4ea36338ea9182 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_finetuned_atcosim_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_finetuned_atcosim_pipeline pipeline WhisperForCTC from bhattasp +author: John Snow Labs +name: whisper_finetuned_atcosim_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_finetuned_atcosim_pipeline` is an English model originally trained by bhattasp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_finetuned_atcosim_pipeline_en_5.5.1_3.0_1731304954819.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_finetuned_atcosim_pipeline_en_5.5.1_3.0_1731304954819.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_finetuned_atcosim_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_finetuned_atcosim_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_finetuned_atcosim_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|389.9 MB| + +## References + +https://huggingface.co/bhattasp/whisper-finetuned-atcosim + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_medium_latvian_ver2_lv.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_medium_latvian_ver2_lv.md new file mode 100644 index 00000000000000..4e07ccde16313e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_medium_latvian_ver2_lv.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Latvian whisper_medium_latvian_ver2 WhisperForCTC from FelixK7 +author: John Snow Labs +name: whisper_medium_latvian_ver2 +date: 2024-11-11 +tags: [lv, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: lv +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_medium_latvian_ver2` is a Latvian model originally trained by FelixK7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_medium_latvian_ver2_lv_5.5.1_3.0_1731305211676.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_medium_latvian_ver2_lv_5.5.1_3.0_1731305211676.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("whisper_medium_latvian_ver2","lv") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("whisper_medium_latvian_ver2", "lv") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_medium_latvian_ver2| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|lv| +|Size:|4.8 GB| + +## References + +https://huggingface.co/FelixK7/whisper-medium-lv-ver2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_medium_latvian_ver2_pipeline_lv.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_medium_latvian_ver2_pipeline_lv.md new file mode 100644 index 00000000000000..841f62d981e9b1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_medium_latvian_ver2_pipeline_lv.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Latvian whisper_medium_latvian_ver2_pipeline pipeline WhisperForCTC from FelixK7 +author: John Snow Labs +name: whisper_medium_latvian_ver2_pipeline +date: 2024-11-11 +tags: [lv, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: lv +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_medium_latvian_ver2_pipeline` is a Latvian model originally trained by FelixK7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_medium_latvian_ver2_pipeline_lv_5.5.1_3.0_1731305454634.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_medium_latvian_ver2_pipeline_lv_5.5.1_3.0_1731305454634.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_medium_latvian_ver2_pipeline", lang = "lv") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_medium_latvian_ver2_pipeline", lang = "lv") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_medium_latvian_ver2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|lv| +|Size:|4.8 GB| + +## References + +https://huggingface.co/FelixK7/whisper-medium-lv-ver2 + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_medium_luluw_en.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_medium_luluw_en.md new file mode 100644 index 00000000000000..5223b32fdc44c7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_medium_luluw_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_medium_luluw WhisperForCTC from luluw +author: John Snow Labs +name: whisper_medium_luluw +date: 2024-11-11 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_medium_luluw` is an English model originally trained by luluw. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_medium_luluw_en_5.5.1_3.0_1731306059271.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_medium_luluw_en_5.5.1_3.0_1731306059271.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("whisper_medium_luluw","en") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("whisper_medium_luluw", "en") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_medium_luluw| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|4.8 GB| + +## References + +https://huggingface.co/luluw/whisper-medium \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_omg_hi.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_omg_hi.md new file mode 100644 index 00000000000000..b4435c02573f70 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_omg_hi.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Hindi whisper_omg WhisperForCTC from nurzhanit +author: John Snow Labs +name: whisper_omg +date: 2024-11-11 +tags: [hi, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: hi +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_omg` is a Hindi model originally trained by nurzhanit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_omg_hi_5.5.1_3.0_1731303196441.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_omg_hi_5.5.1_3.0_1731303196441.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("whisper_omg","hi") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("whisper_omg", "hi") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_omg| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|hi| +|Size:|1.7 GB| + +## References + +https://huggingface.co/nurzhanit/whisper-omg \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_omg_pipeline_hi.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_omg_pipeline_hi.md new file mode 100644 index 00000000000000..ce99657ac4eb4b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_omg_pipeline_hi.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Hindi whisper_omg_pipeline pipeline WhisperForCTC from nurzhanit +author: John Snow Labs +name: whisper_omg_pipeline +date: 2024-11-11 +tags: [hi, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: hi +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_omg_pipeline` is a Hindi model originally trained by nurzhanit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_omg_pipeline_hi_5.5.1_3.0_1731303281350.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_omg_pipeline_hi_5.5.1_3.0_1731303281350.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_omg_pipeline", lang = "hi") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_omg_pipeline", lang = "hi") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_omg_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|hi| +|Size:|1.7 GB| + +## References + +https://huggingface.co/nurzhanit/whisper-omg + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_small_arabic_augmentation_en.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_arabic_augmentation_en.md new file mode 100644 index 00000000000000..f3ce10b617691e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_arabic_augmentation_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_small_arabic_augmentation WhisperForCTC from MohammedNasri +author: John Snow Labs +name: whisper_small_arabic_augmentation +date: 2024-11-11 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_small_arabic_augmentation` is an English model originally trained by MohammedNasri. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_arabic_augmentation_en_5.5.1_3.0_1731302625589.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_arabic_augmentation_en_5.5.1_3.0_1731302625589.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("whisper_small_arabic_augmentation","en") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("whisper_small_arabic_augmentation", "en") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_arabic_augmentation| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/MohammedNasri/whisper_small_ar_augmentation \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_small_arabic_augmentation_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_arabic_augmentation_pipeline_en.md new file mode 100644 index 00000000000000..ab9d01e35faf54 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_arabic_augmentation_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_small_arabic_augmentation_pipeline pipeline WhisperForCTC from MohammedNasri +author: John Snow Labs +name: whisper_small_arabic_augmentation_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_small_arabic_augmentation_pipeline` is an English model originally trained by MohammedNasri. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_arabic_augmentation_pipeline_en_5.5.1_3.0_1731302718291.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_arabic_augmentation_pipeline_en_5.5.1_3.0_1731302718291.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_arabic_augmentation_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_arabic_augmentation_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_arabic_augmentation_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/MohammedNasri/whisper_small_ar_augmentation + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_small_dutch_en.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_dutch_en.md new file mode 100644 index 00000000000000..a5606e6acd2baf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_dutch_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_small_dutch WhisperForCTC from qmeeus +author: John Snow Labs +name: whisper_small_dutch +date: 2024-11-11 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_small_dutch` is an English model originally trained by qmeeus. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_dutch_en_5.5.1_3.0_1731306172648.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_dutch_en_5.5.1_3.0_1731306172648.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("whisper_small_dutch","en") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("whisper_small_dutch", "en") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_dutch| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/qmeeus/whisper-small-nl \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_small_dutch_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_dutch_pipeline_en.md new file mode 100644 index 00000000000000..d355242f419804 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_dutch_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_small_dutch_pipeline pipeline WhisperForCTC from qmeeus +author: John Snow Labs +name: whisper_small_dutch_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_small_dutch_pipeline` is an English model originally trained by qmeeus. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_dutch_pipeline_en_5.5.1_3.0_1731306257048.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_dutch_pipeline_en_5.5.1_3.0_1731306257048.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_dutch_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_dutch_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_dutch_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/qmeeus/whisper-small-nl + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_small_english_crossdelenna_en.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_english_crossdelenna_en.md new file mode 100644 index 00000000000000..fb907989258e1b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_english_crossdelenna_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_small_english_crossdelenna WhisperForCTC from crossdelenna +author: John Snow Labs +name: whisper_small_english_crossdelenna +date: 2024-11-11 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_small_english_crossdelenna` is an English model originally trained by crossdelenna. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_english_crossdelenna_en_5.5.1_3.0_1731303622627.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_english_crossdelenna_en_5.5.1_3.0_1731303622627.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("whisper_small_english_crossdelenna","en") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("whisper_small_english_crossdelenna", "en") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_english_crossdelenna| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/crossdelenna/whisper-small.en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_small_english_crossdelenna_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_english_crossdelenna_pipeline_en.md new file mode 100644 index 00000000000000..d42e75e87eb373 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_english_crossdelenna_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_small_english_crossdelenna_pipeline pipeline WhisperForCTC from crossdelenna +author: John Snow Labs +name: whisper_small_english_crossdelenna_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_small_english_crossdelenna_pipeline` is an English model originally trained by crossdelenna. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_english_crossdelenna_pipeline_en_5.5.1_3.0_1731303710904.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_english_crossdelenna_pipeline_en_5.5.1_3.0_1731303710904.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_english_crossdelenna_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_english_crossdelenna_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_english_crossdelenna_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/crossdelenna/whisper-small.en + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_small_french_uncased_fr.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_french_uncased_fr.md new file mode 100644 index 00000000000000..9159fcdc8de494 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_french_uncased_fr.md @@ -0,0 +1,84 @@ +--- +layout: model +title: French whisper_small_french_uncased WhisperForCTC from qanastek +author: John Snow Labs +name: whisper_small_french_uncased +date: 2024-11-11 +tags: [fr, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: fr +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_french_uncased` is a French model originally trained by qanastek. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_french_uncased_fr_5.5.1_3.0_1731305584943.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_french_uncased_fr_5.5.1_3.0_1731305584943.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("whisper_small_french_uncased","fr") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("whisper_small_french_uncased", "fr") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_french_uncased| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|fr| +|Size:|1.7 GB| + +## References + +https://huggingface.co/qanastek/whisper-small-french-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_small_french_uncased_pipeline_fr.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_french_uncased_pipeline_fr.md new file mode 100644 index 00000000000000..cc55101ed344f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_french_uncased_pipeline_fr.md @@ -0,0 +1,69 @@ +--- +layout: model +title: French whisper_small_french_uncased_pipeline pipeline WhisperForCTC from qanastek +author: John Snow Labs +name: whisper_small_french_uncased_pipeline +date: 2024-11-11 +tags: [fr, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: fr +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_french_uncased_pipeline` is a French model originally trained by qanastek. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_french_uncased_pipeline_fr_5.5.1_3.0_1731305678116.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_french_uncased_pipeline_fr_5.5.1_3.0_1731305678116.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_french_uncased_pipeline", lang = "fr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_french_uncased_pipeline", lang = "fr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_french_uncased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|fr| +|Size:|1.7 GB| + +## References + +https://huggingface.co/qanastek/whisper-small-french-uncased + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_small_greek_modern_finetune_el.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_greek_modern_finetune_el.md new file mode 100644 index 00000000000000..2ea8aa8874664b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_greek_modern_finetune_el.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Modern Greek (1453-) whisper_small_greek_modern_finetune WhisperForCTC from voxreality +author: John Snow Labs +name: whisper_small_greek_modern_finetune +date: 2024-11-11 +tags: [el, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: el +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_greek_modern_finetune` is a Modern Greek (1453-) model originally trained by voxreality. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_greek_modern_finetune_el_5.5.1_3.0_1731306201554.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_greek_modern_finetune_el_5.5.1_3.0_1731306201554.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("whisper_small_greek_modern_finetune","el") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("whisper_small_greek_modern_finetune", "el") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_greek_modern_finetune| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|el| +|Size:|1.7 GB| + +## References + +https://huggingface.co/voxreality/whisper-small-el-finetune \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_small_greek_modern_finetune_pipeline_el.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_greek_modern_finetune_pipeline_el.md new file mode 100644 index 00000000000000..5c89a54dbbd1e3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_greek_modern_finetune_pipeline_el.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Modern Greek (1453-) whisper_small_greek_modern_finetune_pipeline pipeline WhisperForCTC from voxreality +author: John Snow Labs +name: whisper_small_greek_modern_finetune_pipeline +date: 2024-11-11 +tags: [el, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: el +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_greek_modern_finetune_pipeline` is a Modern Greek (1453-) model originally trained by voxreality. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_greek_modern_finetune_pipeline_el_5.5.1_3.0_1731306288945.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_greek_modern_finetune_pipeline_el_5.5.1_3.0_1731306288945.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_greek_modern_finetune_pipeline", lang = "el") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_greek_modern_finetune_pipeline", lang = "el") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_greek_modern_finetune_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|el| +|Size:|1.7 GB| + +## References + +https://huggingface.co/voxreality/whisper-small-el-finetune + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_small_malay_my.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_malay_my.md new file mode 100644 index 00000000000000..9b5ba542689f6c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_malay_my.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Burmese whisper_small_malay WhisperForCTC from M00dler +author: John Snow Labs +name: whisper_small_malay +date: 2024-11-11 +tags: [my, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: my +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_malay` is a Burmese model originally trained by M00dler. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_malay_my_5.5.1_3.0_1731303130517.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_malay_my_5.5.1_3.0_1731303130517.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("whisper_small_malay","my") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("whisper_small_malay", "my") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_malay| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|my| +|Size:|1.7 GB| + +## References + +https://huggingface.co/M00dler/whisper-small-malay \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_small_malay_pipeline_my.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_malay_pipeline_my.md new file mode 100644 index 00000000000000..cd4e74b027787f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_malay_pipeline_my.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Burmese whisper_small_malay_pipeline pipeline WhisperForCTC from M00dler +author: John Snow Labs +name: whisper_small_malay_pipeline +date: 2024-11-11 +tags: [my, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: my +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_malay_pipeline` is a Burmese model originally trained by M00dler. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_malay_pipeline_my_5.5.1_3.0_1731303216891.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_malay_pipeline_my_5.5.1_3.0_1731303216891.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_malay_pipeline", lang = "my") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_malay_pipeline", lang = "my") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_malay_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|my| +|Size:|1.7 GB| + +## References + +https://huggingface.co/M00dler/whisper-small-malay + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_small_russian_f_pipeline_ru.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_russian_f_pipeline_ru.md new file mode 100644 index 00000000000000..5a1606ef225ed4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_russian_f_pipeline_ru.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Russian whisper_small_russian_f_pipeline pipeline WhisperForCTC from Garon16 +author: John Snow Labs +name: whisper_small_russian_f_pipeline +date: 2024-11-11 +tags: [ru, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: ru +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_russian_f_pipeline` is a Russian model originally trained by Garon16. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_russian_f_pipeline_ru_5.5.1_3.0_1731304185329.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_russian_f_pipeline_ru_5.5.1_3.0_1731304185329.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_russian_f_pipeline", lang = "ru") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_russian_f_pipeline", lang = "ru") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_russian_f_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|ru| +|Size:|1.7 GB| + +## References + +https://huggingface.co/Garon16/whisper_small_ru_f + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_small_russian_f_ru.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_russian_f_ru.md new file mode 100644 index 00000000000000..9bc6ca2f6fa1a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_russian_f_ru.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Russian whisper_small_russian_f WhisperForCTC from Garon16 +author: John Snow Labs +name: whisper_small_russian_f +date: 2024-11-11 +tags: [ru, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: ru +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_russian_f` is a Russian model originally trained by Garon16. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_russian_f_ru_5.5.1_3.0_1731304097971.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_russian_f_ru_5.5.1_3.0_1731304097971.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("whisper_small_russian_f","ru") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("whisper_small_russian_f", "ru") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_russian_f| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ru| +|Size:|1.7 GB| + +## References + +https://huggingface.co/Garon16/whisper_small_ru_f \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_small_western_frisian_dutch_transfer_from_english_fy.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_western_frisian_dutch_transfer_from_english_fy.md new file mode 100644 index 00000000000000..6693aa1ec38031 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_western_frisian_dutch_transfer_from_english_fy.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Western Frisian whisper_small_western_frisian_dutch_transfer_from_english WhisperForCTC from polixonrio +author: John Snow Labs +name: whisper_small_western_frisian_dutch_transfer_from_english +date: 2024-11-11 +tags: [fy, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: fy +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_western_frisian_dutch_transfer_from_english` is a Western Frisian model originally trained by polixonrio. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_western_frisian_dutch_transfer_from_english_fy_5.5.1_3.0_1731303370187.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_western_frisian_dutch_transfer_from_english_fy_5.5.1_3.0_1731303370187.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("whisper_small_western_frisian_dutch_transfer_from_english","fy") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("whisper_small_western_frisian_dutch_transfer_from_english", "fy") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_western_frisian_dutch_transfer_from_english| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|fy| +|Size:|1.7 GB| + +## References + +https://huggingface.co/polixonrio/whisper-small-fy-NL-Transfer-From-English \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_small_western_frisian_dutch_transfer_from_english_pipeline_fy.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_western_frisian_dutch_transfer_from_english_pipeline_fy.md new file mode 100644 index 00000000000000..b27ca32dd1b612 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_western_frisian_dutch_transfer_from_english_pipeline_fy.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Western Frisian whisper_small_western_frisian_dutch_transfer_from_english_pipeline pipeline WhisperForCTC from polixonrio +author: John Snow Labs +name: whisper_small_western_frisian_dutch_transfer_from_english_pipeline +date: 2024-11-11 +tags: [fy, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: fy +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_western_frisian_dutch_transfer_from_english_pipeline` is a Western Frisian model originally trained by polixonrio. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_western_frisian_dutch_transfer_from_english_pipeline_fy_5.5.1_3.0_1731303454310.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_western_frisian_dutch_transfer_from_english_pipeline_fy_5.5.1_3.0_1731303454310.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_western_frisian_dutch_transfer_from_english_pipeline", lang = "fy") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_western_frisian_dutch_transfer_from_english_pipeline", lang = "fy") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_western_frisian_dutch_transfer_from_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|fy| +|Size:|1.7 GB| + +## References + +https://huggingface.co/polixonrio/whisper-small-fy-NL-Transfer-From-English + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_small_yt_en.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_yt_en.md new file mode 100644 index 00000000000000..47394fac83daa2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_yt_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_small_yt WhisperForCTC from PatrickML +author: John Snow Labs +name: whisper_small_yt +date: 2024-11-11 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_small_yt` is an English model originally trained by PatrickML. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_yt_en_5.5.1_3.0_1731304188617.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_yt_en_5.5.1_3.0_1731304188617.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("whisper_small_yt","en") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("whisper_small_yt", "en") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_yt| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/PatrickML/whisper_small_yt \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_small_yt_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_yt_pipeline_en.md new file mode 100644 index 00000000000000..0418dc7ef2e35e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_small_yt_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_small_yt_pipeline pipeline WhisperForCTC from PatrickML +author: John Snow Labs +name: whisper_small_yt_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_small_yt_pipeline` is an English model originally trained by PatrickML. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_yt_pipeline_en_5.5.1_3.0_1731304293862.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_yt_pipeline_en_5.5.1_3.0_1731304293862.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_yt_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_yt_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_yt_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/PatrickML/whisper_small_yt + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_amharic_en.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_amharic_en.md new file mode 100644 index 00000000000000..27666684376da2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_amharic_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_tiny_amharic WhisperForCTC from Gizachew +author: John Snow Labs +name: whisper_tiny_amharic +date: 2024-11-11 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_tiny_amharic` is an English model originally trained by Gizachew. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_amharic_en_5.5.1_3.0_1731302156104.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_amharic_en_5.5.1_3.0_1731302156104.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("whisper_tiny_amharic","en") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("whisper_tiny_amharic", "en") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_amharic| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|390.2 MB| + +## References + +https://huggingface.co/Gizachew/whisper-tiny-amharic \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_amharic_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_amharic_pipeline_en.md new file mode 100644 index 00000000000000..9c1d7e76a23b07 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_amharic_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_tiny_amharic_pipeline pipeline WhisperForCTC from Gizachew +author: John Snow Labs +name: whisper_tiny_amharic_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_tiny_amharic_pipeline` is an English model originally trained by Gizachew. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_amharic_pipeline_en_5.5.1_3.0_1731302179868.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_amharic_pipeline_en_5.5.1_3.0_1731302179868.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_tiny_amharic_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_tiny_amharic_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_amharic_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|390.2 MB| + +## References + +https://huggingface.co/Gizachew/whisper-tiny-amharic + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_arabic_ar.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_arabic_ar.md new file mode 100644 index 00000000000000..3d4c57db055fc6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_arabic_ar.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Arabic whisper_tiny_arabic WhisperForCTC from Yassinevic +author: John Snow Labs +name: whisper_tiny_arabic +date: 2024-11-11 +tags: [ar, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: ar +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_tiny_arabic` is an Arabic model originally trained by Yassinevic. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_arabic_ar_5.5.1_3.0_1731302665054.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_arabic_ar_5.5.1_3.0_1731302665054.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("whisper_tiny_arabic","ar") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("whisper_tiny_arabic", "ar") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_arabic| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ar| +|Size:|389.9 MB| + +## References + +https://huggingface.co/Yassinevic/whisper-tiny-ar \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_arabic_pipeline_ar.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_arabic_pipeline_ar.md new file mode 100644 index 00000000000000..9f24da1fd6d8e2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_arabic_pipeline_ar.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Arabic whisper_tiny_arabic_pipeline pipeline WhisperForCTC from Yassinevic +author: John Snow Labs +name: whisper_tiny_arabic_pipeline +date: 2024-11-11 +tags: [ar, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: ar +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_tiny_arabic_pipeline` is an Arabic model originally trained by Yassinevic. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_arabic_pipeline_ar_5.5.1_3.0_1731302688987.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_arabic_pipeline_ar_5.5.1_3.0_1731302688987.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_tiny_arabic_pipeline", lang = "ar") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_tiny_arabic_pipeline", lang = "ar") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_arabic_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|ar| +|Size:|389.9 MB| + +## References + +https://huggingface.co/Yassinevic/whisper-tiny-ar + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_chinese_antares28_en.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_chinese_antares28_en.md new file mode 100644 index 00000000000000..2ced5bce8795dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_chinese_antares28_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_tiny_chinese_antares28 WhisperForCTC from Antares28 +author: John Snow Labs +name: whisper_tiny_chinese_antares28 +date: 2024-11-11 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_tiny_chinese_antares28` is an English model originally trained by Antares28. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_chinese_antares28_en_5.5.1_3.0_1731305756005.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_chinese_antares28_en_5.5.1_3.0_1731305756005.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("whisper_tiny_chinese_antares28","en") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("whisper_tiny_chinese_antares28", "en") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_chinese_antares28| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|389.9 MB| + +## References + +https://huggingface.co/Antares28/whisper-tiny-zh \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_chinese_antares28_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_chinese_antares28_pipeline_en.md new file mode 100644 index 00000000000000..870ef5a471d8f6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_chinese_antares28_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_tiny_chinese_antares28_pipeline pipeline WhisperForCTC from Antares28 +author: John Snow Labs +name: whisper_tiny_chinese_antares28_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_tiny_chinese_antares28_pipeline` is an English model originally trained by Antares28. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_chinese_antares28_pipeline_en_5.5.1_3.0_1731305777373.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_chinese_antares28_pipeline_en_5.5.1_3.0_1731305777373.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_tiny_chinese_antares28_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_tiny_chinese_antares28_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_chinese_antares28_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|389.9 MB| + +## References + +https://huggingface.co/Antares28/whisper-tiny-zh + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_nob_en.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_nob_en.md new file mode 100644 index 00000000000000..1a9a08d9d62c8a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_nob_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_tiny_nob WhisperForCTC from NbAiLab +author: John Snow Labs +name: whisper_tiny_nob +date: 2024-11-11 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_tiny_nob` is an English model originally trained by NbAiLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_nob_en_5.5.1_3.0_1731303096066.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_nob_en_5.5.1_3.0_1731303096066.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("whisper_tiny_nob","en") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("whisper_tiny_nob", "en") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_nob| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|390.8 MB| + +## References + +https://huggingface.co/NbAiLab/whisper-tiny-nob \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_nob_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_nob_pipeline_en.md new file mode 100644 index 00000000000000..16a1fec2c833c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_nob_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_tiny_nob_pipeline pipeline WhisperForCTC from NbAiLab +author: John Snow Labs +name: whisper_tiny_nob_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_tiny_nob_pipeline` is an English model originally trained by NbAiLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_nob_pipeline_en_5.5.1_3.0_1731303117221.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_nob_pipeline_en_5.5.1_3.0_1731303117221.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_tiny_nob_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_tiny_nob_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_nob_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|390.8 MB| + +## References + +https://huggingface.co/NbAiLab/whisper-tiny-nob + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_v2_2_romanian_en.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_v2_2_romanian_en.md new file mode 100644 index 00000000000000..fa824e6d0d9ca6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_v2_2_romanian_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_tiny_v2_2_romanian WhisperForCTC from giigii91 +author: John Snow Labs +name: whisper_tiny_v2_2_romanian +date: 2024-11-11 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_tiny_v2_2_romanian` is an English model originally trained by giigii91. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_v2_2_romanian_en_5.5.1_3.0_1731302411636.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_v2_2_romanian_en_5.5.1_3.0_1731302411636.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("whisper_tiny_v2_2_romanian","en") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val audioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText = WhisperForCTC.pretrained("whisper_tiny_v2_2_romanian", "en") + .setInputCols(Array("audio_assembler")) + .setOutputCol("text") + +val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_v2_2_romanian| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|374.7 MB| + +## References + +https://huggingface.co/giigii91/whisper-tiny_v2.2-ro \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_v2_2_romanian_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_v2_2_romanian_pipeline_en.md new file mode 100644 index 00000000000000..9ee97f1589783a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-whisper_tiny_v2_2_romanian_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_tiny_v2_2_romanian_pipeline pipeline WhisperForCTC from giigii91 +author: John Snow Labs +name: whisper_tiny_v2_2_romanian_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `whisper_tiny_v2_2_romanian_pipeline` is an English model originally trained by giigii91. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_v2_2_romanian_pipeline_en_5.5.1_3.0_1731302438168.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_v2_2_romanian_pipeline_en_5.5.1_3.0_1731302438168.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_tiny_v2_2_romanian_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_tiny_v2_2_romanian_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_v2_2_romanian_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|374.7 MB| + +## References + +https://huggingface.co/giigii91/whisper-tiny_v2.2-ro + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-workprocess_24_10_01_en.md b/docs/_posts/ahmedlone127/2024-11-11-workprocess_24_10_01_en.md new file mode 100644 index 00000000000000..4b669d94a3965f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-workprocess_24_10_01_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English workprocess_24_10_01 BertForSequenceClassification from shshin0317 +author: John Snow Labs +name: workprocess_24_10_01 +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `workprocess_24_10_01` is an English model originally trained by shshin0317. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/workprocess_24_10_01_en_5.5.1_3.0_1731309636847.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/workprocess_24_10_01_en_5.5.1_3.0_1731309636847.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = BertForSequenceClassification.pretrained("workprocess_24_10_01","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("workprocess_24_10_01", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|workprocess_24_10_01| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|414.9 MB| + +## References + +https://huggingface.co/shshin0317/workprocess_24_10_01 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-workprocess_24_10_01_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-workprocess_24_10_01_pipeline_en.md new file mode 100644 index 00000000000000..76ac4fd47a25ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-workprocess_24_10_01_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English workprocess_24_10_01_pipeline pipeline BertForSequenceClassification from shshin0317 +author: John Snow Labs +name: workprocess_24_10_01_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `workprocess_24_10_01_pipeline` is an English model originally trained by shshin0317. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/workprocess_24_10_01_pipeline_en_5.5.1_3.0_1731309664028.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/workprocess_24_10_01_pipeline_en_5.5.1_3.0_1731309664028.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("workprocess_24_10_01_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("workprocess_24_10_01_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|workprocess_24_10_01_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|414.9 MB| + +## References + +https://huggingface.co/shshin0317/workprocess_24_10_01 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-xlm_cebinary_vmo2_large_3_en.md b/docs/_posts/ahmedlone127/2024-11-11-xlm_cebinary_vmo2_large_3_en.md new file mode 100644 index 00000000000000..0304b680ab9936 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-xlm_cebinary_vmo2_large_3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_cebinary_vmo2_large_3 MPNetForSequenceClassification from enochlev +author: John Snow Labs +name: xlm_cebinary_vmo2_large_3 +date: 2024-11-11 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm_cebinary_vmo2_large_3` is an English model originally trained by enochlev. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_cebinary_vmo2_large_3_en_5.5.1_3.0_1731301250606.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_cebinary_vmo2_large_3_en_5.5.1_3.0_1731301250606.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = MPNetForSequenceClassification.pretrained("xlm_cebinary_vmo2_large_3","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = MPNetForSequenceClassification.pretrained("xlm_cebinary_vmo2_large_3", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_cebinary_vmo2_large_3| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.3 MB| + +## References + +https://huggingface.co/enochlev/XLM-CEBinary-VMO2-large-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-xlm_cebinary_vmo2_large_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-xlm_cebinary_vmo2_large_3_pipeline_en.md new file mode 100644 index 00000000000000..e5ae9d36ca4bd9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-xlm_cebinary_vmo2_large_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_cebinary_vmo2_large_3_pipeline pipeline MPNetForSequenceClassification from enochlev +author: John Snow Labs +name: xlm_cebinary_vmo2_large_3_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm_cebinary_vmo2_large_3_pipeline` is an English model originally trained by enochlev. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_cebinary_vmo2_large_3_pipeline_en_5.5.1_3.0_1731301272226.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_cebinary_vmo2_large_3_pipeline_en_5.5.1_3.0_1731301272226.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_cebinary_vmo2_large_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_cebinary_vmo2_large_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_cebinary_vmo2_large_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.3 MB| + +## References + +https://huggingface.co/enochlev/XLM-CEBinary-VMO2-large-3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-xlm_roberta_base_conll2003_en.md b/docs/_posts/ahmedlone127/2024-11-11-xlm_roberta_base_conll2003_en.md new file mode 100644 index 00000000000000..f08eb1efd9d2ac --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-xlm_roberta_base_conll2003_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_conll2003 XlmRoBertaForTokenClassification from Amir13 +author: John Snow Labs +name: xlm_roberta_base_conll2003 +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm_roberta_base_conll2003` is an English model originally trained by Amir13. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_conll2003_en_5.5.1_3.0_1731293705042.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_conll2003_en_5.5.1_3.0_1731293705042.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_conll2003","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_conll2003", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_conll2003| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|843.4 MB| + +## References + +https://huggingface.co/Amir13/xlm-roberta-base-conll2003 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-xlm_roberta_base_conll2003_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-xlm_roberta_base_conll2003_pipeline_en.md new file mode 100644 index 00000000000000..97ba29a24389e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-xlm_roberta_base_conll2003_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_conll2003_pipeline pipeline XlmRoBertaForTokenClassification from Amir13 +author: John Snow Labs +name: xlm_roberta_base_conll2003_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm_roberta_base_conll2003_pipeline` is an English model originally trained by Amir13. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_conll2003_pipeline_en_5.5.1_3.0_1731293773356.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_conll2003_pipeline_en_5.5.1_3.0_1731293773356.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_conll2003_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_conll2003_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_conll2003_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|843.4 MB| + +## References + +https://huggingface.co/Amir13/xlm-roberta-base-conll2003 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-xlm_roberta_base_finetuned_panx_german_0ppxnhximxr_en.md b/docs/_posts/ahmedlone127/2024-11-11-xlm_roberta_base_finetuned_panx_german_0ppxnhximxr_en.md new file mode 100644 index 00000000000000..857973173c127a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-xlm_roberta_base_finetuned_panx_german_0ppxnhximxr_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_0ppxnhximxr XlmRoBertaForTokenClassification from 0ppxnhximxr +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_0ppxnhximxr +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm_roberta_base_finetuned_panx_german_0ppxnhximxr` is an English model originally trained by 0ppxnhximxr. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_0ppxnhximxr_en_5.5.1_3.0_1731293742525.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_0ppxnhximxr_en_5.5.1_3.0_1731293742525.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_0ppxnhximxr","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_0ppxnhximxr", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_0ppxnhximxr| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|840.8 MB| + +## References + +https://huggingface.co/0ppxnhximxr/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-xlm_roberta_base_finetuned_panx_german_0ppxnhximxr_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-xlm_roberta_base_finetuned_panx_german_0ppxnhximxr_pipeline_en.md new file mode 100644 index 00000000000000..e5f51a292de85e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-xlm_roberta_base_finetuned_panx_german_0ppxnhximxr_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_0ppxnhximxr_pipeline pipeline XlmRoBertaForTokenClassification from 0ppxnhximxr +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_0ppxnhximxr_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm_roberta_base_finetuned_panx_german_0ppxnhximxr_pipeline` is an English model originally trained by 0ppxnhximxr. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_0ppxnhximxr_pipeline_en_5.5.1_3.0_1731293829031.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_0ppxnhximxr_pipeline_en_5.5.1_3.0_1731293829031.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_0ppxnhximxr_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_0ppxnhximxr_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_0ppxnhximxr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|840.8 MB| + +## References + +https://huggingface.co/0ppxnhximxr/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-xlm_roberta_base_finetuned_panx_german_skr3178_en.md b/docs/_posts/ahmedlone127/2024-11-11-xlm_roberta_base_finetuned_panx_german_skr3178_en.md new file mode 100644 index 00000000000000..713a4393c505a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-xlm_roberta_base_finetuned_panx_german_skr3178_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_skr3178 XlmRoBertaForTokenClassification from skr3178 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_skr3178 +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm_roberta_base_finetuned_panx_german_skr3178` is an English model originally trained by skr3178. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_skr3178_en_5.5.1_3.0_1731293331803.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_skr3178_en_5.5.1_3.0_1731293331803.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_skr3178","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_skr3178", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_skr3178| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/skr3178/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-xlm_roberta_base_finetuned_panx_german_skr3178_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-xlm_roberta_base_finetuned_panx_german_skr3178_pipeline_en.md new file mode 100644 index 00000000000000..a392c47aa99014 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-xlm_roberta_base_finetuned_panx_german_skr3178_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_skr3178_pipeline pipeline XlmRoBertaForTokenClassification from skr3178 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_skr3178_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm_roberta_base_finetuned_panx_german_skr3178_pipeline` is an English model originally trained by skr3178. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_skr3178_pipeline_en_5.5.1_3.0_1731293402415.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_skr3178_pipeline_en_5.5.1_3.0_1731293402415.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")  # sample input; the "text" column name is an assumption - verify it against the pipeline's DocumentAssembler +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_skr3178_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala +val df = Seq("I love spark-nlp").toDS.toDF("text") // sample input; the "text" column name is an assumption - verify it against the pipeline's DocumentAssembler +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_skr3178_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_skr3178_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/skr3178/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-xlm_word_shopsign_pretrained_en.md b/docs/_posts/ahmedlone127/2024-11-11-xlm_word_shopsign_pretrained_en.md new file mode 100644 index 00000000000000..fee4f4bd6ab09a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-xlm_word_shopsign_pretrained_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_word_shopsign_pretrained XlmRoBertaForTokenClassification from HyungYoun +author: John Snow Labs +name: xlm_word_shopsign_pretrained +date: 2024-11-11 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm_word_shopsign_pretrained` is an English model originally trained by HyungYoun. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_word_shopsign_pretrained_en_5.5.1_3.0_1731293658556.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_word_shopsign_pretrained_en_5.5.1_3.0_1731293658556.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') +# The classifier must read the exact column names produced above: "document" and "token". +tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_word_shopsign_pretrained","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// DocumentAssembler takes a single input and a single output column (setInputCol / setOutputCol). +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") +// The classifier must read the exact column names produced above: "document" and "token". +val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_word_shopsign_pretrained", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_word_shopsign_pretrained| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|794.3 MB| + +## References + +https://huggingface.co/HyungYoun/xlm-word-shopsign-pretrained \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-11-11-xlm_word_shopsign_pretrained_pipeline_en.md b/docs/_posts/ahmedlone127/2024-11-11-xlm_word_shopsign_pretrained_pipeline_en.md new file mode 100644 index 00000000000000..7315a5c1e26498 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-11-11-xlm_word_shopsign_pretrained_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_word_shopsign_pretrained_pipeline pipeline XlmRoBertaForTokenClassification from HyungYoun +author: John Snow Labs +name: xlm_word_shopsign_pretrained_pipeline +date: 2024-11-11 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm_word_shopsign_pretrained_pipeline` is an English model originally trained by HyungYoun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_word_shopsign_pretrained_pipeline_en_5.5.1_3.0_1731293795355.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_word_shopsign_pretrained_pipeline_en_5.5.1_3.0_1731293795355.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")  # sample input; the "text" column name is an assumption - verify it against the pipeline's DocumentAssembler +pipeline = PretrainedPipeline("xlm_word_shopsign_pretrained_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala +val df = Seq("I love spark-nlp").toDS.toDF("text") // sample input; the "text" column name is an assumption - verify it against the pipeline's DocumentAssembler +val pipeline = new PretrainedPipeline("xlm_word_shopsign_pretrained_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_word_shopsign_pretrained_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|794.3 MB| + +## References + +https://huggingface.co/HyungYoun/xlm-word-shopsign-pretrained + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/danilojsl/2024-10-03-blip_vqa_base_en.md b/docs/_posts/danilojsl/2024-10-03-blip_vqa_base_en.md new file mode 100644 index 00000000000000..798005ef178919 --- /dev/null +++ b/docs/_posts/danilojsl/2024-10-03-blip_vqa_base_en.md @@ -0,0 +1,107 @@ +--- +layout: model +title: BLIP Question Answering +author: John Snow Labs +name: blip_vqa_base +date: 2024-10-03 +tags: [en, open_source, tensorflow] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.4 +supported: true +engine: tensorflow +annotator: BLIPForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +BLIP Model for visual question answering. The model consists of a vision encoder, a text encoder as well as a text decoder. The vision encoder will encode the input image, the text encoder will encode the input question together with the encoding of the image, and the text decoder will output the answer to the question. 
+ +## Predicted Entities + + + +{:.btn-box} + +[Open in Colab](https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/HuggingFace_in_Spark_NLP_BLIPForQuestionAnswering.ipynb){:.button.button-orange.button-orange-trans.co.button-icon} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/blip_vqa_base_en_5.5.0_3.4_1727997969354.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/blip_vqa_base_en_5.5.0_3.4_1727997969354.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + +To proceed, please create a DataFrame with two columns: + +- An `image` column that holds each image loaded from the directory (the file path of each image is available as `image.origin`). +- A `text` column that holds the question you would like to ask about each image. + +For example: + +```python +from pyspark.sql.functions import lit + +images_path = "./images/" +image_df = spark.read.format("image").load(path=images_path) + +test_df = image_df.withColumn("text", lit("What's this picture about?")) +test_df.show() +``` + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +imageAssembler = ImageAssembler() \ + .setInputCol("image") \ + .setOutputCol("image_assembler") +# Load the published model described by this card (name and language taken from the card metadata). +imageClassifier = BLIPForQuestionAnswering.pretrained("blip_vqa_base", "en") \ + .setInputCols("image_assembler") \ + .setOutputCol("answer") \ + .setSize(384) + +pipeline = Pipeline( + stages=[ + imageAssembler, + imageClassifier, + ] +) + +model = pipeline.fit(test_df) +result = model.transform(test_df) +result.select("image_assembler.origin", "answer.result").show(truncate = False) +``` +```scala +val imageAssembler: ImageAssembler = new ImageAssembler() + .setInputCol("image") + .setOutputCol("image_assembler") + +val loadModel = BLIPForQuestionAnswering + .pretrained() + .setInputCols("image_assembler") + .setOutputCol("answer") + .setSize(384) + +val newPipeline: Pipeline = + new Pipeline().setStages(Array(imageAssembler, loadModel)) +// Keep the fitted PipelineModel: transform() below is called on it. +val model = newPipeline.fit(testDF) +val result = model.transform(testDF) + +result.select("image_assembler.origin", "answer.result").show(truncate = false) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|blip_vqa_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.4 GB| \ No newline at end of file diff --git a/docs/_posts/gadde5300/2024-11-13-roberta_embeddings_legal_roberta_base_en.md b/docs/_posts/gadde5300/2024-11-13-roberta_embeddings_legal_roberta_base_en.md new file mode 100644 index 00000000000000..77ef36bb4ee436 --- /dev/null +++ b/docs/_posts/gadde5300/2024-11-13-roberta_embeddings_legal_roberta_base_en.md @@ -0,0 +1,109 @@ +--- +layout: model +title: English Legal RoBERTa Embeddings (CaseLaw, Base, Cased) +author: John Snow Labs +name: roberta_embeddings_legal_roberta_base +date: 2024-11-13 +tags: [roberta, embeddings, en, open_source, tensorflow] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: tensorflow +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Legal RoBERTa Embeddings model, uploaded to Hugging Face, adapted and imported into Spark NLP. `legal-roberta-base` is an English model originally trained by `saibo`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_embeddings_legal_roberta_base_en_5.5.0_3.0_1731462634993.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_embeddings_legal_roberta_base_en_5.5.0_3.0_1731462634993.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") +# Split each document into tokens. +token_splitter = Tokenizer() \ + .setInputCols(["document"]) \ + .setOutputCol("token") +# Embed every token with the pretrained model. +legal_embeddings = RoBertaEmbeddings.pretrained("roberta_embeddings_legal_roberta_base","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +nlp_pipeline = Pipeline().setStages([document_assembler, token_splitter, legal_embeddings]) + +sample_df = spark.createDataFrame([["I love Spark NLP"]]).toDF("text") +fitted_pipeline = nlp_pipeline.fit(sample_df) +result = fitted_pipeline.transform(sample_df) +``` +```scala +val docAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") +// Split each document into tokens. +val tokenSplitter = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") +// Embed every token with the pretrained model. +val legalEmbeddings = RoBertaEmbeddings.pretrained("roberta_embeddings_legal_roberta_base","en") + .setInputCols("document", "token") + .setOutputCol("embeddings") + +val nlpPipeline = new Pipeline().setStages(Array(docAssembler, tokenSplitter, legalEmbeddings)) + +val sampleDf = Seq("I love Spark NLP").toDF("text") +val fittedPipeline = nlpPipeline.fit(sampleDf) +val result = fittedPipeline.transform(sampleDf) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.embed.legal_roberta_base").predict("""I love Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_embeddings_legal_roberta_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|468.9 MB| +|Case sensitive:|true| + +## References + +```bash +- https://huggingface.co/saibo/legal-roberta-base +- https://www.kaggle.com/uspto/patent-litigations +- https://case.law/ +- https://www.kaggle.com/bigquery/patents +- https://www.kaggle.com/sohier/beyond-queries-exploring-the-bigquery-api +``` \ No newline at end of file