Models hub (#14418)

JohnSnowLabs · Sep 26, 2024 · 73f6626 · 73f6626
1 parent 5a2c24d
commit 73f6626
Show file tree

Hide file tree

Showing 1,359 changed files with 112,284 additions and 0 deletions.
diff --git a/docs/_posts/ahmedlone127/2024-09-07-somd_xlm_stage1_v2_en.md b/docs/_posts/ahmedlone127/2024-09-07-somd_xlm_stage1_v2_en.md
@@ -0,0 +1,94 @@
+---
+layout: model
+title: English somd_xlm_stage1_v2 XlmRoBertaForTokenClassification from ThuyNT03
+author: John Snow Labs
+name: somd_xlm_stage1_v2
+date: 2024-09-07
+tags: [en, open_source, onnx, token_classification, xlm_roberta, ner]
+task: Named Entity Recognition
+language: en
+edition: Spark NLP 5.5.0
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: XlmRoBertaForTokenClassification
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `somd_xlm_stage1_v2` is an English model originally trained by ThuyNT03.
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/somd_xlm_stage1_v2_en_5.5.0_3.0_1725687603645.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/somd_xlm_stage1_v2_en_5.5.0_3.0_1725687603645.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier  = XlmRoBertaForTokenClassification.pretrained("somd_xlm_stage1_v2","en") \
+     .setInputCols(["document","token"]) \
+     .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("somd_xlm_stage1_v2", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner") 
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|somd_xlm_stage1_v2|
+|Compatibility:|Spark NLP 5.5.0+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[document, token]|
+|Output Labels:|[ner]|
+|Language:|en|
+|Size:|797.6 MB|
+
+## References
+
+https://huggingface.co/ThuyNT03/SOMD-xlm-stage1-v2
diff --git a/docs/_posts/ahmedlone127/2024-09-09-cot_ep3_42_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-09-cot_ep3_42_pipeline_en.md
@@ -0,0 +1,69 @@
+---
+layout: model
+title: English cot_ep3_42_pipeline pipeline MPNetEmbeddings from ingeol
+author: John Snow Labs
+name: cot_ep3_42_pipeline
+date: 2024-09-09
+tags: [en, open_source, pipeline, onnx]
+task: Embeddings
+language: en
+edition: Spark NLP 5.5.0
+spark_version: 3.0
+supported: true
+annotator: PipelineModel
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cot_ep3_42_pipeline` is an English model originally trained by ingeol.
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cot_ep3_42_pipeline_en_5.5.0_3.0_1725897373617.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cot_ep3_42_pipeline_en_5.5.0_3.0_1725897373617.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+pipeline = PretrainedPipeline("cot_ep3_42_pipeline", lang = "en")
+annotations =  pipeline.transform(df)   
+
+```
+```scala
+
+val pipeline = new PretrainedPipeline("cot_ep3_42_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|cot_ep3_42_pipeline|
+|Type:|pipeline|
+|Compatibility:|Spark NLP 5.5.0+|
+|License:|Open Source|
+|Edition:|Official|
+|Language:|en|
+|Size:|407.1 MB|
+
+## References
+
+https://huggingface.co/ingeol/cot_ep3_42
+
+## Included Models
+
+- DocumentAssembler
+- MPNetEmbeddings
diff --git a/docs/_posts/ahmedlone127/2024-09-11-action_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-11-action_pipeline_en.md
@@ -0,0 +1,70 @@
+---
+layout: model
+title: English action_pipeline pipeline DistilBertForSequenceClassification from SergeyTW
+author: John Snow Labs
+name: action_pipeline
+date: 2024-09-11
+tags: [en, open_source, pipeline, onnx]
+task: Text Classification
+language: en
+edition: Spark NLP 5.5.0
+spark_version: 3.0
+supported: true
+annotator: PipelineModel
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `action_pipeline` is an English model originally trained by SergeyTW.
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/action_pipeline_en_5.5.0_3.0_1726014444476.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/action_pipeline_en_5.5.0_3.0_1726014444476.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+pipeline = PretrainedPipeline("action_pipeline", lang = "en")
+annotations =  pipeline.transform(df)   
+
+```
+```scala
+
+val pipeline = new PretrainedPipeline("action_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|action_pipeline|
+|Type:|pipeline|
+|Compatibility:|Spark NLP 5.5.0+|
+|License:|Open Source|
+|Edition:|Official|
+|Language:|en|
+|Size:|249.5 MB|
+
+## References
+
+https://huggingface.co/SergeyTW/action
+
+## Included Models
+
+- DocumentAssembler
+- TokenizerModel
+- DistilBertForSequenceClassification
diff --git a/docs/_posts/ahmedlone127/2024-09-11-finetuned_mixed_2epochs_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-11-finetuned_mixed_2epochs_pipeline_en.md
@@ -0,0 +1,69 @@
+---
+layout: model
+title: English finetuned_mixed_2epochs_pipeline pipeline MPNetEmbeddings from jhsmith
+author: John Snow Labs
+name: finetuned_mixed_2epochs_pipeline
+date: 2024-09-11
+tags: [en, open_source, pipeline, onnx]
+task: Embeddings
+language: en
+edition: Spark NLP 5.5.0
+spark_version: 3.0
+supported: true
+annotator: PipelineModel
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `finetuned_mixed_2epochs_pipeline` is an English model originally trained by jhsmith.
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_mixed_2epochs_pipeline_en_5.5.0_3.0_1726054541841.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_mixed_2epochs_pipeline_en_5.5.0_3.0_1726054541841.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+pipeline = PretrainedPipeline("finetuned_mixed_2epochs_pipeline", lang = "en")
+annotations =  pipeline.transform(df)   
+
+```
+```scala
+
+val pipeline = new PretrainedPipeline("finetuned_mixed_2epochs_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|finetuned_mixed_2epochs_pipeline|
+|Type:|pipeline|
+|Compatibility:|Spark NLP 5.5.0+|
+|License:|Open Source|
+|Edition:|Official|
+|Language:|en|
+|Size:|407.4 MB|
+
+## References
+
+https://huggingface.co/jhsmith/finetuned_mixed_2epochs
+
+## Included Models
+
+- DocumentAssembler
+- MPNetEmbeddings
diff --git a/docs/_posts/ahmedlone127/2024-09-11-uned_tfg_08_77_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-11-uned_tfg_08_77_pipeline_en.md
@@ -0,0 +1,70 @@
+---
+layout: model
+title: English uned_tfg_08_77_pipeline pipeline RoBertaForSequenceClassification from alexisdr
+author: John Snow Labs
+name: uned_tfg_08_77_pipeline
+date: 2024-09-11
+tags: [en, open_source, pipeline, onnx]
+task: Text Classification
+language: en
+edition: Spark NLP 5.5.0
+spark_version: 3.0
+supported: true
+annotator: PipelineModel
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `uned_tfg_08_77_pipeline` is an English model originally trained by alexisdr.
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/uned_tfg_08_77_pipeline_en_5.5.0_3.0_1726090867835.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/uned_tfg_08_77_pipeline_en_5.5.0_3.0_1726090867835.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+pipeline = PretrainedPipeline("uned_tfg_08_77_pipeline", lang = "en")
+annotations =  pipeline.transform(df)   
+
+```
+```scala
+
+val pipeline = new PretrainedPipeline("uned_tfg_08_77_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|uned_tfg_08_77_pipeline|
+|Type:|pipeline|
+|Compatibility:|Spark NLP 5.5.0+|
+|License:|Open Source|
+|Edition:|Official|
+|Language:|en|
+|Size:|443.8 MB|
+
+## References
+
+https://huggingface.co/alexisdr/uned-tfg-08.77
+
+## Included Models
+
+- DocumentAssembler
+- TokenizerModel
+- RoBertaForSequenceClassification